1.23 修改了开评定标提示词;废标/无效标部分增加了对'十、废标条件'这类标题的提取
This commit is contained in:
parent
ab74e4d03e
commit
d8d5ece4fc
@@ -351,7 +351,7 @@ def extract_text_with_keywords(processed_paragraphs, keywords, follow_up_keyword
|
|||||||
current_section_pattern = re.compile(combined_pattern)
|
current_section_pattern = re.compile(combined_pattern)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
process_matching_section(None) # Reusing the common function for matching sections
|
process_matching_section() # Reusing the common function for matching sections
|
||||||
|
|
||||||
return current_index
|
return current_index
|
||||||
|
|
||||||
@@ -662,12 +662,12 @@ if __name__ == '__main__':
|
|||||||
# clause_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\77a48c63-f39f-419b-af2a-7b3dbf41b70b\\clause1.json"
|
# clause_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\77a48c63-f39f-419b-af2a-7b3dbf41b70b\\clause1.json"
|
||||||
# doc_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\磋商文件(1).docx"
|
# doc_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\磋商文件(1).docx"
|
||||||
# doc_path = r'C:\Users\Administrator\Desktop\new招标文件\tmp\2024-贵州-贵州省罗甸县 2024 年度广州市协作资金龙坪镇、边阳镇产业路硬化建设项目.docx'
|
# doc_path = r'C:\Users\Administrator\Desktop\new招标文件\tmp\2024-贵州-贵州省罗甸县 2024 年度广州市协作资金龙坪镇、边阳镇产业路硬化建设项目.docx'
|
||||||
pdf_path = r'C:\Users\Administrator\Desktop\新建文件夹 (3)\废标\2025-湖北-通羊镇港口村人饮项目谈判文件.pdf'
|
pdf_path = r'C:\Users\Administrator\Desktop\货物标\zbfiles\output6\招标文件(107国道)_invalid.pdf'
|
||||||
|
|
||||||
output_dir = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\废标"
|
output_dir = r"C:\Users\Administrator\Desktop\货物标\zbfiles\output6"
|
||||||
# invalid_added = insert_mark(pdf_path)
|
# invalid_added = insert_mark(pdf_path)
|
||||||
# invalid_added_docx = pdf2docx(invalid_added)
|
# invalid_added_docx = pdf2docx(invalid_added)
|
||||||
invalid_added_docx=r'C:\Users\Administrator\Desktop\新建文件夹 (3)\废标\invalid_added.docx'
|
invalid_added_docx=r'C:\Users\Administrator\Desktop\货物标\zbfiles\output6\invalid_added.docx'
|
||||||
results = combine_find_invalid(invalid_added_docx, output_dir)
|
results = combine_find_invalid(invalid_added_docx, output_dir)
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
print("Results:", json.dumps(results, ensure_ascii=False, indent=4))
|
print("Results:", json.dumps(results, ensure_ascii=False, indent=4))
|
||||||
|
@@ -269,6 +269,7 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
|
|||||||
# 货物标和工程标的资格审查整合
|
# 货物标和工程标的资格审查整合
|
||||||
##TODO:陕西省公安厅交通警察总队高速公路交通安全智能感知巡查系统项目(1)_tobidders_notice_part2.pdf 唐山市公安交通警察支队机动车查验机构视频存储回放系统竞争性谈判-招标文件正文(1)_tobidders_notice_part1.pdf 不好搞
|
##TODO:陕西省公安厅交通警察总队高速公路交通安全智能感知巡查系统项目(1)_tobidders_notice_part2.pdf 唐山市公安交通警察支队机动车查验机构视频存储回放系统竞争性谈判-招标文件正文(1)_tobidders_notice_part1.pdf 不好搞
|
||||||
# 无法判断用户上传的是否为乱码文件,可以考虑并行调用大模型,如果为乱码文件直接return None
|
# 无法判断用户上传的是否为乱码文件,可以考虑并行调用大模型,如果为乱码文件直接return None
|
||||||
|
# 目前偏离表.py这块提取带星要求是通过大模型,若采购需求非常长且带星要求非常多,可能会超最大输出字数限制。
|
||||||
|
|
||||||
#截取json文件有些问题:C:\Users\Administrator\Desktop\新建文件夹 (3)\test keywords和special...
|
#截取json文件有些问题:C:\Users\Administrator\Desktop\新建文件夹 (3)\test keywords和special...
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@@ -319,11 +319,11 @@ if __name__ == "__main__":
|
|||||||
logger = get_global_logger("123")
|
logger = get_global_logger("123")
|
||||||
# input_path = r"C:\Users\Administrator\Desktop\new招标文件\货物标"
|
# input_path = r"C:\Users\Administrator\Desktop\new招标文件\货物标"
|
||||||
# pdf_path = r"C:\Users\Administrator\Desktop\招标文件-采购类\2024-贵州-贵州医科大学附属医院导视系统零星制作安装项目.pdf"
|
# pdf_path = r"C:\Users\Administrator\Desktop\招标文件-采购类\2024-贵州-贵州医科大学附属医院导视系统零星制作安装项目.pdf"
|
||||||
pdf_path=r"C:\Users\Administrator\Desktop\新建文件夹 (3)\test\泉州白濑水利枢纽工程高低压配电装置设计、制造及采购 (1).pdf"
|
pdf_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\招标文件(107国道).pdf"
|
||||||
# input_path = r"C:\Users\Administrator\Desktop\货物标\zbfiles\2-招标文件(广水市教育局封闭管理).pdf"
|
# input_path = r"C:\Users\Administrator\Desktop\货物标\zbfiles\2-招标文件(广水市教育局封闭管理).pdf"
|
||||||
# pdf_path=r"C:\Users\Administrator\Desktop\文件解析问题\文件解析问题\1414cb9c-7bf4-401c-8761-2acde151b9c2\ztbfile.pdf"
|
# pdf_path=r"C:\Users\Administrator\Desktop\文件解析问题\文件解析问题\1414cb9c-7bf4-401c-8761-2acde151b9c2\ztbfile.pdf"
|
||||||
output_folder = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\test"
|
output_folder = r"C:\Users\Administrator\Desktop\货物标\zbfiles\output6"
|
||||||
# output_folder = r"C:\Users\Administrator\Desktop\new招标文件\output2"
|
# output_folder = r"C:\Users\Administrator\Desktop\new招标文件\output2"
|
||||||
selection = 4 # 例如:1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 6-invalid_path
|
selection = 6 # 例如:1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 6-invalid_path
|
||||||
generated_files = truncate_pdf_main_goods(pdf_path, output_folder, selection,logger)
|
generated_files = truncate_pdf_main_goods(pdf_path, output_folder, selection,logger)
|
||||||
print(generated_files)
|
print(generated_files)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user