1.23 修复了开评定标提示词;废标/无效标部分增加了对'十、废标条件'这类标题的提取

This commit is contained in:
zy123 2025-01-23 14:21:39 +08:00
parent ab74e4d03e
commit d8d5ece4fc
3 changed files with 8 additions and 7 deletions

View File

@ -351,7 +351,7 @@ def extract_text_with_keywords(processed_paragraphs, keywords, follow_up_keyword
current_section_pattern = re.compile(combined_pattern)
else:
process_matching_section(None) # Reusing the common function for matching sections
process_matching_section() # Reusing the common function for matching sections
return current_index
@ -662,12 +662,12 @@ if __name__ == '__main__':
# clause_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\77a48c63-f39f-419b-af2a-7b3dbf41b70b\\clause1.json"
# doc_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\磋商文件(1).docx"
# doc_path = r'C:\Users\Administrator\Desktop\new招标文件\tmp\2024-贵州-贵州省罗甸县 2024 年度广州市协作资金龙坪镇、边阳镇产业路硬化建设项目.docx'
pdf_path = r'C:\Users\Administrator\Desktop\新建文件夹 (3)\废标\2025-湖北-通羊镇港口村人饮项目谈判文件.pdf'
pdf_path = r'C:\Users\Administrator\Desktop\货物标\zbfiles\output6\招标文件(107国道)_invalid.pdf'
output_dir = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\废标"
output_dir = r"C:\Users\Administrator\Desktop\货物标\zbfiles\output6"
# invalid_added = insert_mark(pdf_path)
# invalid_added_docx = pdf2docx(invalid_added)
invalid_added_docx=r'C:\Users\Administrator\Desktop\新建文件夹 (3)\废标\invalid_added.docx'
invalid_added_docx=r'C:\Users\Administrator\Desktop\货物标\zbfiles\output6\invalid_added.docx'
results = combine_find_invalid(invalid_added_docx, output_dir)
end_time = time.time()
print("Results:", json.dumps(results, ensure_ascii=False, indent=4))

View File

@ -269,6 +269,7 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
# 货物标和工程标的资格审查整合
##TODO:陕西省公安厅交通警察总队高速公路交通安全智能感知巡查系统项目(1)_tobidders_notice_part2.pdf 唐山市公安交通警察支队机动车查验机构视频存储回放系统竞争性谈判-招标文件正文(1)_tobidders_notice_part1.pdf 不好搞
# 无法判断用户上传的是否为乱码文件,可以考虑并行调用大模型,如果为乱码文件直接return None
# 目前偏离表.py这块提取带星要求是通过大模型完成的,若采购需求非常长且带星要求非常多,可能会超出最大输出字数限制。
#截取json文件有些问题C:\Users\Administrator\Desktop\新建文件夹 (3)\test keywords和special...
if __name__ == "__main__":

View File

@ -319,11 +319,11 @@ if __name__ == "__main__":
logger = get_global_logger("123")
# input_path = r"C:\Users\Administrator\Desktop\new招标文件\货物标"
# pdf_path = r"C:\Users\Administrator\Desktop\招标文件-采购类\2024-贵州-贵州医科大学附属医院导视系统零星制作安装项目.pdf"
pdf_path=r"C:\Users\Administrator\Desktop\新建文件夹 (3)\test\泉州白濑水利枢纽工程高低压配电装置设计、制造及采购 (1).pdf"
pdf_path=r"C:\Users\Administrator\Desktop\货物标\zbfiles\招标文件(107国道).pdf"
# input_path = r"C:\Users\Administrator\Desktop\货物标\zbfiles\2-招标文件(广水市教育局封闭管理).pdf"
# pdf_path=r"C:\Users\Administrator\Desktop\文件解析问题\文件解析问题\1414cb9c-7bf4-401c-8761-2acde151b9c2\ztbfile.pdf"
output_folder = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\test"
output_folder = r"C:\Users\Administrator\Desktop\货物标\zbfiles\output6"
# output_folder = r"C:\Users\Administrator\Desktop\new招标文件\output2"
selection = 4 # 例如1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2与评标办法一致 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 6-invalid_path
selection = 6 # 例如1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2与评标办法一致 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 6-invalid_path
generated_files = truncate_pdf_main_goods(pdf_path, output_folder, selection,logger)
print(generated_files)