diff --git a/flask_app/main/工程标解析main.py b/flask_app/main/工程标解析main.py index ef00562..b83ef74 100644 --- a/flask_app/main/工程标解析main.py +++ b/flask_app/main/工程标解析main.py @@ -49,6 +49,7 @@ def preprocess_files(output_folder, downloaded_file_path, file_type,unique_id,lo return None # 调用截取PDF多次 truncate_files = truncate_pdf_multiple(pdf_path, output_folder,unique_id) + print("切割出的文件:"+str(truncate_files)) # 处理各个部分 tobidders_notice_table=truncate_files[0] @@ -60,6 +61,7 @@ def preprocess_files(output_folder, downloaded_file_path, file_type,unique_id,lo evaluation_method = truncate_files[2] #评标方法 qualification = truncate_files[3] #资格审查 + notice_path=truncate_files[4] #公告 invalid_path=truncate_files[5] # invalid_docpath = copy_docx(docx_path) # docx截取无效标部分 @@ -79,6 +81,7 @@ def preprocess_files(output_folder, downloaded_file_path, file_type,unique_id,lo 'file_path': downloaded_file_path, 'output_folder': output_folder, 'invalid_path':invalid_path, + 'notice_path':notice_path, 'tobidders_notice_table': tobidders_notice_table, 'tobidders_notice': tobidders_notice, 'evaluation_method':evaluation_method, @@ -106,14 +109,16 @@ def fetch_project_basic_info(invalid_path, merged_baseinfo_path, merged_baseinfo # 形式、响应、资格评审 -def fetch_qualification_review(evaluation_method, qualification, output_folder, tobidders_notice_table, clause_path, invalid_path, merged_baseinfo_path,logger): +def fetch_qualification_review(evaluation_method, qualification, output_folder, tobidders_notice_table, clause_path, invalid_path, merged_baseinfo_path,notice_path,logger): logger.info("starting 资格审查...") start_time = time.time() + if not notice_path: + notice_path=invalid_path if not evaluation_method: evaluation_method = invalid_path if not merged_baseinfo_path: merged_baseinfo_path = invalid_path - review_standards_res = combine_review_standards(evaluation_method, qualification, output_folder, tobidders_notice_table, clause_path, invalid_path, merged_baseinfo_path) + review_standards_res = combine_review_standards(evaluation_method, qualification, output_folder, tobidders_notice_table, clause_path, invalid_path, merged_baseinfo_path,notice_path) end_time = time.time() logger.info(f"资格审查 done,耗时:{end_time - start_time:.2f} 秒") return review_standards_res @@ -188,7 +193,7 @@ def engineering_bid_main(output_folder, downloaded_file_path, file_type, unique_ processed_data['qualification'], output_folder, processed_data['tobidders_notice_table'], processed_data['clause_path'], processed_data['invalid_path'], - processed_data['merged_baseinfo_path'],logger), + processed_data['merged_baseinfo_path'],processed_data['notice_path'],logger), 'evaluation_standards': executor.submit(fetch_evaluation_standards, processed_data['invalid_path'],processed_data['evaluation_method'],logger), 'invalid_requirements': executor.submit(fetch_invalid_requirements, processed_data['invalid_docpath'],output_folder,logger), 'bidding_documents_requirements': executor.submit(fetch_bidding_documents_requirements,processed_data['invalid_path'], processed_data['merged_baseinfo_path_more'],processed_data['clause_path'],logger), diff --git a/flask_app/main/截取pdf.py b/flask_app/main/截取pdf.py index 62b0cef..d8c55d1 100644 --- a/flask_app/main/截取pdf.py +++ b/flask_app/main/截取pdf.py @@ -591,11 +591,11 @@ if __name__ == "__main__": # input_path = "C:\\Users\\Administrator\\Desktop\\new招标文件\\工程标" # input_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\0b1861e6-c7f6-4541-9182-b1384ba84f3b\\ztbfile.pdf" # input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\2-招标文件.pdf" - input_path=r"C:\Users\Administrator\Desktop\招标文件\招标test文件夹\zbtest4_evaluation_method.pdf" + input_path=r"C:\Users\Administrator\Desktop\招标文件\招标test文件夹\zbtest4.pdf" output_folder = "C:\\Users\\Administrator\\Desktop\\new招标文件\\output3" - # files=truncate_pdf_multiple(input_path,output_folder) - selections = [4, 1] # 仅处理 selection 4、1 - files=truncate_pdf_specific_engineering(input_path,output_folder,selections) + files=truncate_pdf_multiple(input_path,output_folder) + # selections = [4, 1] # 仅处理 selection 4、1 + # files=truncate_pdf_specific_engineering(input_path,output_folder,selections) print(files) # selection = 1 # 例如:1 - 投标人须知前附表+正文, 2 - 评标办法, 3 -资格审查条件 4-招标公告 5-无效标 # generated_files = truncate_pdf_main(input_path, output_folder, selection) diff --git a/flask_app/main/资格审查模块.py b/flask_app/main/资格审查模块.py index 0136a14..33ec222 100644 --- a/flask_app/main/资格审查模块.py +++ b/flask_app/main/资格审查模块.py @@ -39,7 +39,7 @@ def process_notice(notice_path): #TODO:目前有个问题,如果evaluation_method既没有符合性审查又没有形式评审,就有问题 def combine_review_standards(evaluation_method, qualification_path, output_folder, tobidders_notice_table, clause_path, - invalid_path, merged_baseinfo_path): + invalid_path, merged_baseinfo_path,notice_path): """ 结合评审标准,包括形式评审、响应评审、资格评审及申请人资格要求。 @@ -60,7 +60,7 @@ def combine_review_standards(evaluation_method, qualification_path, output_folde file_id = upload_file(evaluation_method) # 评标办法前附表 first_query=""" - 该文档中是否说明了符合性审查标准?说明了就回答'是',否则回答'否',请以json格式给我返回结果,键名分别是'符合性审查',键值仅限于'是','否'。注意:它与形式、响应性评审是对立的,也就是说只要文档中描述了形式、响应性评审,那么符合性审查的键值一定是'否'。以下为输出示例: + 该文档的评标办法章节中是否说明了符合性审查标准?说明了就回答'是',否则回答'否',请以json格式给我返回结果,键名分别是'符合性审查',键值仅限于'是','否'。注意:它与形式、响应性评审是对立的,也就是说只要文档中描述了形式、响应性评审,那么符合性审查的键值一定是'否'。以下为输出示例: { "符合性审查":"是" } @@ -106,7 +106,7 @@ def combine_review_standards(evaluation_method, qualification_path, output_folde ), "申请人资格要求": executor.submit( process_notice, - merged_baseinfo_path + notice_path ) } # 定义所需的顺序 @@ -135,15 +135,16 @@ def combine_review_standards(evaluation_method, qualification_path, output_folde #TODO: bb61d137-794c-4760-8da7-ebc10cdc2782 还有点问题 if __name__ == "__main__": start_time = time.time() - evaluation_method = r"C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782\ztbfile_evaluation_method.pdf" - qualification_path = r"C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782\ztbfile_qualification.pdf" - output_folder = r"C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782" + evaluation_method = r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_evaluation_method.pdf" + qualification_path=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_qualification.pdf" + output_folder=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3" + notice_path='' # knowledge_name="zbtest20" - clause_path = r"C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782\clause1.json" - tobidders_notice_table = r"C:\Users\Administrator\Desktop\new招标文件\工程标\tmp\\HBDL-2024-0017-001-招标文件_tobidders_notice_table.pdf" + clause_path = r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\clause1.json" + tobidders_notice_table = r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_tobidders_notice_table.pdf" - invalid_path = r"C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782\ztbfile_invalid.pdf" - merged_baseinfo_path = r"C:\Users\Administrator\Desktop\fsdownload\bb61d137-794c-4760-8da7-ebc10cdc2782\ztbfile_merged_baseinfo.pdf" + invalid_path = r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_invalid.pdf" + merged_baseinfo_path = r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_merged_baseinfo.pdf" res = combine_review_standards(evaluation_method, qualification_path, output_folder, tobidders_notice_table, clause_path, invalid_path, merged_baseinfo_path) print(json.dumps(res, ensure_ascii=False, indent=4)) diff --git a/flask_app/main/资格评审.py b/flask_app/main/资格评审.py index ed5503f..e894cee 100644 --- a/flask_app/main/资格评审.py +++ b/flask_app/main/资格评审.py @@ -58,30 +58,37 @@ def generate_qual_question(matching_keys_list): # 这里假设资质、信誉 keys_string = "、".join(formatted_keys) # 构造完整的问题语句 question1 = ( - f"""该招标文件中资格评审的内容是怎样的?具体内容包括{keys_string},请你以json格式返回结果,最外层键名为'资格评审',次外层键名为这些评审因素(如资质条件、信誉要求等),可能存在嵌套关系,但最内层键值为一个描述该评审因素的要求及备注的字典,其内层键名分别是'要求'和'备注',若无具体备注信息,可删去'备注'键值对。你的回答内容需要与原文一致,不可擅自总结删减。注意不要回答有关符合性审查的内容。以下为示例输出,仅供格式参考: + f"""该招标文件中资格评审的内容是怎样的?具体内容包括{keys_string},请你以json格式返回结果,最外层键名为'资格评审',次外层键名为这些评审因素(如资质条件、信誉要求等),可能存在嵌套关系,但最内层键值为一个描述该评审因素的要求及备注的字典,其内层键名分别是'要求'和'备注',若无具体备注信息,可删去'备注'键值对。你的回答内容需要与原文一致,不可擅自总结删减。以下为你需要考虑的特殊情况:1.若评审因素是项目人员(如项目经理、技术负责人等),除了'要求','备注',还应增加一个键名'数量',对应的键值为该岗位所需人数,若无相关要求,键值为'未知' 2.若评审因素为信誉要求,那么它为'要求'的对应键值为一个字符串列表,其中每个字符串是一条信誉要求。以下为示例输出,仅供格式参考: {{ - "资格评审": {{ - "资质条件": {{ - "要求": "具备在中华人民共和国境内注册,具有有效营业执照", - "备注": "原件扫描上传" - }}, - "信誉要求": {{ - "要求": "1.没有被依法暂停或取消投标资格;\n2.没有被责令停产停业、暂扣或者吊销许可证、暂扣或者吊销执照;", - }}, - "其他要求": {{ - "项目管理机构人员": {{ - "项目技术负责人": {{ - "要求": "具备市政工程相关专业中级职称或具备市政公用工程专业贰级注册建造师执业资格(不含临时证)", - "备注": "1人" - }}, - "施工管理": {{ - "要求": "持有施工员岗位培训考核合格证书。", - "备注": "1人" - }} + "资格评审": {{ + "资质条件": {{ + "要求": "具备在中华人民共和国境内注册,具有有效营业执照", + "备注": "原件扫描上传" + }}, + "信誉要求": {{ + "要求": [ + "1.没有被依法暂停或取消投标资格;", + "2.没有被责令停产停业、暂扣或者吊销许可证、暂扣或者吊销执照;" + ] + }}, + "项目经理资格": {{ + "要求": "项目经理具有建筑工程专业二级及以上注册建造师执业资格并持有效的安全生产考核合格证(B证)", + "数量": "1人" + }}, + "其他要求":{{ + "项目管理机构人员": {{ + "项目技术负责人": {{ + "要求": "具备市政工程相关专业中级职称或具备市政公用工程专业贰级注册建造师执业资格(不含临时证)", + "数量": "1人" + }}, + "施工管理": {{ + "要求": "持有施工员岗位培训考核合格证书。", + "数量": "未知" }} }} }} }} +}} """ ) # question2 = "该招标文件中资格评审中有关人员资格的要求是怎样的?请依次给出所需的岗位、需要的数量、资格要求、需要提交的证明材料(如具体的社保证明、技能证书等,若有时间要求请注明时间范围)、在岗要求、备注,若相关要求不存在,则无需返回该键值对。请你以json格式返回结果,外层键名为'资格评审',嵌套键名为具体的要求,请你忠于原文,回答要求完整准确,不要擅自总结、删减。" diff --git a/flask_app/货物标/提取json货物标版.py b/flask_app/货物标/提取json货物标版.py index 49d106b..9d003e3 100644 --- a/flask_app/货物标/提取json货物标版.py +++ b/flask_app/货物标/提取json货物标版.py @@ -174,8 +174,8 @@ def process_folder(input_folder, output_folder): except ValueError as e: print(f"Error processing {file_name}: {e}") -#TODO: 投标人须知正文这块,序号可能是乱序的,或许可以删除判断序号大小的逻辑,只要出现在开头的序号就作为新的键 eg:2-招标文件。目前将这种情况当特殊处理 -#TODO:招标文件111_tobidders_notice_part2.pdf 陕西省公安厅交通警察总队高速公路交通安全智能感知巡查系统项目 (1)_tobidders_notice_part2.pdf +#TODO: 投标人须知正文这块,序号可能是乱序的,目前保留了默认新序号大于旧序号,否则当作上个序号的正文。 但2-招标文件序号是混乱的,无解 +#TODO:招标文件111_tobidders_notice_part2.pdf 陕西省公安厅交通警察总队高速公路交通安全智能感知巡查系统项目(1)_tobidders_notice_part2.pdf if __name__ == "__main__": # file_path = 'D:\\flask_project\\flask_app\\static\\output\\cfd4959d-5ea9-4112-8b50-9e543803f029\\ztbfile_tobidders_notice.pdf' file_path=r'C:\Users\Administrator\Desktop\货物标\output4\广水农商行门禁控制主机及基础验证设备采购项目——磋商文件(定稿)(三次)_tobidders_notice_part2.pdf' diff --git a/flask_app/货物标/资格审查main.py b/flask_app/货物标/资格审查main.py index 87ddff0..7c2ea45 100644 --- a/flask_app/货物标/资格审查main.py +++ b/flask_app/货物标/资格审查main.py @@ -586,17 +586,17 @@ if __name__ == "__main__": start_time=time.time() # qualification_path="C:\\Users\\Administrator\\Desktop\\货物标\\output3\\6.2定版视频会议磋商文件_qualification2.pdf" # output_folder = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89" - output_folder="C:\\Users\\Administrator\\Desktop\\货物标\\output3\\tmp" + output_folder=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3" # qualification_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile_qualification1.pdf" # qualification_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_qualification2.pdf" - qualification_path="C:\\Users\\Administrator\\Desktop\\货物标\\output3\\2-招标文件(统计局智能终端二次招标)_qualification1.pdf" + qualification_path=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_qualification.pdf" # notice_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\6558a50a-13ea-4279-a5db-684935481c39\\ztbfile_notice.pdf" # notice_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile_notice.pdf" - notice_path="C:\\Users\\Administrator\\Desktop\\货物标\\output5\\2-招标文件(统计局智能终端二次招标)_notice.pdf" + notice_path=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_notice.pdf" # knowledge_name = "6.2视频会议docx" # invalid_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\e7dda5cb-10ba-47a8-b989-d2993d34bb89\\ztbfile.pdf" # invalid_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\52e54b20-c975-4cf3-a06b-6f146aaa93f5\\ztbfile.pdf" - invalid_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\2-招标文件(统计局智能终端二次招标).pdf" + invalid_path=r"D:\flask_project\flask_app\static\output\output1\c911b0f8-0ff4-4718-80e3-86f464f313d3\ztbfile_invalid.pdf" res = combine_qualification_review(invalid_path, qualification_path, notice_path) print(json.dumps(res, ensure_ascii=False, indent=4)) end_time=time.time()