This commit is contained in:
zy123 2024-10-24 10:56:33 +08:00
parent 7776c41ed0
commit 8f428e73da
3 changed files with 18 additions and 25 deletions

View File

@ -42,6 +42,7 @@ def get_filename_and_folder(file_path):
# Sends the file at local_path_in to the remote "p2d" transfer endpoint via
# upload_file, then downloads the result with download_file to a path built
# from the input's folder and file name.
def pdf2docx(local_path_in):
remote_url = 'http://47.98.59.178:8000/v3/3rd/files/transfer/p2d'
receive_download_url = upload_file(local_path_in, remote_url) # conversion finished; this is the download link
print(receive_download_url)
filename, folder = get_filename_and_folder(local_path_in) # input and output live in the same folder
local_filename=os.path.join(folder,filename) # output file name without extension, e.g. C:\Users\Administrator\Desktop\货物标\zbfiles\6.2定版视频会议磋商文件
downloaded_filepath,file_type=download_file(receive_download_url, local_filename)
@ -162,7 +163,8 @@ def doc2docx(file_path):
# Manual entry point: runs pdf2docx on a hard-coded local file and prints
# whatever it returns.
if __name__ == '__main__':
# Replace with your own file path and API URL
local_path_in="C:\\Users\\Administrator\\Desktop\\fsdownload\\1fbbb6ff-7ddc-40bb-8857-b7de37aece3f\\兴欣工程.pdf"
# local_path_in="C:\\Users\\Administrator\\Desktop\\fsdownload\\1fbbb6ff-7ddc-40bb-8857-b7de37aece3f\\兴欣工程.pdf"
# NOTE: this second assignment overrides the path assigned above.
local_path_in="C:\\Users\\Administrator\\Desktop\\fsdownload\\ztbfile.pdf"
# downloaded_file=doc2docx(local_path_in)
downloaded_file=pdf2docx(local_path_in)
print(downloaded_file)

View File

@ -381,6 +381,7 @@ def process_and_stream(file_url, zb_type):
includes = ["基础信息", "资格审查", "商务评分", "技术评分", "无效标与废标项", "投标文件要求", "开评定标流程"]
final_result, extracted_info,procurement_reqs = outer_post_processing(combined_data, includes)
logger.info(f"Procurement requirements extracted: {json.dumps(procurement_reqs, ensure_ascii=False, indent=4)}") # 添加日志记录
procurement_reqs_response={
'message': 'procurement_reqs',
'filename': os.path.basename(downloaded_filepath),

View File

@ -1,24 +1,14 @@
import re
# Start-of-section pattern. It has three kinds of alternatives:
#   1. a "第<Chinese numeral>章" chapter heading followed (after optional
#      whitespace) by 招标公告 or by 投标须知 plus any trailing text;
#   2. "第一卷" at the very start of the string or right after a newline
#      (no re.MULTILINE flag is passed, so "^" alone only matches at the
#      start of the string; the explicit "\n" covers later lines);
#   3. the literal label 招标编号 followed by a colon.
# The chapter alternative is not anchored, so search() finds it anywhere.
begin_pattern = re.compile(r'第[一二三四五六七八九十]+章\s*(招标公告|投标须知.*)|(^|\n)第一卷|招标编号:|招标编号:')
# Test cases
test_cases = [
"第一章 招标公告",
"\n第一章 招标公告", # heading on the second line
"第二章 投标须知",
"第三章 投标须知要求",
"第一卷 投标文件格式",
"招标编号: ABC123",
"招标编号DEF456", # no colon after the label, so no alternative matches
"第三章 项目概述", # chapter title is neither 招标公告 nor 投标须知
"第四章 评标办法" # chapter title is neither 招标公告 nor 投标须知
]
# Run the pattern against every case and report whether it matched
for i, case in enumerate(test_cases, 1):
match = begin_pattern.search(case)
if match:
print(f"Test case {i}: Matched - '{case}'")
else:
print(f"Test case {i}: Not matched - '{case}'")
from flask_app.general.通义千问long import upload_file,qianwen_long
# Ad-hoc script: upload one local PDF with upload_file, pass the returned
# value together with the prompt below to qianwen_long, and print the reply.
file_path="C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\zbtest19\\zbtest19_214-320.pdf"
# Prompt (Chinese, sent verbatim): asks where the business licence should be
# attached, lists candidate section titles to look for, and requests a JSON
# answer with the keys '上文' and '下文' holding at most 30 characters of
# context around the insertion point, copied unchanged from the document.
# An example of the expected JSON output is embedded in the prompt.
user_query="""该文件为投标文件格式要求,请你根据该招标文件回答:营业执照应该附在哪个地方?你可能需要查找以下章节出现的地方:'具有独立承担民事责任能力的法人','投标人基本信息表','法人或者其他组织的营业执照等证明文件,自然人的身份证明','投标人情况介绍','投标人简介','企业相关证件'
我需要将营业执照贴在该章节的最后面目前我需要定位到插入的位置请你返回给我插入位置的上下文字数限制在30字以内以json格式返回键名分别是'上文','下文'上下文应格式内容完全与原文保持一致不得擅自删减总结示例输出如下
{
"上文":"投标人: (盖单位章)
",
"下文":"四、投标保证金
招标人名称"
}
"""
# upload_file's return value is handed to qianwen_long as its first argument
file_id=upload_file(file_path)
res=qianwen_long(file_id,user_query)
print(res)