11.21 修复了工程标资格审查的bug

This commit is contained in:
zy123 2024-11-21 14:36:38 +08:00
parent 7fc29ea2a0
commit 5f6ce70fb3
4 changed files with 13 additions and 7 deletions

View File

@@ -52,7 +52,7 @@ def pdf2docx(local_path_in):
return ""
remote_url = 'http://47.98.59.178:8000/v3/3rd/files/transfer/p2d'
receive_download_url = upload_file(local_path_in, remote_url) #转换完成,得到下载链接
print(receive_download_url)
# print(receive_download_url)
filename, folder = get_filename_and_folder(local_path_in) #输入输出在同一个文件夹
local_filename=os.path.join(folder,filename) #输出文件名 C:\Users\Administrator\Desktop\货物标\zbfiles\6.2定版视频会议磋商文件 不带后缀
downloaded_filepath,file_type=download_file(receive_download_url, local_filename)

View File

@@ -50,7 +50,7 @@ def preprocess_files(output_folder, downloaded_file_path, file_type,unique_id,lo
print("切割出的文件:"+str(truncate_files))
# 处理各个部分
tobidders_notice_table=truncate_files[0]
tobidders_notice_table=truncate_files[0] #投标人须知前附表
# tobidders_notice_table_docx = pdf2docx(tobidders_notice_table) # 投标人须知前附表转docx
# truncate_jsonpath = extract_tables_main(tobidders_notice_table_docx, output_folder) # 投标人须知前附表docx->json
@@ -67,8 +67,7 @@ def preprocess_files(output_folder, downloaded_file_path, file_type,unique_id,lo
try:
# 尝试加载 .docx 文件
doc = Document(invalid_docpath)
# 如果可以成功读取,则返回 True
return True
print("yes")
except Exception as e:
# 捕获异常并打印错误信息
invalid_docpath=pdf2docx(pdf_path)
@@ -96,6 +95,7 @@ def preprocess_files(output_folder, downloaded_file_path, file_type,unique_id,lo
'clause_path': clause_path,
'invalid_docpath': invalid_docpath
}
# 基本信息
def fetch_project_basic_info(invalid_path, merged_baseinfo_path, merged_baseinfo_path_more,tobidders_notice, clause_path, logger):
@@ -186,6 +186,8 @@ def engineering_bid_main(output_folder, downloaded_file_path, file_type, unique_
logger = get_global_logger(unique_id)
# 预处理文件,获取处理后的数据
processed_data = preprocess_files(output_folder, downloaded_file_path, file_type,unique_id,logger)
print("zy123")
print(json.dumps(processed_data,ensure_ascii=False,indent=4))
if not processed_data:
yield json.dumps({}) # 如果处理数据失败,返回空的 JSON

View File

@@ -587,12 +587,14 @@ if __name__ == "__main__":
# input_path = "C:\\Users\\Administrator\\Desktop\\new招标文件\\工程标"
# input_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\0b1861e6-c7f6-4541-9182-b1384ba84f3b\\ztbfile.pdf"
# input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\2-招标文件.pdf"
input_path=r"C:\Users\Administrator\Desktop\招标文件\招标test文件夹\zbtest20.pdf"
input_path=r"C:\Users\Administrator\Desktop\fsdownload\854fb19f-96d3-4b3e-b2ba-1a095344fd92\ztbfile.pdf"
output_folder = "C:\\Users\\Administrator\\Desktop\\new招标文件\\output3"
files=truncate_pdf_multiple(input_path,output_folder)
# selections = [4, 1] # 仅处理 selection 4、1
# files=truncate_pdf_specific_engineering(input_path,output_folder,selections)
print(files)
for i in files:
print(type(i))
print(i)
# selection = 1 # 例如1 - 投标人须知前附表+正文, 2 - 评标办法, 3 -资格审查条件 4-招标公告 5-无效标
# generated_files = truncate_pdf_main(input_path, output_folder, selection)
# print(generated_files)

View File

@@ -244,6 +244,8 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
#TODO: start up 结构优化
#TODO:C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b 符合性审查未找到
# 小解析也更新偏离表
#TODO 体育器材 符合性检查的外键 采购需求
#商务标这里改为列表最里层
#good_list 金额 截取上下文
if __name__ == "__main__":