diff --git a/flask_app/general/format_change.py b/flask_app/general/format_change.py index 2fe42ce..0cf1072 100644 --- a/flask_app/general/format_change.py +++ b/flask_app/general/format_change.py @@ -52,7 +52,7 @@ def pdf2docx(local_path_in): return "" remote_url = 'http://47.98.59.178:8000/v3/3rd/files/transfer/p2d' receive_download_url = upload_file(local_path_in, remote_url) #转换完成,得到下载链接 - print(receive_download_url) + # print(receive_download_url) filename, folder = get_filename_and_folder(local_path_in) #输入输出在同一个文件夹 local_filename=os.path.join(folder,filename) #输出文件名 C:\Users\Administrator\Desktop\货物标\zbfiles\6.2定版视频会议磋商文件 不带后缀 downloaded_filepath,file_type=download_file(receive_download_url, local_filename) diff --git a/flask_app/main/工程标解析main.py b/flask_app/main/工程标解析main.py index 2ebb08d..1a701b8 100644 --- a/flask_app/main/工程标解析main.py +++ b/flask_app/main/工程标解析main.py @@ -50,7 +50,7 @@ def preprocess_files(output_folder, downloaded_file_path, file_type,unique_id,lo print("切割出的文件:"+str(truncate_files)) # 处理各个部分 - tobidders_notice_table=truncate_files[0] + tobidders_notice_table=truncate_files[0] #投标人须知前附表 # tobidders_notice_table_docx = pdf2docx(tobidders_notice_table) # 投标人须知前附表转docx # truncate_jsonpath = extract_tables_main(tobidders_notice_table_docx, output_folder) # 投标人须知前附表docx->json @@ -67,8 +67,7 @@ def preprocess_files(output_folder, downloaded_file_path, file_type,unique_id,lo try: # 尝试加载 .docx 文件 doc = Document(invalid_docpath) - # 如果可以成功读取,则返回 True - return True + print("yes") except Exception as e: # 捕获异常并打印错误信息 invalid_docpath=pdf2docx(pdf_path) @@ -96,9 +95,10 @@ def preprocess_files(output_folder, downloaded_file_path, file_type,unique_id,lo 'clause_path': clause_path, 'invalid_docpath': invalid_docpath } + # 基本信息 -def fetch_project_basic_info(invalid_path, merged_baseinfo_path, merged_baseinfo_path_more,tobidders_notice, clause_path,logger): +def fetch_project_basic_info(invalid_path, merged_baseinfo_path, merged_baseinfo_path_more,tobidders_notice, clause_path, logger): logger.info("starting 基础信息...") start_time = time.time() if not merged_baseinfo_path: @@ -186,6 +186,8 @@ def engineering_bid_main(output_folder, downloaded_file_path, file_type, unique_ logger = get_global_logger(unique_id) # 预处理文件,获取处理后的数据 processed_data = preprocess_files(output_folder, downloaded_file_path, file_type,unique_id,logger) + print("zy123") + print(json.dumps(processed_data,ensure_ascii=False,indent=4)) if not processed_data: yield json.dumps({}) # 如果处理数据失败,返回空的 JSON diff --git a/flask_app/main/截取pdf.py b/flask_app/main/截取pdf.py index 9851a31..d8fba26 100644 --- a/flask_app/main/截取pdf.py +++ b/flask_app/main/截取pdf.py @@ -587,12 +587,14 @@ if __name__ == "__main__": # input_path = "C:\\Users\\Administrator\\Desktop\\new招标文件\\工程标" # input_path="C:\\Users\\Administrator\\Desktop\\fsdownload\\0b1861e6-c7f6-4541-9182-b1384ba84f3b\\ztbfile.pdf" # input_path = "C:\\Users\\Administrator\\Desktop\\货物标\\zbfiles\\2-招标文件.pdf" - input_path=r"C:\Users\Administrator\Desktop\招标文件\招标test文件夹\zbtest20.pdf" + input_path=r"C:\Users\Administrator\Desktop\fsdownload\854fb19f-96d3-4b3e-b2ba-1a095344fd92\ztbfile.pdf" output_folder = "C:\\Users\\Administrator\\Desktop\\new招标文件\\output3" files=truncate_pdf_multiple(input_path,output_folder) # selections = [4, 1] # 仅处理 selection 4、1 # files=truncate_pdf_specific_engineering(input_path,output_folder,selections) - print(files) + for i in files: + print(type(i)) + print(i) # selection = 1 # 例如:1 - 投标人须知前附表+正文, 2 - 评标办法, 3 -资格审查条件 4-招标公告 5-无效标 # generated_files = truncate_pdf_main(input_path, output_folder, selection) # print(generated_files) diff --git a/flask_app/货物标/货物标解析main.py b/flask_app/货物标/货物标解析main.py index ac45c5b..a507967 100644 --- a/flask_app/货物标/货物标解析main.py +++ b/flask_app/货物标/货物标解析main.py @@ -244,6 +244,8 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id): #TODO: start up 结构优化 #TODO:C:\Users\Administrator\Desktop\fsdownload\16fd6b4e-3975-4c83-8ba6-1bc9263a6a5b 符合性审查未找到 # 小解析也更新偏离表 + +#TODO 体育器材 符合性检查的外键 采购需求 #商务标这里改为列表最里层 #good_list 金额 截取上下文 if __name__ == "__main__":