1.22 限制了招标公示网址的长度

This commit is contained in:
zy123 2025-01-22 16:57:40 +08:00
parent a000219b10
commit 702b8a39ef
3 changed files with 15 additions and 15 deletions

View File

@ -303,17 +303,17 @@ if __name__ == '__main__':
# local_path_in="C:\\Users\\Administrator\\Desktop\\fsdownload\\ztbfile.pdf" # local_path_in="C:\\Users\\Administrator\\Desktop\\fsdownload\\ztbfile.pdf"
# local_path_in ="C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\招标文件.pdf" # local_path_in ="C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\招标文件.pdf"
# local_path_in=r"C:\Users\Administrator\Desktop\fsdownload\457ee03d-c61c-4672-b959-2bbb35a1de29\ztbfile_invalid.pdf" # local_path_in=r"C:\Users\Administrator\Desktop\fsdownload\457ee03d-c61c-4672-b959-2bbb35a1de29\ztbfile_invalid.pdf"
local_path_in = r"C:\Users\Administrator\Desktop\文件解析问题\文件解析问题\3496bb36-c476-42f0-947e-3e39c295f8bc\ztbfile.docx" local_path_in = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\有问题的.doc"
# downloaded_file=pdf2docx(local_path_in) # downloaded_file=pdf2docx(local_path_in)
# # downloaded_file=pdf2docx(local_path_in) # # downloaded_file=pdf2docx(local_path_in)
# # downloaded_file=docx2pdf(local_path_in) downloaded_file=docx2pdf(local_path_in)
# print(downloaded_file) print(downloaded_file)
test_url = "https://bid-assistance.oss-cn-wuhan-lr.aliyuncs.com/test/%E6%B5%8B%E8%AF%95%E4%BF%A1%E5%8F%B7%E6%B5%8B%E8%AF%95%E4%BF%A1%E5%8F%B7.pdf?Expires=1736852995&OSSAccessKeyId=TMP.3Kg1oKKcsSWb7DXNe4F56bfGfKY5nNWUi274p39HyY7GR3mghMCaFWy69Fi83SBab6PmSkErh4JUD4yAxAGzVVx2hxxoxm&Signature=rmxS5lett4MzWdksDI57EujCklw%3" # test_url = "https://bid-assistance.oss-cn-wuhan-lr.aliyuncs.com/test/%E6%B5%8B%E8%AF%95%E4%BF%A1%E5%8F%B7%E6%B5%8B%E8%AF%95%E4%BF%A1%E5%8F%B7.pdf?Expires=1736852995&OSSAccessKeyId=TMP.3Kg1oKKcsSWb7DXNe4F56bfGfKY5nNWUi274p39HyY7GR3mghMCaFWy69Fi83SBab6PmSkErh4JUD4yAxAGzVVx2hxxoxm&Signature=rmxS5lett4MzWdksDI57EujCklw%3"
local_file_name = r'D:\flask_project\flask_app\static\output\output1\1d763771-f25a-4b65-839e-3b2ca56577b1\tmp\ztbfile.pdf' # local_file_name = r'D:\flask_project\flask_app\static\output\output1\1d763771-f25a-4b65-839e-3b2ca56577b1\tmp\ztbfile.pdf'
downloaded = download_file(test_url, local_file_name) # downloaded = download_file(test_url, local_file_name)
if not downloaded: # if not downloaded:
print("下载文件失败或不支持的文件类型") # print("下载文件失败或不支持的文件类型")
# downloaded_filepath, file_type = downloaded # downloaded_filepath, file_type = downloaded
# print(downloaded_filepath) # print(downloaded_filepath)
# print(file_type) # print(file_type)

View File

@ -75,7 +75,7 @@ def parse_text_by_heading(text):
initial_heading_pattern = None initial_heading_pattern = None
special_section_keywords = ['文件的组成', '文件的构成', '文件包括:', '文件包括:', '雷同认定', special_section_keywords = ['文件的组成', '文件的构成', '文件包括:', '文件包括:', '雷同认定',
'包括以下内容'] # 定义特殊章节关键词 '包括以下','包括下列','情形之一','情况之一'] # 定义特殊章节关键词
in_special_section = False # 标志是否在特殊章节中 in_special_section = False # 标志是否在特殊章节中
lines = text.split('\n') lines = text.split('\n')
@ -150,7 +150,7 @@ def parse_text_by_heading(text):
dot_text_match = re.match(r'^[.、]\s*(\D.+)$', line_stripped) dot_text_match = re.match(r'^[.、]\s*(\D.+)$', line_stripped)
# 匹配不带点号的纯数字开头的情况,例如 '27xxxxx' # 匹配不带点号的纯数字开头的情况,例如 '27xxxxx'
pure_number_match = re.match(r'^(\d+)([^.\d)()号条款].*)', line_stripped) # 不允许出现右括号 pure_number_match = re.match(r'^(\d+)([^.\d)()号条款节章项例页段部步点年月日时分秒个元千万台份家].*)', line_stripped) # [^...]:表示匹配不在 [...] 中的字符集合(即排除这些字符)
if match: if match:
new_key, line_content = match.groups() new_key, line_content = match.groups()
@ -481,11 +481,11 @@ def convert_clause_to_json(file_path, output_folder, type=1):
if __name__ == "__main__": if __name__ == "__main__":
file_path = r'D:\flask_project\flask_app\static\output\output1\ce679bd7-a17a-4a95-bfc9-3801bd4a86e4\tmp\ztbfile_tobidders_notice_part2.pdf' file_path = r'C:\Users\Administrator\Desktop\新建文件夹 (3)\test\泉州白濑水利枢纽工程高低压配电装置设计、制造及采购 (1)_tobidders_notice_part2.pdf'
# file_path=r'C:\Users\Administrator\Desktop\招标文件-采购类\all\2024-陕西-陕西省某单位2024年执勤化妆服采购项目_tobidders_notice_part2.pdf' # file_path=r'C:\Users\Administrator\Desktop\招标文件-采购类\all\2024-陕西-陕西省某单位2024年执勤化妆服采购项目_tobidders_notice_part2.pdf'
# file_path=r'C:\Users\Administrator\Desktop\货物标\output4\磋商文件_tobidders_notice_part2.pdf' # file_path=r'C:\Users\Administrator\Desktop\货物标\output4\磋商文件_tobidders_notice_part2.pdf'
# file_path = 'C:\\Users\\Administrator\\Desktop\\货物标\\output4\\6.2定版视频会议磋商文件_tobidders_notice_part2.pdf' # file_path = 'C:\\Users\\Administrator\\Desktop\\货物标\\output4\\6.2定版视频会议磋商文件_tobidders_notice_part2.pdf'
output_folder = r'D:\flask_project\flask_app\static\output\output1\ce679bd7-a17a-4a95-bfc9-3801bd4a86e4\tmp' output_folder = r'C:\Users\Administrator\Desktop\新建文件夹 (3)\test'
try: try:
output_path = convert_clause_to_json(file_path, output_folder) output_path = convert_clause_to_json(file_path, output_folder)
print(f"Final JSON result saved to: {output_path}") print(f"Final JSON result saved to: {output_path}")

View File

@ -319,11 +319,11 @@ if __name__ == "__main__":
logger = get_global_logger("123") logger = get_global_logger("123")
# input_path = r"C:\Users\Administrator\Desktop\new招标文件\货物标" # input_path = r"C:\Users\Administrator\Desktop\new招标文件\货物标"
# pdf_path = r"C:\Users\Administrator\Desktop\招标文件-采购类\2024-贵州-贵州医科大学附属医院导视系统零星制作安装项目.pdf" # pdf_path = r"C:\Users\Administrator\Desktop\招标文件-采购类\2024-贵州-贵州医科大学附属医院导视系统零星制作安装项目.pdf"
pdf_path=r"C:\Users\Administrator\Desktop\fsdownload\0c80edcc-cc86-4d53-8bd4-78a531446760\ztbfile.pdf" pdf_path=r"C:\Users\Administrator\Desktop\新建文件夹 (3)\test\泉州白濑水利枢纽工程高低压配电装置设计、制造及采购 (1).pdf"
# input_path = r"C:\Users\Administrator\Desktop\货物标\zbfiles\2-招标文件(广水市教育局封闭管理).pdf" # input_path = r"C:\Users\Administrator\Desktop\货物标\zbfiles\2-招标文件(广水市教育局封闭管理).pdf"
# pdf_path=r"C:\Users\Administrator\Desktop\文件解析问题\文件解析问题\1414cb9c-7bf4-401c-8761-2acde151b9c2\ztbfile.pdf" # pdf_path=r"C:\Users\Administrator\Desktop\文件解析问题\文件解析问题\1414cb9c-7bf4-401c-8761-2acde151b9c2\ztbfile.pdf"
output_folder = r"C:\Users\Administrator\Desktop\fsdownload\0c80edcc-cc86-4d53-8bd4-78a531446760\tmp" output_folder = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\test"
# output_folder = r"C:\Users\Administrator\Desktop\new招标文件\output2" # output_folder = r"C:\Users\Administrator\Desktop\new招标文件\output2"
selection = 1 # 例如1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2与评标办法一致 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 6-invalid_path selection = 4 # 例如1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2与评标办法一致 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 6-invalid_path
generated_files = truncate_pdf_main_goods(pdf_path, output_folder, selection,logger) generated_files = truncate_pdf_main_goods(pdf_path, output_folder, selection,logger)
print(generated_files) print(generated_files)