1.22 限制了招标公示网址的长度
This commit is contained in:
parent
a000219b10
commit
702b8a39ef
@ -303,17 +303,17 @@ if __name__ == '__main__':
|
|||||||
# local_path_in="C:\\Users\\Administrator\\Desktop\\fsdownload\\ztbfile.pdf"
|
# local_path_in="C:\\Users\\Administrator\\Desktop\\fsdownload\\ztbfile.pdf"
|
||||||
# local_path_in ="C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\招标文件.pdf"
|
# local_path_in ="C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\招标文件.pdf"
|
||||||
# local_path_in=r"C:\Users\Administrator\Desktop\fsdownload\457ee03d-c61c-4672-b959-2bbb35a1de29\ztbfile_invalid.pdf"
|
# local_path_in=r"C:\Users\Administrator\Desktop\fsdownload\457ee03d-c61c-4672-b959-2bbb35a1de29\ztbfile_invalid.pdf"
|
||||||
local_path_in = r"C:\Users\Administrator\Desktop\文件解析问题\文件解析问题\3496bb36-c476-42f0-947e-3e39c295f8bc\ztbfile.docx"
|
local_path_in = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\有问题的.doc"
|
||||||
# downloaded_file=pdf2docx(local_path_in)
|
# downloaded_file=pdf2docx(local_path_in)
|
||||||
# # downloaded_file=pdf2docx(local_path_in)
|
# # downloaded_file=pdf2docx(local_path_in)
|
||||||
# # downloaded_file=docx2pdf(local_path_in)
|
downloaded_file=docx2pdf(local_path_in)
|
||||||
# print(downloaded_file)
|
print(downloaded_file)
|
||||||
|
|
||||||
test_url = "https://bid-assistance.oss-cn-wuhan-lr.aliyuncs.com/test/%E6%B5%8B%E8%AF%95%E4%BF%A1%E5%8F%B7%E6%B5%8B%E8%AF%95%E4%BF%A1%E5%8F%B7.pdf?Expires=1736852995&OSSAccessKeyId=TMP.3Kg1oKKcsSWb7DXNe4F56bfGfKY5nNWUi274p39HyY7GR3mghMCaFWy69Fi83SBab6PmSkErh4JUD4yAxAGzVVx2hxxoxm&Signature=rmxS5lett4MzWdksDI57EujCklw%3"
|
# test_url = "https://bid-assistance.oss-cn-wuhan-lr.aliyuncs.com/test/%E6%B5%8B%E8%AF%95%E4%BF%A1%E5%8F%B7%E6%B5%8B%E8%AF%95%E4%BF%A1%E5%8F%B7.pdf?Expires=1736852995&OSSAccessKeyId=TMP.3Kg1oKKcsSWb7DXNe4F56bfGfKY5nNWUi274p39HyY7GR3mghMCaFWy69Fi83SBab6PmSkErh4JUD4yAxAGzVVx2hxxoxm&Signature=rmxS5lett4MzWdksDI57EujCklw%3"
|
||||||
local_file_name = r'D:\flask_project\flask_app\static\output\output1\1d763771-f25a-4b65-839e-3b2ca56577b1\tmp\ztbfile.pdf'
|
# local_file_name = r'D:\flask_project\flask_app\static\output\output1\1d763771-f25a-4b65-839e-3b2ca56577b1\tmp\ztbfile.pdf'
|
||||||
downloaded = download_file(test_url, local_file_name)
|
# downloaded = download_file(test_url, local_file_name)
|
||||||
if not downloaded:
|
# if not downloaded:
|
||||||
print("下载文件失败或不支持的文件类型")
|
# print("下载文件失败或不支持的文件类型")
|
||||||
# downloaded_filepath, file_type = downloaded
|
# downloaded_filepath, file_type = downloaded
|
||||||
# print(downloaded_filepath)
|
# print(downloaded_filepath)
|
||||||
# print(file_type)
|
# print(file_type)
|
||||||
|
@ -75,7 +75,7 @@ def parse_text_by_heading(text):
|
|||||||
|
|
||||||
initial_heading_pattern = None
|
initial_heading_pattern = None
|
||||||
special_section_keywords = ['文件的组成', '文件的构成', '文件包括:', '文件包括:', '雷同认定',
|
special_section_keywords = ['文件的组成', '文件的构成', '文件包括:', '文件包括:', '雷同认定',
|
||||||
'包括以下内容'] # 定义特殊章节关键词
|
'包括以下','包括下列','情形之一','情况之一'] # 定义特殊章节关键词
|
||||||
in_special_section = False # 标志是否在特殊章节中
|
in_special_section = False # 标志是否在特殊章节中
|
||||||
lines = text.split('\n')
|
lines = text.split('\n')
|
||||||
|
|
||||||
@ -150,7 +150,7 @@ def parse_text_by_heading(text):
|
|||||||
dot_text_match = re.match(r'^[..、]\s*(\D.+)$', line_stripped)
|
dot_text_match = re.match(r'^[..、]\s*(\D.+)$', line_stripped)
|
||||||
|
|
||||||
# 匹配不带点号的纯数字开头的情况,例如 '27xxxxx'
|
# 匹配不带点号的纯数字开头的情况,例如 '27xxxxx'
|
||||||
pure_number_match = re.match(r'^(\d+)([^.\d)()号条款].*)', line_stripped) # 不允许出现右括号
|
pure_number_match = re.match(r'^(\d+)([^.\d)()号条款节章项例页段部步点年月日时分秒个元千万台份家].*)', line_stripped) # [^...]:表示匹配不在 [...] 中的字符集合(即排除这些字符)
|
||||||
|
|
||||||
if match:
|
if match:
|
||||||
new_key, line_content = match.groups()
|
new_key, line_content = match.groups()
|
||||||
@ -481,11 +481,11 @@ def convert_clause_to_json(file_path, output_folder, type=1):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
file_path = r'D:\flask_project\flask_app\static\output\output1\ce679bd7-a17a-4a95-bfc9-3801bd4a86e4\tmp\ztbfile_tobidders_notice_part2.pdf'
|
file_path = r'C:\Users\Administrator\Desktop\新建文件夹 (3)\test\泉州白濑水利枢纽工程高低压配电装置设计、制造及采购 (1)_tobidders_notice_part2.pdf'
|
||||||
# file_path=r'C:\Users\Administrator\Desktop\招标文件-采购类\all\2024-陕西-陕西省某单位2024年执勤化妆服采购项目_tobidders_notice_part2.pdf'
|
# file_path=r'C:\Users\Administrator\Desktop\招标文件-采购类\all\2024-陕西-陕西省某单位2024年执勤化妆服采购项目_tobidders_notice_part2.pdf'
|
||||||
# file_path=r'C:\Users\Administrator\Desktop\货物标\output4\磋商文件_tobidders_notice_part2.pdf'
|
# file_path=r'C:\Users\Administrator\Desktop\货物标\output4\磋商文件_tobidders_notice_part2.pdf'
|
||||||
# file_path = 'C:\\Users\\Administrator\\Desktop\\货物标\\output4\\6.2定版视频会议磋商文件_tobidders_notice_part2.pdf'
|
# file_path = 'C:\\Users\\Administrator\\Desktop\\货物标\\output4\\6.2定版视频会议磋商文件_tobidders_notice_part2.pdf'
|
||||||
output_folder = r'D:\flask_project\flask_app\static\output\output1\ce679bd7-a17a-4a95-bfc9-3801bd4a86e4\tmp'
|
output_folder = r'C:\Users\Administrator\Desktop\新建文件夹 (3)\test'
|
||||||
try:
|
try:
|
||||||
output_path = convert_clause_to_json(file_path, output_folder)
|
output_path = convert_clause_to_json(file_path, output_folder)
|
||||||
print(f"Final JSON result saved to: {output_path}")
|
print(f"Final JSON result saved to: {output_path}")
|
||||||
|
@ -319,11 +319,11 @@ if __name__ == "__main__":
|
|||||||
logger = get_global_logger("123")
|
logger = get_global_logger("123")
|
||||||
# input_path = r"C:\Users\Administrator\Desktop\new招标文件\货物标"
|
# input_path = r"C:\Users\Administrator\Desktop\new招标文件\货物标"
|
||||||
# pdf_path = r"C:\Users\Administrator\Desktop\招标文件-采购类\2024-贵州-贵州医科大学附属医院导视系统零星制作安装项目.pdf"
|
# pdf_path = r"C:\Users\Administrator\Desktop\招标文件-采购类\2024-贵州-贵州医科大学附属医院导视系统零星制作安装项目.pdf"
|
||||||
pdf_path=r"C:\Users\Administrator\Desktop\fsdownload\0c80edcc-cc86-4d53-8bd4-78a531446760\ztbfile.pdf"
|
pdf_path=r"C:\Users\Administrator\Desktop\新建文件夹 (3)\test\泉州白濑水利枢纽工程高低压配电装置设计、制造及采购 (1).pdf"
|
||||||
# input_path = r"C:\Users\Administrator\Desktop\货物标\zbfiles\2-招标文件(广水市教育局封闭管理).pdf"
|
# input_path = r"C:\Users\Administrator\Desktop\货物标\zbfiles\2-招标文件(广水市教育局封闭管理).pdf"
|
||||||
# pdf_path=r"C:\Users\Administrator\Desktop\文件解析问题\文件解析问题\1414cb9c-7bf4-401c-8761-2acde151b9c2\ztbfile.pdf"
|
# pdf_path=r"C:\Users\Administrator\Desktop\文件解析问题\文件解析问题\1414cb9c-7bf4-401c-8761-2acde151b9c2\ztbfile.pdf"
|
||||||
output_folder = r"C:\Users\Administrator\Desktop\fsdownload\0c80edcc-cc86-4d53-8bd4-78a531446760\tmp"
|
output_folder = r"C:\Users\Administrator\Desktop\新建文件夹 (3)\test"
|
||||||
# output_folder = r"C:\Users\Administrator\Desktop\new招标文件\output2"
|
# output_folder = r"C:\Users\Administrator\Desktop\new招标文件\output2"
|
||||||
selection = 1 # 例如:1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 6-invalid_path
|
selection = 4 # 例如:1 - 公告, 2 - 评标办法, 3 - 资格审查后缀有qualification1或qualification2(与评标办法一致) 4.投标人须知前附表part1 投标人须知正文part2 5-采购需求 6-invalid_path
|
||||||
generated_files = truncate_pdf_main_goods(pdf_path, output_folder, selection,logger)
|
generated_files = truncate_pdf_main_goods(pdf_path, output_folder, selection,logger)
|
||||||
print(generated_files)
|
print(generated_files)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user