9.3
This commit is contained in:
parent
1d0211ce72
commit
ee16d5e4b4
50
flask_app/main/docx截取docx.py
Normal file
50
flask_app/main/docx截取docx.py
Normal file
@ -0,0 +1,50 @@
|
||||
from docx import Document
|
||||
import re
|
||||
import os
|
||||
|
||||
|
||||
def copy_docx(source_path):
    """Extract one section of a .docx into a sibling "<name>_invalid<ext>" file.

    The section starts at the LAST paragraph matching the begin pattern
    (a "第X章 招标公告" heading or "第一卷") and runs up to and including the
    first following paragraph that matches the end pattern (a "第X章 合同"
    heading or a "清标报告" marker). Only paragraph text and basic run
    formatting (bold, italic, underline, colour, size) are copied; tables,
    images and section settings are not handled here.

    Args:
        source_path: Path of the source .docx file.

    Returns:
        The path of the document that was written. If no begin paragraph is
        found, the saved document contains no copied paragraphs.
    """
    doc = Document(source_path)  # open the source document
    output_folder = os.path.dirname(source_path)

    # Build the output name: original base name + "_invalid" + original extension.
    original_file_name = os.path.basename(source_path)
    file_name_without_ext, file_ext = os.path.splitext(original_file_name)
    modified_file_name = file_name_without_ext + "_invalid" + file_ext
    destination_path = os.path.join(output_folder, modified_file_name)

    new_doc = Document()  # create the target document

    # Patterns marking the beginning and the end of the section to copy.
    begin_pattern = re.compile(r'第[一二三四五六七八九十]+章\s*招标公告|第一卷')
    end_pattern = re.compile(r'第[一二三四五六七八九十]+章\s*合同|:清标报告|:清标报告')

    # Read the paragraph list once and reuse it for both passes.
    paragraphs = doc.paragraphs

    # Locate the last paragraph that matches the begin pattern.
    last_begin_index = -1
    for i, paragraph in enumerate(paragraphs):
        if begin_pattern.search(paragraph.text):
            last_begin_index = i

    # Copy from that paragraph onwards until the end pattern is seen.
    if last_begin_index != -1:
        for paragraph in paragraphs[last_begin_index:]:
            new_para = new_doc.add_paragraph(style=paragraph.style)
            for run in paragraph.runs:
                new_run = new_para.add_run(run.text)
                new_run.bold = run.bold
                new_run.italic = run.italic
                new_run.underline = run.underline
                # Only copy an explicit RGB colour; testing the colour object
                # itself does not tell whether a colour is actually set.
                source_color = run.font.color
                source_rgb = source_color.rgb if source_color is not None else None
                if source_rgb is not None:
                    new_run.font.color.rgb = source_rgb
                new_run.font.size = run.font.size

            # The paragraph that matches the end pattern is still copied,
            # then copying stops.
            if end_pattern.search(paragraph.text):
                break

    new_doc.save(destination_path)  # write the new document
    return destination_path
|
||||
|
||||
|
||||
# Manual entry point: run the extraction on a local sample document.
if __name__ == '__main__':
    sample_docx = "C:\\Users\\Administrator\\Desktop\\货物标\\output\\zbtest13.docx"
    copy_docx(sample_docx)
|
@ -58,7 +58,7 @@ def docx2pdf(local_path_in):
|
||||
|
||||
if __name__ == '__main__':
|
||||
# 替换为你的文件路径和API URL
|
||||
local_path_in="C:\\Users\\Administrator\\Desktop\\招标文件\\zbtest16_invalid.docx"
|
||||
local_path_in="C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\test111.pdf"
|
||||
# pdf2docx(local_path_in)
|
||||
downloaded_file=docx2pdf(local_path_in)
|
||||
print(downloaded_file)
|
||||
|
@ -1,18 +1,35 @@
|
||||
import re
|
||||
import ast
|
||||
|
||||
# 正则表达式
|
||||
pattern = re.compile(r'第[一二三四五六七八九十百千]+章.*?(?:项目|服务|商务).*?要求')
|
||||
def process_string_list(string_list):
|
||||
# 使用正则表达式匹配方括号内的内容
|
||||
match = re.search(r'\[(.*?)\]', string_list)
|
||||
if match:
|
||||
# 获取匹配的内容,即方括号内的部分
|
||||
content_inside_brackets = match.group(1)
|
||||
if content_inside_brackets: # 检查内容是否为空
|
||||
# 检查内容是否是数字列表
|
||||
if all(item.strip().isdigit() for item in content_inside_brackets.split(',')):
|
||||
# 如果是数字,不用加引号,直接保留数字
|
||||
formatted_list = '[' + ', '.join(item.strip() for item in content_inside_brackets.split(',') if item.strip()) + ']'
|
||||
else:
|
||||
# 如果不全是数字,按字符串处理
|
||||
formatted_list = '[' + ', '.join(f"'{item.strip()}'" for item in content_inside_brackets.split(',') if item.strip()) + ']'
|
||||
else:
|
||||
return [] # 直接返回空列表如果内容为空
|
||||
|
||||
# 示例文本进行测试
|
||||
text = """
|
||||
第一章项目技术、服务及商务要求
|
||||
第二章 服务细节要求
|
||||
第三章 商务处理要求
|
||||
第四章 项目安排要求
|
||||
第五章 安全要求
|
||||
"""
|
||||
# 使用 ast.literal_eval 来解析格式化后的字符串
|
||||
try:
|
||||
actual_list = ast.literal_eval(formatted_list)
|
||||
return actual_list
|
||||
except SyntaxError as e:
|
||||
print(f"Error parsing list: {e}")
|
||||
return []
|
||||
else:
|
||||
# 如果没有匹配到内容,返回空列表
|
||||
return []
|
||||
|
||||
# 查找所有匹配
|
||||
matches = pattern.findall(text)
|
||||
for match in matches:
|
||||
print(match)
|
||||
# 测试代码
|
||||
test_string = "[1,2,哈哈]"
|
||||
result = process_string_list(test_string)
|
||||
print(result) # 现在应该输出: [1, 2, 4, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22]
|
||||
|
@ -9,8 +9,9 @@ def clean_page_numbers(text):
|
||||
# 删除结尾的页码
|
||||
cleaned_text = re.sub(r'\s+\d+\s*$', '', cleaned_text)
|
||||
# 删除形如 /129 的页码
|
||||
cleaned_text = re.sub(r'\s*\/\s*\d+\s*', '', cleaned_text)
|
||||
cleaned_text = re.sub(r'\s*\/\s*\d+\\s*', '', cleaned_text)
|
||||
return cleaned_text
|
||||
|
||||
def extract_pages(pdf_path, output_folder, chapter_pattern, begin_page, end_phrase_pattern, output_suffix):
|
||||
# 打开PDF文件
|
||||
pdf_document = PdfReader(pdf_path)
|
||||
@ -38,7 +39,6 @@ def extract_pages(pdf_path, output_folder, chapter_pattern, begin_page, end_phra
|
||||
end_page = i
|
||||
break
|
||||
|
||||
|
||||
# 确保找到了起始和结束页面
|
||||
if start_page is None or end_page is None:
|
||||
print(f"未找到起始或结束页在文件 {pdf_path} 中!")
|
||||
@ -111,6 +111,14 @@ def truncate_pdf_main(input_path, output_folder, selection):
|
||||
]
|
||||
output_suffix = "tobidders_notice"
|
||||
elif selection==4:
|
||||
# 配置用于 "资格审查条件" 的正则表达式模式和短语
|
||||
appendix_pattern = r'^附录(?:一)?[::]|^附件(?:一)?[::]|^附表(?:一)?[::]'
|
||||
pattern = re.compile(appendix_pattern)
|
||||
begin_page = 5
|
||||
end_phrases = [r'评标办法正文', r'评标办法', appendix_pattern]
|
||||
output_suffix = "qualification"
|
||||
elif selection == 5:
|
||||
# 配置用于 "无效标" 的正则表达式模式和短语
|
||||
pattern = re.compile(r'第[一二三四五六七八九十]+章\s*招标公告|第一卷|招标编号:|招标编号:')
|
||||
begin_page = 0
|
||||
end_phrases = [
|
||||
@ -118,13 +126,7 @@ def truncate_pdf_main(input_path, output_folder, selection):
|
||||
r':清标报告',# 添加了新的匹配项
|
||||
r':清标报告'
|
||||
]
|
||||
output_suffix="invalid"
|
||||
elif selection==5:
|
||||
appendix_pattern = r'^附录(?:一)?[::]|^附件(?:一)?[::]|^附表(?:一)?[::]'
|
||||
pattern = re.compile(appendix_pattern)
|
||||
begin_page=5
|
||||
end_phrases = [r'评标办法正文', r'评标办法',appendix_pattern]
|
||||
output_suffix="qualification"
|
||||
output_suffix = "invalid"
|
||||
else:
|
||||
print("无效的选择")
|
||||
return None
|
||||
@ -134,16 +136,15 @@ def truncate_pdf_main(input_path, output_folder, selection):
|
||||
|
||||
def truncate_pdf_multiple(input_path, output_folder):
|
||||
truncate_files = []
|
||||
for selection in range(1, 6):
|
||||
for selection in range(1, 5):
|
||||
files = truncate_pdf_main(input_path, output_folder, selection)
|
||||
truncate_files.extend(files)
|
||||
return truncate_files
|
||||
|
||||
if __name__ == "__main__":
|
||||
input_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\招标test文件夹\\zbtest5.pdf"
|
||||
input_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\招标03.pdf"
|
||||
output_folder = "C:\\Users\\Administrator\\Desktop\\招标文件\\test"
|
||||
truncate_pdf_multiple(input_path,output_folder)
|
||||
# selection = 5 # 例如:1 - 投标人须知前附表, 2 - 评标办法, 3 - 投标人须知正文 4-招标公告-合同条款前
|
||||
# generated_files = truncate_pdf_main(input_path, output_folder, selection)
|
||||
# truncate_pdf_multiple(input_path,output_folder)
|
||||
selection = 5 # 例如:1 - 投标人须知前附表, 2 - 评标办法, 3 - 投标人须知正文 4-资格审查条件
|
||||
generated_files = truncate_pdf_main(input_path, output_folder, selection)
|
||||
# print("生成的文件:", generated_files)
|
||||
|
||||
|
@ -33,7 +33,8 @@ def extract_section(text, start_keyword, end_phrases):
|
||||
|
||||
end_index = len(text)
|
||||
for phrase in end_phrases:
|
||||
match = re.search(phrase, text[start_index:])
|
||||
# Use multiline mode with `re.MULTILINE`
|
||||
match = re.search(phrase, text[start_index:], re.MULTILINE) #Hello, world!\nWelcome to OpenAI. 在多行字符串多,要 re.MULTILINE以匹配每一行的开头,否则只会匹配字符串的开头。
|
||||
if match:
|
||||
end_index = start_index + match.start()
|
||||
break
|
||||
@ -118,10 +119,8 @@ def convert_to_json(file_path, start_word, end_phrases):
|
||||
text = extract_text_from_pdf(file_path)
|
||||
else:
|
||||
raise ValueError("Unsupported file format")
|
||||
|
||||
# 提取从 start_word 开始到 end_phrases 结束的内容
|
||||
text = extract_section(text, start_word, end_phrases)
|
||||
|
||||
parsed_data = parse_text_by_heading(text)
|
||||
return parsed_data
|
||||
|
||||
@ -138,13 +137,13 @@ def convert_clause_to_json(input_path,output_folder):
|
||||
return output_path
|
||||
|
||||
if __name__ == "__main__":
|
||||
file_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\temp6\\48e650ff-70eb-48df-874c-66c8abbcd89d\\ztbfile_tobidders_notice.pdf'
|
||||
file_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\temp6\\713022ff-27d9-43e3-9cc9-2752effbfd66\\ztbfile_tobidders_notice.pdf'
|
||||
start_word = "投标人须知正文"
|
||||
end_phrases = [
|
||||
r'^第[一二三四五六七八九十]+章\s+评标办法', r'^评标办法前附表', r'^附录:', r'^附录一:', r'^附件:', r'^附件一:',
|
||||
r'^附表:', r'^附表一:', r'^附录:', r'^附录一:', r'^附件:', r'^附件一:', r'^附表:', r'^附表一:',
|
||||
]
|
||||
output_folder = 'C:\\Users\\Administrator\\Desktop\\招标文件'
|
||||
output_folder = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\temp6\\713022ff-27d9-43e3-9cc9-2752effbfd66'
|
||||
try:
|
||||
output_path = convert_clause_to_json(file_path,output_folder)
|
||||
print(f"Final JSON result saved to: {output_path}")
|
||||
|
File diff suppressed because one or more lines are too long
@ -7,6 +7,7 @@ from flask_app.main.json_utils import combine_json_results, nest_json_under_key
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from flask_app.main.禁止投标情形 import find_forbidden
|
||||
from 禁止投标情形 import process_string_list
|
||||
|
||||
#如果当前段落有序号,则向下匹配直接遇到相同的序号样式
|
||||
#如果当前段落无序号,则向下匹配序号,把若干同类的序号都摘出来。
|
||||
@ -85,7 +86,7 @@ def clean_dict_datas(extracted_contents, keywords,excludes): #让正则表达
|
||||
if any(exclude in data for exclude in excludes):
|
||||
continue # 如果包含任何排除字符串,跳过这个数据
|
||||
# 去掉开头的序号,包括字母+数字的格式 以及括号+数字
|
||||
pattern = r'^\s*([((]\d+[))]|[A-Za-z]?\d+(\.\d+)*(\s|\.|、)?)'
|
||||
pattern = r'^\s*([((]\d+[))]|[A-Za-z]?\d+\s*(\.\s*\d+)*(\s|\.|、)?)'
|
||||
data = re.sub(pattern, '', data).strip()
|
||||
keyword_match = re.search(keywords, data)
|
||||
if keyword_match:
|
||||
@ -106,14 +107,14 @@ def clean_dict_datas(extracted_contents, keywords,excludes): #让正则表达
|
||||
all_texts1.append(cleaned_text) # 将处理后的文本添加到结果列表
|
||||
|
||||
else:
|
||||
pattern = r'^\s*([((]\d+[))]|[A-Za-z]?\d+(\.\d+)*(\s|\.|、)?)'
|
||||
pattern = r'^\s*([((]\d+[))]|[A-Za-z]?\d+\s*(\.\s*\d+)*(\s|\.|、)?)'
|
||||
data = re.sub(pattern, '', text_list[0]).strip()
|
||||
# 将修改后的第一个元素和剩余的元素连接起来
|
||||
text_list[0] = data # 更新列表中的第一个元素
|
||||
joined_text = "\n".join(text_list) # 如果列表中有多个元素,则连接它们
|
||||
all_texts2.append(joined_text) # 将每个列表的内容添加到 all_texts 中
|
||||
|
||||
return all_texts1,all_texts2
|
||||
return all_texts1,all_texts2 #all_texts1要额外用gpt all_text2直接返回结果
|
||||
def find_sentences_with_keywords(data, keywords, follow_up_keywords):
|
||||
"""递归查找并返回包含关键词的句子列表,并根据是否存在后续关键词分别存储到两个列表中。"""
|
||||
sentences1 = [] # 保存没有后续关键词的情况
|
||||
@ -236,8 +237,9 @@ def handle_query(file_path, user_query, output_file, result_key, keywords, trunc
|
||||
print("starting qianwen-long...")
|
||||
qianwen_ans = qianwen_long(file_id, user_query)
|
||||
selected_contents = []
|
||||
num_list = json.loads(qianwen_ans)
|
||||
num_list = process_string_list(qianwen_ans)
|
||||
print(num_list)
|
||||
|
||||
for index in num_list:
|
||||
if index - 1 < len(qianwen_txt):
|
||||
content = qianwen_txt[index - 1] # 转换序号为索引(假设序号从1开始)
|
||||
@ -253,14 +255,14 @@ def handle_query(file_path, user_query, output_file, result_key, keywords, trunc
|
||||
res = {result_key: ""} # Set the response to empty if no contents were extracted
|
||||
return res
|
||||
|
||||
def combine_find_invalid(file_path, output_dir, truncate_json_path,clause_path,truncate4):
|
||||
def combine_find_invalid(file_path, output_dir, truncate_json_path,clause_path,truncate3):
|
||||
print("starting无效标与废标...")
|
||||
queries = [
|
||||
(r'否\s*决|无\s*效\s*投\s*标|被\s*拒\s*绝|予\s*以\s*拒\s*绝|投\s*标\s*失\s*效|投\s*标\s*无\s*效',
|
||||
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:否决投标或拒绝投标或无效投标或投标失效的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,x为符合的信息的序号。",
|
||||
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:否决投标或拒绝投标或无效投标或投标失效的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,x为符合的信息的序号,若情况不存在,返回[]。",
|
||||
os.path.join(output_dir, "temp1.txt"), "否决和无效投标情形"),
|
||||
(r'废\s*标',
|
||||
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:废标项的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,x为符合的信息的序号。",
|
||||
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:废标项的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,x为符合的信息的序号,若情况不存在,返回[]。",
|
||||
os.path.join(output_dir, "temp2.txt"), "废标项")
|
||||
]
|
||||
results = []
|
||||
@ -277,7 +279,9 @@ def combine_find_invalid(file_path, output_dir, truncate_json_path,clause_path,t
|
||||
for future in futures:
|
||||
results.append(future.result())
|
||||
|
||||
forbidden_res = find_forbidden(truncate_json_path, clause_path, truncate4)
|
||||
#禁止投标
|
||||
print("starting不得存在的情形...")
|
||||
forbidden_res = find_forbidden(truncate_json_path, clause_path, truncate3)
|
||||
results.append(forbidden_res)
|
||||
|
||||
combined_dict = {}
|
||||
@ -288,15 +292,15 @@ def combine_find_invalid(file_path, output_dir, truncate_json_path,clause_path,t
|
||||
return nest_json_under_key(combined_dict, "无效标与废标项")
|
||||
|
||||
|
||||
#TODO:1.运行时间约80s,如果成为短板需要优化多线程 2.没有提取评标办法前附表中的表格 3.提取表格时根据中文的句号分割 4.qianwen-long存在bug
|
||||
#TODO:1.运行时间约80s,如果成为短板需要优化多线程
|
||||
if __name__ == '__main__':
|
||||
start_time = time.time()
|
||||
truncate_json_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\truncate_output.json"
|
||||
clause_path="C:\\Users\\Administrator\\Desktop\\招标文件\\clause.json"
|
||||
truncate4="C:\\Users\\Administrator\\Desktop\\招标文件\\zbtest12_qualification.pdf"
|
||||
output_dir = "C:\\Users\\Administrator\\Desktop\\招标文件"
|
||||
doc_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\招标03.docx'
|
||||
results = combine_find_invalid(doc_path, output_dir,truncate_json_path,clause_path,truncate4)
|
||||
truncate3="C:\\Users\\Administrator\\Desktop\\招标文件\\zbtest12_qualification.pdf"
|
||||
output_dir = "C:\\Users\\Administrator\\Desktop\\货物标\\output"
|
||||
doc_path = 'C:\\Users\\Administrator\\Desktop\\货物标\\output\\zbtest11_invalid.docx'
|
||||
results = combine_find_invalid(doc_path, output_dir,truncate_json_path,clause_path,truncate3)
|
||||
end_time = time.time()
|
||||
print("Elapsed time:", str(end_time - start_time))
|
||||
print("Results:", results)
|
||||
|
@ -105,10 +105,16 @@ def process_string_list(string_list):
|
||||
# 获取匹配的内容,即方括号内的部分
|
||||
content_inside_brackets = match.group(1)
|
||||
if content_inside_brackets: # 检查内容是否为空
|
||||
# 将每个项目用引号包裹,并确保适当的空格和逗号处理
|
||||
formatted_list = '[' + ', '.join(f"'{item.strip()}'" for item in content_inside_brackets.split(',') if item.strip()) + ']'
|
||||
# 检查内容是否是数字列表
|
||||
if all(item.strip().isdigit() for item in content_inside_brackets.split(',')):
|
||||
# 如果是数字,不用加引号,直接保留数字
|
||||
formatted_list = '[' + ', '.join(item.strip() for item in content_inside_brackets.split(',') if item.strip()) + ']'
|
||||
else:
|
||||
# 如果不全是数字,按字符串处理
|
||||
formatted_list = '[' + ', '.join(f"'{item.strip()}'" for item in content_inside_brackets.split(',') if item.strip()) + ']'
|
||||
else:
|
||||
return [] # 直接返回空列表如果内容为空
|
||||
|
||||
# 使用 ast.literal_eval 来解析格式化后的字符串
|
||||
try:
|
||||
actual_list = ast.literal_eval(formatted_list)
|
||||
@ -123,18 +129,17 @@ def find_forbidden(truncate_json_path,clause_path,truncate4): #投标人须
|
||||
# output_filename="merged.pdf"
|
||||
# paths=[truncate1,truncate4]
|
||||
# merged_filepath=merge_pdfs(paths,output_filename) #暂时废弃,评分前附表中的在'否决投标'中摘录了。
|
||||
|
||||
file_id=upload_file(truncate4)
|
||||
#user_query_forbidden = "该招标文件规定的投标人不得存在的其他情形有哪些,请按json列表格式给我提供信息,键名为'不得存在的其他情形',请你不要回答有关\"信誉要求\"的内容,若文件中未说明,请在键值中填'未知'。"
|
||||
# user_query_forbidden = "该招标文件规定的投标人不得存在的其他情形有哪些,请按json列表格式给我提供信息,键名为'不得存在的其他情形',请你不要回答有关\"信誉要求\"的内容,若文件中未说明,请在键值中填'未知'。"
|
||||
user_query_forbidden = "该招标文件规定的投标人不得存在的其他情形有哪些,请以列表给我提供信息,形如[xx,xx,...],请你不要回答有关\"信誉要求\"的内容,若原文未提及,返回[]。"
|
||||
qianwen_forbidden_str = qianwen_long(file_id, user_query_forbidden)
|
||||
print(qianwen_forbidden_str)
|
||||
actual_list=process_string_list(qianwen_forbidden_str)
|
||||
print(actual_list)
|
||||
actual_list=process_string_list(qianwen_forbidden_str) #提取出字符串列表 ["xxx","xx"]
|
||||
|
||||
includes = ["不得存在", "禁止投标"]
|
||||
forbidden_results = extract_and_format_from_paths([truncate_json_path, clause_path], includes)
|
||||
processed_results = extract_unique_items_from_texts(forbidden_results)
|
||||
print(processed_results)
|
||||
# print(processed_results)
|
||||
merged_forbidden_list = list(dict.fromkeys(actual_list + processed_results))
|
||||
forbidden_dict={'不得存在的其他情形':merged_forbidden_list}
|
||||
|
||||
@ -142,9 +147,9 @@ def find_forbidden(truncate_json_path,clause_path,truncate4): #投标人须
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
truncate_json_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\truncate_output.json"
|
||||
clause_path = "C:\\Users\\Administrator\\Desktop\\招标文件\\clause.json"
|
||||
truncate4 = "C:\\Users\\Administrator\\Desktop\\招标文件\\zbtest12_qualification.pdf"
|
||||
output_dir = "C:\\Users\\Administrator\\Desktop\\招标文件"
|
||||
doc_path = 'C:\\Users\\Administrator\\Desktop\\招标文件\\招标03.docx'
|
||||
truncate_json_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\temp7\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\truncate_output.json"
|
||||
clause_path = "C:\\Users\\Administrator\\Desktop\\fsdownload\\temp7\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\clause.json"
|
||||
truncate4 = "C:\\Users\\Administrator\\Desktop\\fsdownload\\temp7\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\ztbfile_qualification.pdf"
|
||||
output_dir = "C:\\Users\\Administrator\\Desktop\\fsdownload\\temp7\\3abb6e16-19db-42ad-9504-53bf1072dfe7"
|
||||
doc_path = 'C:\\Users\\Administrator\\Desktop\\fsdownload\\temp7\\3abb6e16-19db-42ad-9504-53bf1072dfe7\\ztbfile.docx'
|
||||
find_forbidden(truncate_json_path,clause_path,truncate4)
|
@ -8,7 +8,7 @@ from flask_app.main.资格评审 import process_qualification
|
||||
from flask_app.main.通义千问long import upload_file, qianwen_long
|
||||
|
||||
|
||||
def combine_review_standards(truncate1,truncate4,knowledge_name,truncate0_jsonpath,clause_path): #评标办法前附表
|
||||
def combine_review_standards(truncate1,truncate3,knowledge_name,truncate0_jsonpath,clause_path): #评标办法前附表
|
||||
# 形式评审、响应评审:千问
|
||||
print("starting形式响应评审...")
|
||||
file_id=upload_file(truncate1) #评标办法前附表
|
||||
@ -16,7 +16,7 @@ def combine_review_standards(truncate1,truncate4,knowledge_name,truncate0_jsonpa
|
||||
results = qianwen_long(file_id, user_query_1)
|
||||
original_dict_data = extract_content_from_json(results)
|
||||
qualification_review = original_dict_data.pop('资格评审标准', '默认值或None')
|
||||
final_qualify_json=process_qualification(qualification_review,truncate4,knowledge_name)
|
||||
final_qualify_json=process_qualification(qualification_review,truncate3,knowledge_name)
|
||||
form_response_dict=process_reviews(original_dict_data, knowledge_name, truncate0_jsonpath, clause_path)
|
||||
print("形式响应评审done")
|
||||
form_response_dict.update(final_qualify_json)
|
||||
|
0
flask_app/main/转化格式/pydocx_p2d.py
Normal file
0
flask_app/main/转化格式/pydocx_p2d.py
Normal file
31
flask_app/货物标/test.py
Normal file
31
flask_app/货物标/test.py
Normal file
@ -0,0 +1,31 @@
|
||||
import os
|
||||
|
||||
from docx import Document
|
||||
from docxcompose.composer import Composer
|
||||
|
||||
|
||||
def combine_docx(master, sub, index):
    """Append the document `sub` to the end of `master` and save `master`.

    If `master` already exists, a page break is added to it and the content
    of `sub` is appended after that. If `master` does not exist yet, `sub`
    is simply saved under the `master` path.

    Args:
        master: Path of the target .docx file (it may not exist yet).
        sub: Path of the .docx file to append; must be an existing file.
        index: Currently unused; kept so existing callers keep working.

    Returns:
        True when `master` was written, False when the inputs were rejected
        (missing `sub`, or either path not ending in ".docx").
    """
    # The file to merge must exist and be a regular file (not a directory).
    if not os.path.isfile(sub):
        return False

    # Both paths must carry a .docx extension; compare case-insensitively so
    # that names such as "FILE.DOCX" are accepted as well.
    if not master.lower().endswith('.docx') or not sub.lower().endswith('.docx'):
        return False

    if os.path.exists(master):
        doc_master = Document(master)
        doc_master.add_page_break()
        cp = Composer(doc_master)
        cp.append(Document(sub))
    else:
        # No master yet: the sub document becomes the master as-is.
        doc_master = Document(sub)

    doc_master.save(master)
    return True
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual check: merge the sample tender document into the test master file.
    master_docx = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\test.docx'
    sub_docx = 'C:\\Users\\Administrator\\Desktop\\招标文件\\output1\\zbfile.docx'
    # NOTE: combine_docx does not read its index argument; the sub document
    # is always appended at the end of the master.
    position = 2
    combine_docx(master_docx, sub_docx, position)
|
Loading…
x
Reference in New Issue
Block a user