11.17 修复了提取货物标的Bug,无效投标bug修复

This commit is contained in:
zy123 2024-11-17 17:27:05 +08:00
parent b94c62b7e6
commit c93261207d
14 changed files with 33 additions and 153 deletions

View File

@ -185,7 +185,7 @@ def process_nested_data(data):
#生成无结构的数据货物标 #生成无结构的数据货物标
def concatenate_keys_values(section_content): def concatenate_keys_values(section_content):
print(json.dumps(section_content, ensure_ascii=False, indent=4)) # print(json.dumps(section_content, ensure_ascii=False, indent=4))
""" """
将章节内容的键值对拼接成一个字符串列表每个元素为 "key value" 将章节内容的键值对拼接成一个字符串列表每个元素为 "key value"

View File

@ -3,7 +3,7 @@ import os
import re import re
import time import time
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from flask_app.general.doubao import doubao_model, generate_full_user_query
from docx import Document from docx import Document
from flask_app.general.通义千问long import upload_file, qianwen_long_text from flask_app.general.通义千问long import upload_file, qianwen_long_text
@ -39,7 +39,7 @@ def read_docx_last_column(truncate_file):
# 完整读取文件中所有表格适合pdf转docx价格便宜的情况优先推荐内容完整 # 完整读取文件中所有表格适合pdf转docx价格便宜的情况优先推荐内容完整
def read_tables_from_docx(file_path): def read_tables_from_docx(file_path):
print(file_path) # print(file_path)
# 尝试打开文档 # 尝试打开文档
try: try:
doc = Document(file_path) doc = Document(file_path)
@ -282,15 +282,13 @@ def clean_dict_datas(extracted_contents, keywords, excludes): # 让正则表达
for key, text_list in extracted_contents.items(): for key, text_list in extracted_contents.items():
if len(text_list) == 1: if len(text_list) == 1:
# print(text_list)
# print("------------------")
for data in text_list: for data in text_list:
# print(data) # print(data)
# 检查是否包含任何需要排除的字符串 # 检查是否包含任何需要排除的字符串
if any(exclude in data for exclude in excludes): if any(exclude in data for exclude in excludes):
continue # 如果包含任何排除字符串,跳过这个数据 continue # 如果包含任何排除字符串,跳过这个数据
# 去掉开头的序号eg:1 | (1) |2 | 1. | 2全角点| 3、 | 1.1 | 2.3.4 | A1 | C1.1 | 一、 # 去掉开头的序号eg:1 | (1) |2 | 1. | 2全角点| 3、 | 1.1 | 2.3.4 | A1 | C1.1 | 一、
pattern = r'^\s*([(]\d+[)]|[A-Za-z]?\d+\s*(\.\s*\d+)*(\s|\.|、|)?|[一二三四五六七八九十]+、)' pattern = r'^\s*([(]\d+[))]|[A-Za-z]?\d+\s*(\.\s*\d+)*[\s\.、.)\]|[一二三四五六七八九十]+、)'
data = re.sub(pattern, '', data).strip() data = re.sub(pattern, '', data).strip()
keyword_match = re.search(keywords, data) keyword_match = re.search(keywords, data)
if keyword_match: if keyword_match:
@ -320,7 +318,7 @@ def clean_dict_datas(extracted_contents, keywords, excludes): # 让正则表达
# print("*********") # print("*********")
new_text_list = preprocess_text_list(text_list) new_text_list = preprocess_text_list(text_list)
# 用于处理结构化文本,清理掉不必要的序号,并将分割后的段落合并,最终形成更简洁和格式化的输出。 # 用于处理结构化文本,清理掉不必要的序号,并将分割后的段落合并,最终形成更简洁和格式化的输出。
pattern = r'^\s*([(]\d+[)]|[A-Za-z]?\d+\s*(\.\s*\d+)*(\s|\.|、|)?|[一二三四五六七八九十]+、)' pattern = r'^\s*([(]\d+[))]|[A-Za-z]?\d+\s*(\.\s*\d+)*[\s\.、.)\]|[一二三四五六七八九十]+、)'
data = re.sub(pattern, '', new_text_list[0]).strip() # 去除序号 data = re.sub(pattern, '', new_text_list[0]).strip() # 去除序号
# 将修改后的第一个元素和剩余的元素连接起来 # 将修改后的第一个元素和剩余的元素连接起来
@ -373,7 +371,7 @@ def extract_table_with_keywords(data, keywords, follow_up_keywords):
split_sentences = re.split( split_sentences = re.split(
r'(?<=[。!?!?\?])|' # 在中文句号、感叹号、问号或分号后面分割 r'(?<=[。!?!?\?])|' # 在中文句号、感叹号、问号或分号后面分割
r'(?=\d+[.]\d+)|' # 在类似1.1的数字序号前分割 r'(?=\d+[.]\d+)|' # 在类似1.1的数字序号前分割
r'(?=\d+\s(?![号条款节章项例页段部步点年月日时分秒]))|' # 数字后面跟空格且空格后面不是指定关键字时分割 r'(?=\d+\s(?![号条款节章项例页段部步点年月日时分秒]))|' # 数字后面跟空格且空格后面不是指定关键字时分割
r'(?=\d+[、.])|' # 在数字后直接跟顿号、半角点号或全角点号时分割 r'(?=\d+[、.])|' # 在数字后直接跟顿号、半角点号或全角点号时分割
r'(?=[A-Za-z][.]\s*)|' # 在字母加点如A.、a.)前分割 r'(?=[A-Za-z][.]\s*)|' # 在字母加点如A.、a.)前分割
r'(?=[A-Za-z]+\s*\d+\s*(?:[.]\s*\d+)*)|' # 在可选字母加数字或多级编号前分割 r'(?=[A-Za-z]+\s*\d+\s*(?:[.]\s*\d+)*)|' # 在可选字母加数字或多级编号前分割
@ -478,7 +476,6 @@ def handle_query(file_path, user_query, output_file, result_key, keywords):
qianwen_txt = all_texts1 + all_tables1 qianwen_txt = all_texts1 + all_tables1
# Proceed only if there is content to write # Proceed only if there is content to write
selected_contents = set() # 使用 set 去重 selected_contents = set() # 使用 set 去重
if qianwen_txt: if qianwen_txt:
with open(output_file, 'w', encoding='utf-8') as file: with open(output_file, 'w', encoding='utf-8') as file:
counter = 1 counter = 1
@ -486,11 +483,12 @@ def handle_query(file_path, user_query, output_file, result_key, keywords):
file.write(f"{counter}. {content}\n") file.write(f"{counter}. {content}\n")
file.write("..............." + '\n') file.write("..............." + '\n')
counter += 1 counter += 1
user_query = generate_full_user_query(output_file, user_query)
file_id = upload_file(output_file) model_ans=doubao_model(user_query) #豆包
# qianwen_ans = qianwen_long(file_id, user_query) # file_id = upload_file(output_file)
qianwen_ans = qianwen_long_text(file_id, user_query) # model_ans = qianwen_long(file_id, user_query)
num_list = process_string_list(qianwen_ans) # model_ans = qianwen_long_text(file_id, user_query)
num_list = process_string_list(model_ans)
print(result_key + "选中的序号:" + str(num_list)) print(result_key + "选中的序号:" + str(num_list))
for index in num_list: for index in num_list:
@ -525,19 +523,25 @@ def combine_find_invalid(file_path, output_dir):
queries = [ queries = [
( (
r'\s*决|无\s*效\s*投\s*标|无\s*效\s*文\s*件|文\s*件\s*无\s*效|无\s*效\s*响\s*应|无\s*效\s*报\s*价|无\s*效\s*标|视\s*为\s*无\s*效|被\s*拒\s*绝|予\s*以\s*拒\s*绝|投\s*标\s*失\s*效|投\s*标\s*无\s*效', r'\s*决|无\s*效\s*投\s*标|无\s*效\s*文\s*件|文\s*件\s*无\s*效|无\s*效\s*响\s*应|无\s*效\s*报\s*价|无\s*效\s*标|视\s*为\s*无\s*效|被\s*拒\s*绝|予\s*以\s*拒\s*绝|投\s*标\s*失\s*效|投\s*标\s*无\s*效',
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:否决投标或拒绝投标或无效投标或投标失效的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果x为符合的信息的序号若情况不存在返回[]。", """以下是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:否决投标或拒绝投标或无效投标或投标失效的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果x为符合的信息的序号若情况不存在返回[]。
文本内容{full_text}
""",
os.path.join(output_dir, "temp1.txt"), os.path.join(output_dir, "temp1.txt"),
"否决和无效投标情形" "否决和无效投标情形"
), ),
( (
r'\s*标', r'\s*标',
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:废标项的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果x为符合的信息的序号若情况不存在返回[]。", """以下是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,请你根据该内容回答:废标项的情况有哪些?文本中可能存在无关的信息,请你准确筛选符合的信息并将它的序号返回。请以[x,x,x]格式返回给我结果x为符合的信息的序号若情况不存在返回[]。
文本内容{full_text}
""",
os.path.join(output_dir, "temp2.txt"), os.path.join(output_dir, "temp2.txt"),
"废标项" "废标项"
), ),
( (
r'\s*得|禁\s*止\s*投\s*标', r'\s*得|禁\s*止\s*投\s*标',
"以上是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,每条信息规定了各方不得存在的情形,请回答:在这些信息中,投标人或中标人或供应商或联合体投标各方或磋商小组不得存在的情形或禁止投标的情形有哪些?不要返回主语是招标人或采购人或评标委员会的信息,请你筛选所需的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,示例返回为[1,4,6],若情形不存在,返回[]。以下为需要考虑的注意事项:请返回包含实际内容的信息,若信息内容诸如'投标人不得存在的其他关联情形'这样笼统的表格,而未说明具体的情形,则无需添加这条信息。", """以下是从招标文件中摘取的内容,文本内之间的信息以'...............'分割,每条信息规定了各方不得存在的情形,请回答:在这些信息中,投标人或中标人或供应商或联合体投标各方或磋商小组不得存在的情形或禁止投标的情形有哪些?不要返回主语是招标人或采购人或评标委员会的信息,请你筛选所需的信息并将它的序号返回。请以[x,x,x]格式返回给我结果,示例返回为[1,4,6],若情形不存在,返回[]。以下为需要考虑的注意事项:请返回包含实际内容的信息,若信息内容诸如'投标人不得存在的其他关联情形'这样笼统的表格,而未说明具体的情形,则无需添加这条信息。
文本内容{full_text}
""",
os.path.join(output_dir, "temp3.txt"), os.path.join(output_dir, "temp3.txt"),
"不得存在的情形" "不得存在的情形"
) )
@ -581,8 +585,8 @@ if __name__ == '__main__':
# truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\output4\\招标文件实高电子显示屏_tobidders_notice_part1.docx" # truncate_file="C:\\Users\\Administrator\\Desktop\\货物标\\output4\\招标文件实高电子显示屏_tobidders_notice_part1.docx"
# clause_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\77a48c63-f39f-419b-af2a-7b3dbf41b70b\\clause1.json" # clause_path = "D:\\flask_project\\flask_app\\static\\output\\output1\\77a48c63-f39f-419b-af2a-7b3dbf41b70b\\clause1.json"
# doc_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\磋商文件(1).docx" # doc_path="C:\\Users\\Administrator\\Desktop\\货物标\\zbfilesdocx\\磋商文件(1).docx"
doc_path = r'D:\flask_project\flask_app\static\output\output1\6bf9792c-49a0-4a6a-8203-14b001f87911\ztbfile_invalid.docx' doc_path = r'D:\flask_project\flask_app\static\output\output1\2179c978-b638-4332-8bd2-2007ad6f7c9b\ztbfile.docx'
output_dir = r"D:\flask_project\flask_app\static\output\output1\6bf9792c-49a0-4a6a-8203-14b001f87911\tmp" output_dir = r"D:\flask_project\flask_app\static\output\output1\2179c978-b638-4332-8bd2-2007ad6f7c9b\tmp"
results = combine_find_invalid(doc_path, output_dir) results = combine_find_invalid(doc_path, output_dir)
end_time = time.time() end_time = time.time()
print("Results:", json.dumps(results, ensure_ascii=False, indent=4)) print("Results:", json.dumps(results, ensure_ascii=False, indent=4))

View File

@ -90,7 +90,7 @@ def read_docx_by_paragraphs(file_path):
return [] return []
if __name__ == "__main__": if __name__ == "__main__":
file_path = r'D:\flask_project\flask_app\static\output\output1\6bf9792c-49a0-4a6a-8203-14b001f87911\ztbfile_invalid.docx' file_path = r'D:\flask_project\flask_app\static\output\output1\2179c978-b638-4332-8bd2-2007ad6f7c9b\ztbfile.docx'
read_docx(file_path) #按行读取 read_docx(file_path) #按行读取
# paragraphs = read_docx_by_paragraphs(file_path) #按段落读取 # paragraphs = read_docx_by_paragraphs(file_path) #按段落读取

View File

@ -5,10 +5,8 @@ import os
import time import time
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from flask_app.general.投标人须知正文提取指定内容 import get_requirements_with_gpt
from flask_app.main.截取pdf import truncate_pdf_multiple from flask_app.main.截取pdf import truncate_pdf_multiple
from flask_app.general.merge_pdfs import merge_pdfs from flask_app.general.merge_pdfs import merge_pdfs
from flask_app.main.table_content_extraction import extract_tables_main
from flask_app.main.提取json工程标版 import convert_clause_to_json from flask_app.main.提取json工程标版 import convert_clause_to_json
from flask_app.general.json_utils import transform_json_values from flask_app.general.json_utils import transform_json_values
from flask_app.general.无效标和废标公共代码 import combine_find_invalid from flask_app.general.无效标和废标公共代码 import combine_find_invalid
@ -18,9 +16,7 @@ from flask_app.main.基础信息整合快速版 import combine_basic_info
from flask_app.main.资格审查模块 import combine_review_standards from flask_app.main.资格审查模块 import combine_review_standards
from flask_app.main.商务评分技术评分整合 import combine_evaluation_standards from flask_app.main.商务评分技术评分整合 import combine_evaluation_standards
from flask_app.general.format_change import pdf2docx, docx2pdf,doc2docx from flask_app.general.format_change import pdf2docx, docx2pdf,doc2docx
from flask_app.general.docx截取docx import copy_docx
from flask_app.general.通义千问long import upload_file,qianwen_long
from flask_app.general.json_utils import clean_json_string
def get_global_logger(unique_id): def get_global_logger(unique_id):
if unique_id is None: if unique_id is None:

View File

@ -4,7 +4,7 @@ import time
from flask_app.general.format_change import pdf2docx from flask_app.general.format_change import pdf2docx
from flask_app.general.json_utils import extract_content_from_json, clean_json_string from flask_app.general.json_utils import extract_content_from_json, clean_json_string
from flask_app.main.table_content_extraction import extract_tables_main from flask_app.general.table_content_extraction import extract_tables_main
from flask_app.main.形式响应评审 import process_reviews from flask_app.main.形式响应评审 import process_reviews
from flask_app.main.资格评审 import process_qualification from flask_app.main.资格评审 import process_qualification
from flask_app.general.通义千问long import upload_file, qianwen_long from flask_app.general.通义千问long import upload_file, qianwen_long

View File

@ -1,120 +0,0 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
from flask_app.general.通义千问long import upload_file, qianwen_long
from flask_app.general.json_utils import clean_json_string
# Build the qualification-review result from up to three input documents.
#
# Parameters:
#   invalid_path       - fallback document; uploaded when qualification_path is
#                        empty or the first query does not confirm it, and also
#                        used for the applicant query when notice_path is empty.
#   qualification_path - preferred document for the '资格性审查' query; may be falsy.
#   notice_path        - preferred document for the '申请人资格要求' query; may be falsy.
#
# Returns:
#   {"资格审查": {...}} whose inner dict holds the keys "申请人资格要求" and
#   "资格性审查" (in that order) for whichever of them produced a result.
#
# NOTE(review): this text comes from a flattened diff view, so the indentation
# (and therefore the exact nesting of the branches below) is not visible here.
def combine_qualification_new(invalid_path, qualification_path,notice_path):
detailed_res = {}
# File id of the uploaded fallback (invalid_path) document; nothing uploaded yet.
invalid_file_id = None
if qualification_path:
# Upload the qualification document and keep its file id.
qualification_file_id = upload_file(qualification_path)
# First query: ask whether the document contains qualification-review criteria.
first_query = """
该文档中是否有关于资格性审查标准的具体内容?请以json格式给出回答,外键为'资格性审查',键值仅限于'','',输出格式示例如下:
{
"资格性审查":""
}
"""
# Run the first query and clean the returned JSON string.
print("call first_query")
first_res = clean_json_string(qianwen_long(qualification_file_id, first_query))
# Keep the qualification file only when the answer equals the expected literal.
# NOTE(review): the two allowed values in the prompt and the literal compared
# here both appear as empty strings in this view - confirm against the
# repository that no characters were lost before relying on this check.
zige_file_id = qualification_file_id if first_res.get("资格性审查") == "" else None
# Otherwise fall back to uploading the invalid_path document.
if zige_file_id is None:
# invalid_file_id has not been reassigned since it was set to None above.
if invalid_file_id is None:
invalid_file_id = upload_file(invalid_path)
zige_file_id = invalid_file_id
else:
# No qualification_path given: use the invalid_path document directly.
zige_file_id = upload_file(invalid_path)
# Second group of queries; it contains only the qualification-review entry.
second_query = [
{
"key": "资格性审查",
"query": "该招标文件中规定的资格性审查标准是怎样的请以json格式给出外层为'资格性审查',你的回答要与原文完全一致,不可擅自总结删减,也不要回答有关符合性审查的内容。"
}
]
# Worker: run one query against file_id and return (key, extracted value).
# Any exception is printed and reported as the placeholder "查询失败".
def process_second_query(key, query, file_id):
print("call second_query")
try:
res = qianwen_long(file_id, query)
cleaned_res = clean_json_string(res)
return key, cleaned_res.get(key, "未找到相关内容")
except Exception as e:
print(f"执行查询 '{key}' 时出错: {e}")
return key, "查询失败"
# Worker: upload the given document and extract the applicant qualification
# requirements. Any exception is printed and reported as "处理失败".
def process_notice(notice_path):
print("call notice_path")
try:
# Upload the notice document and get its file id.
file_id1 = upload_file(notice_path)
# User query that extracts the applicant qualification requirements.
user_query1 = """
第一章招标公告投标邀请书中说明的申请人资格要求是怎样的请以json格式给出回答外键为'申请人资格要求'键值为字符串列表其中每个字符串对应原文中的一条要求你的回答与原文内容一致不要擅自总结删减输出格式示例如下
{
"申请人资格要求":[
"1.满足《中华人民共和国政府采购法》第二十二条规定;",
"1.1 法人或者其他组织的营业执照等证明文件,如供应商是自然人的提供身份证明材料;",
"2.未被列入“信用中国”网站(www.creditchina.gov.cn)信用服务栏失信被执行人、重大税收违法案件当事人名单;"
]
}
"""
# Run the query and clean the result.
res1 = clean_json_string(qianwen_long(file_id1, user_query1))
# Pull out the applicant qualification requirements.
requirements = res1.get("申请人资格要求", "未找到相关内容")
return "申请人资格要求", requirements
except Exception as e:
print(f"处理申请人资格要求时出错: {e}")
return "申请人资格要求", "处理失败"
# Run both kinds of work on a two-worker thread pool.
with ThreadPoolExecutor(max_workers=2) as executor:
future_to_key = {}
# Submit the second group of queries.
for query_info in second_query:
key = query_info["key"]
query = query_info["query"]
current_file_id = zige_file_id
future = executor.submit(process_second_query, key, query, current_file_id)
future_to_key[future] = key
# Submit the applicant query: notice_path if given, otherwise invalid_path.
if notice_path:
future = executor.submit(process_notice, notice_path)
future_to_key[future] = "申请人资格要求"
else:
future = executor.submit(process_notice, invalid_path)
future_to_key[future] = "申请人资格要求"
# Collect results in completion order.
for future in as_completed(future_to_key):
key, result = future.result()
detailed_res[key] = result
# Desired key order of the output.
desired_order = ["申请人资格要求", "资格性审查"]
# print(json.dumps(detailed_res,ensure_ascii=False,indent=4))
# Rebuild the dict so the keys appear in the desired order.
ordered_res = {}
for key in desired_order:
if key in detailed_res:
ordered_res[key] = detailed_res[key]
# Return the ordered result wrapped under the top-level key.
return {"资格审查": ordered_res}

View File

@ -4,7 +4,7 @@ import logging
import time import time
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from flask_app.main.截取pdf import truncate_pdf_multiple from flask_app.main.截取pdf import truncate_pdf_multiple
from flask_app.main.table_content_extraction import extract_tables_main from flask_app.general.table_content_extraction import extract_tables_main
from flask_app.old_version.文档理解大模型版知识库处理.知识库操作 import addfileToKnowledge, deleteKnowledge from flask_app.old_version.文档理解大模型版知识库处理.知识库操作 import addfileToKnowledge, deleteKnowledge
from flask_app.main.提取json工程标版 import convert_clause_to_json from flask_app.main.提取json工程标版 import convert_clause_to_json
from flask_app.general.json_utils import transform_json_values from flask_app.general.json_utils import transform_json_values

View File

@ -8,7 +8,7 @@ from flask_app.general.format_change import pdf2docx
from flask_app.general.通义千问long import upload_file, qianwen_long_text from flask_app.general.通义千问long import upload_file, qianwen_long_text
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from flask_app.main.table_content_extraction import extract_tables_main from flask_app.general.table_content_extraction import extract_tables_main
from flask_app.old_version.不得存在及禁止投标情形 import find_forbidden, process_string_list from flask_app.old_version.不得存在及禁止投标情形 import find_forbidden, process_string_list

View File

@ -4,7 +4,7 @@ import logging
import time import time
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from flask_app.main.截取pdf import truncate_pdf_multiple from flask_app.main.截取pdf import truncate_pdf_multiple
from flask_app.main.table_content_extraction import extract_tables_main from flask_app.general.table_content_extraction import extract_tables_main
from flask_app.main.提取json工程标版 import convert_clause_to_json from flask_app.main.提取json工程标版 import convert_clause_to_json
from flask_app.general.json_utils import transform_json_values from flask_app.general.json_utils import transform_json_values
from flask_app.main.无效标和废标和禁止投标整合 import combine_find_invalid from flask_app.main.无效标和废标和禁止投标整合 import combine_find_invalid

View File

@ -292,7 +292,7 @@ def get_technical_requirements(file_path,invalid_path):
5.不包含'说明''规格''技术参数'等列内容仅返回采购的货物或系统或模块名称 5.不包含'说明''规格''技术参数'等列内容仅返回采购的货物或系统或模块名称
特殊情况处理 特殊情况处理
同一层级如同一系统中同名但采购要求不同的货物'货物名-编号'区分编号从1递增 同一层级如同一系统中存在同名但采购要求不同的货物'货物名-编号'区分编号从1递增规避重复键名的问题
示例输出1普通系统货物类采购 示例输出1普通系统货物类采购
{{ {{

View File

@ -20,7 +20,7 @@ def fetch_procurement_reqs(procurement_path, invalid_path):
} }
# 如果 procurement_docpath 是空字符串,直接返回包含空字符串的字典 # 如果 procurement_docpath 是空字符串,直接返回包含空字符串的字典
if not procurement_docpath: if not procurement_path:
return DEFAULT_PROCUREMENT_REQS.copy() return DEFAULT_PROCUREMENT_REQS.copy()
try: try:

View File

@ -78,7 +78,7 @@ def preprocess_files(output_folder, file_path, file_type,logger):
'merged_baseinfo_path': merged_baseinfo_path 'merged_baseinfo_path': merged_baseinfo_path
} }
def fetch_project_basic_info(invalid_path,invalid_docpath, merged_baseinfo_path, procurement_path, clause_path,logger): def fetch_project_basic_info(invalid_path,merged_baseinfo_path, procurement_path, clause_path,logger):
logger.info("starting 基础信息...") logger.info("starting 基础信息...")
start_time = time.time() start_time = time.time()
if not merged_baseinfo_path: if not merged_baseinfo_path:
@ -202,7 +202,7 @@ def goods_bid_main(output_folder, file_path, file_type, unique_id):
'bidding_documents_requirements': executor.submit(fetch_bidding_documents_requirements,processed_data['invalid_path'],processed_data['merged_baseinfo_path'], 'bidding_documents_requirements': executor.submit(fetch_bidding_documents_requirements,processed_data['invalid_path'],processed_data['merged_baseinfo_path'],
processed_data['clause_path'],logger), processed_data['clause_path'],logger),
'opening_bid': executor.submit(fetch_bid_opening, processed_data['invalid_path'],processed_data['merged_baseinfo_path'],processed_data['clause_path'],logger), 'opening_bid': executor.submit(fetch_bid_opening, processed_data['invalid_path'],processed_data['merged_baseinfo_path'],processed_data['clause_path'],logger),
'base_info': executor.submit(fetch_project_basic_info, processed_data['invalid_path'],processed_data['invalid_docpath'],processed_data['merged_baseinfo_path'], 'base_info': executor.submit(fetch_project_basic_info, processed_data['invalid_path'],processed_data['merged_baseinfo_path'],
processed_data['procurement_path'],processed_data['clause_path'],logger), processed_data['procurement_path'],processed_data['clause_path'],logger),
'qualification_review': executor.submit(fetch_qualification_review, processed_data['invalid_path'], 'qualification_review': executor.submit(fetch_qualification_review, processed_data['invalid_path'],
processed_data['qualification_path'], processed_data['qualification_path'],