2.18 空的docx无法被识别为非招标文件
This commit is contained in:
parent
518f5d7ac7
commit
b25592da4d
@ -160,8 +160,7 @@ def generate_full_user_query(file_path, prompt_template):
|
|||||||
return user_query
|
return user_query
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
error_msg='''Error code: 400 - {'error': {'code': 'RequestTimeOut', 'param': None, 'message': 'Request timed out, please try again later.', 'type': 'RequestTimeOut'}, 'id': 'chatcmpl-a92698d3-4e21-9243-b900-e32d4df4ac49', 'request_id': 'a92698d3-4e21-9243-b900-e32d4df4ac49'}
|
error_msg='''openai.BadRequestError: Error code: 400 - {'error': {'code': 'invalid_parameter_error', 'param': None, 'message': 'File parsing in progress, please try again later.', 'type': 'invalid_request_error'}, 'id': 'chatcmpl-018e2b26-b3eb-907e-be6a-6ba8fccee86f', 'request_id': '018e2b26-b3eb-907e-be6a-6ba8fccee86f'}'''
|
||||||
'''
|
|
||||||
error_code, error_code_string, request_id=extract_error_details(error_msg)
|
error_code, error_code_string, request_id=extract_error_details(error_msg)
|
||||||
print(error_code)
|
print(error_code)
|
||||||
print(error_code_string)
|
print(error_code_string)
|
||||||
|
@ -99,7 +99,7 @@ def qianwen_long(file_id, user_query, max_retries=2, backoff_factor=1.0, need_ex
|
|||||||
break
|
break
|
||||||
elif error_code == 400 and error_code_string in [
|
elif error_code == 400 and error_code_string in [
|
||||||
'data_inspection_failed', 'ResponseTimeout', 'DataInspectionFailed',
|
'data_inspection_failed', 'ResponseTimeout', 'DataInspectionFailed',
|
||||||
'response_timeout', 'request_timeout', "RequestTimeOut"
|
'response_timeout', 'request_timeout', "RequestTimeOut","invalid_parameter_error"
|
||||||
]:
|
]:
|
||||||
logger.warning(f"错误代码为 400 - {error_code_string},将调用 qianwen_long_stream 执行一次...")
|
logger.warning(f"错误代码为 400 - {error_code_string},将调用 qianwen_long_stream 执行一次...")
|
||||||
try:
|
try:
|
||||||
@ -219,7 +219,7 @@ def qianwen_long_stream(file_id, user_query, max_retries=2, backoff_factor=1.0,
|
|||||||
break
|
break
|
||||||
elif error_code == 400 and error_code_string in [
|
elif error_code == 400 and error_code_string in [
|
||||||
'data_inspection_failed', 'ResponseTimeout', 'DataInspectionFailed',
|
'data_inspection_failed', 'ResponseTimeout', 'DataInspectionFailed',
|
||||||
'response_timeout', 'request_timeout', "RequestTimeOut"
|
'response_timeout', 'request_timeout', "RequestTimeOut","invalid_parameter_error"
|
||||||
]:
|
]:
|
||||||
if attempt == 1: # 只重试一次
|
if attempt == 1: # 只重试一次
|
||||||
logger.warning(f"错误代码为 400 - {error_code_string},将立即重试...")
|
logger.warning(f"错误代码为 400 - {error_code_string},将立即重试...")
|
||||||
|
@ -200,8 +200,6 @@ def merge_selected_pdfs(output_folder, truncate_files, output_path, base_file_na
|
|||||||
if matching_files:
|
if matching_files:
|
||||||
matching_files_sorted = sorted(matching_files)
|
matching_files_sorted = sorted(matching_files)
|
||||||
all_pdfs_to_merge.extend(matching_files_sorted)
|
all_pdfs_to_merge.extend(matching_files_sorted)
|
||||||
for f in matching_files_sorted:
|
|
||||||
print(f"选中文件: {f}")
|
|
||||||
else:
|
else:
|
||||||
if required:
|
if required:
|
||||||
print(f"没有找到以 '{suffix}' 结尾的文件。")
|
print(f"没有找到以 '{suffix}' 结尾的文件。")
|
||||||
|
@ -308,7 +308,7 @@ def get_requirements_with_gpt(merged_baseinfo_path, selection):
|
|||||||
}
|
}
|
||||||
""",
|
""",
|
||||||
3: """
|
3: """
|
||||||
该招标文件中重新招标(或重新采购)、不再招标(或不再采购)、终止招标(或终止采购)的情况分别是什么?请以json格式返回给我结果,键名分别为'重新招标','不再招标','终止招标',键值应该完全与原文内容保持一致,不得擅自总结删减,如果原文中未提及相关内容,在键值中填'未知'。
|
该招标文件中重新招标(或重新采购)、不再招标(或不再采购)、终止招标(或终止采购)的情况分别是什么?请以json格式返回给我结果,键名分别为'重新招标','不再招标','终止招标',键值应该基于实际的招标文件内容,不得擅自总结删减,更不得回答示例输出中的内容,如果原文中未提及相关内容,在相应键值中填'未知'。
|
||||||
示例输出如下,仅供格式参考:
|
示例输出如下,仅供格式参考:
|
||||||
{
|
{
|
||||||
"重新招标":"有下列情形之一的,招标人将重新招标:(1)投标截止时间止,投标人少于3个的;(2)经评标委员会评审后否决所有投标的;",
|
"重新招标":"有下列情形之一的,招标人将重新招标:(1)投标截止时间止,投标人少于3个的;(2)经评标委员会评审后否决所有投标的;",
|
||||||
|
@ -66,10 +66,10 @@ def create_logger(app, subfolder):
|
|||||||
file_formatter = CSTFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
file_formatter = CSTFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||||
file_handler.setFormatter(file_formatter)
|
file_handler.setFormatter(file_formatter)
|
||||||
logger.addHandler(file_handler)
|
logger.addHandler(file_handler)
|
||||||
stream_handler = logging.StreamHandler()
|
# stream_handler = logging.StreamHandler()
|
||||||
stream_handler.setFormatter(logging.Formatter('%(message)s'))
|
# stream_handler.setFormatter(logging.Formatter('%(message)s'))
|
||||||
logger.addHandler(stream_handler)
|
# logger.addHandler(stream_handler)
|
||||||
logger.setLevel(logging.INFO)
|
logger.setLevel(logging.INFO) #Logger 只会处理大于或等于 INFO 级别的日志消息(例如 INFO、WARNING、ERROR、CRITICAL),而 DEBUG 级别的消息会被忽略。
|
||||||
logger.propagate = False
|
logger.propagate = False
|
||||||
g.logger = logger
|
g.logger = logger
|
||||||
g.output_folder = output_folder #输出文件夹路径
|
g.output_folder = output_folder #输出文件夹路径
|
||||||
|
@ -20,6 +20,7 @@ def process_pdf(pdf_path):
|
|||||||
"""子进程里实际执行的函数"""
|
"""子进程里实际执行的函数"""
|
||||||
result = read_pdf_main(pdf_path=pdf_path)
|
result = read_pdf_main(pdf_path=pdf_path)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
logger = get_global_logger("123")
|
logger = get_global_logger("123")
|
||||||
|
30
flask_app/test_case/test_内存泄漏2.py
Normal file
30
flask_app/test_case/test_内存泄漏2.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
import tracemalloc
|
||||||
|
from PyPDF2 import PdfReader
|
||||||
|
|
||||||
|
def extract_text_by_page(file_path):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The text of all pages concatenated in page order. A page whose
        ``extract_text()`` call yields nothing contributes an empty string.
    """
    page_texts = []
    with open(file_path, 'rb') as file:
        reader = PdfReader(file)
        # Pages are extracted while the file handle is still open, because
        # the reader was constructed on that handle.
        for page in reader.pages:
            # Keep each page's text so the caller actually receives it;
            # `or ""` guards against a falsy/None extraction result.
            page_texts.append(page.extract_text() or "")
    # Join once at the end instead of repeated string concatenation.
    return "".join(page_texts)
|
||||||
|
|
||||||
|
# Begin tracing Python memory allocations.
tracemalloc.start()

# Baseline snapshot, taken before the function under test runs.
snapshot_before = tracemalloc.take_snapshot()

# Run the function under test; `result` stays referenced so whatever it
# holds is still alive when the second snapshot is taken.
file_path = r'C:\Users\Administrator\Desktop\fsdownload\00550cfc-fd33-469e-8272-9215291b175c\ztbfile.pdf'
result = extract_text_by_page(file_path)

# Second snapshot, taken after the call.
snapshot_after = tracemalloc.take_snapshot()

# Per-source-line allocation differences between the two snapshots.
line_diffs = snapshot_after.compare_to(snapshot_before, 'lineno')

print("[ Top 10 内存变化 ]")
for diff_entry in line_diffs[:10]:
    print(diff_entry)

# Stop tracing allocations.
tracemalloc.stop()
|
Loading…
x
Reference in New Issue
Block a user