diff --git a/docker-compose.yml b/docker-compose.yml index 9a5f0bd..e94d316 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,7 +9,7 @@ services: - .env # 使用 .env 文件中的环境变量 volumes: # - .:/flask_project # 将当前目录挂载到容器的 /flask_project 目录(可选,便于开发时实时更新代码) - - /home/Z/zbparse_output:/flask_project/flask_app/static/output # 额外的数据卷挂载 + - /home/Z/zbparse_output_dev:/flask_project/flask_app/static/output # 额外的数据卷挂载 restart: unless-stopped # 容器退出时自动重启,除非明确停止 privileged: true diff --git a/flask_app/general/多线程提问.py b/flask_app/general/多线程提问.py index 2e7c947..cea3a72 100644 --- a/flask_app/general/多线程提问.py +++ b/flask_app/general/多线程提问.py @@ -12,7 +12,6 @@ import requests from dashscope import Assistants, Messages, Runs, Threads from llama_index.indices.managed.dashscope import DashScopeCloudRetriever from flask_app.general.通义千问long import qianwen_long, upload_file - prompt = """ # 角色 你是一个文档处理专家,专门负责理解和操作基于特定内容的文档任务,这包括解析、总结、搜索或生成与给定文档相关的各类信息。 @@ -268,7 +267,7 @@ def multi_threading(queries, knowledge_name="", file_id="", llm_type=1): with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor: future_to_query = {} for index, query in enumerate(queries): - time.sleep(0.7) # 每提交一个任务后等待0.5秒 + # time.sleep(0.5) # 每提交一个任务后等待0.5秒,目前设置了直接对qianwen-long直接限制,无需sleep future = executor.submit(llm_call, query, knowledge_name, file_id, result_queue, index, llm_type) future_to_query[future] = index retry_counts[index] = 0 # 初始化重试次数 diff --git a/flask_app/general/无效标和废标公共代码.py b/flask_app/general/无效标和废标公共代码.py index bdcf2a6..34c7c1f 100644 --- a/flask_app/general/无效标和废标公共代码.py +++ b/flask_app/general/无效标和废标公共代码.py @@ -5,8 +5,6 @@ import time from concurrent.futures import ThreadPoolExecutor from flask_app.general.doubao import doubao_model, generate_full_user_query from docx import Document - -from flask_app.general.通义千问long import upload_file, qianwen_long_text from flask_app.general.通用功能函数 import process_string_list diff --git a/flask_app/general/通义千问long.py 
b/flask_app/general/通义千问long.py index 9f47a65..01c6b9d 100644 --- a/flask_app/general/通义千问long.py +++ b/flask_app/general/通义千问long.py @@ -1,4 +1,7 @@ import json +from functools import wraps + +from ratelimit import limits, sleep_and_retry import random import time from pathlib import Path @@ -16,6 +19,19 @@ def upload_file(file_path): file = client.files.create(file=Path(file_path), purpose="file-extract") return file.id +@sleep_and_retry +@limits(calls=4, period=1) # 每秒最多调用4次 +def rate_limiter(): + pass # 这个函数本身不执行任何操作,只用于限流 + +# 创建一个共享的装饰器 +def shared_rate_limit(func): + @wraps(func) + def wrapper(*args, **kwargs): + rate_limiter() # 通过共享的限流器 + return func(*args, **kwargs) + return wrapper +@shared_rate_limit def qianwen_long(file_id, user_query): print("call qianwen-long...") """ @@ -48,38 +64,7 @@ def qianwen_long(file_id, user_query): # Return the response content # return completion.choices[0].message.content,completion.usage return completion.choices[0].message.content - -def qianwen_long_text(file_id, user_query): - print("call qianwen-long text...") - """ - Uses a previously uploaded file to generate a response based on a user query. 
- """ - client = OpenAI( - api_key=os.getenv("DASHSCOPE_API_KEY"), - base_url="https://dashscope.aliyuncs.com/compatible-mode/v1" - ) - - # Generate a response based on the file ID - completion = client.chat.completions.create( - model="qwen-long", - # top_p=0.5, - temperature=0.5, - messages=[ - { - 'role': 'system', - 'content': f'fileid://{file_id}' - }, - { - 'role': 'user', - 'content': user_query - } - ], - stream=False - ) - - # Return the response content - return completion.choices[0].message.content - +@shared_rate_limit def qianwen_long_stream(file_id, user_query): print("调用 qianwen-long stream...") """ @@ -135,10 +120,39 @@ def qianwen_long_stream(file_id, user_query): print("\n中断流式响应。") except Exception as e: print(f"\n处理流式响应时出错: {e}") - - print() # 换行 return full_response # 返回完整的响应内容 +@shared_rate_limit +def qianwen_long_text(file_id, user_query): + print("call qianwen-long text...") + """ + Uses a previously uploaded file to generate a response based on a user query. + """ + client = OpenAI( + api_key=os.getenv("DASHSCOPE_API_KEY"), + base_url="https://dashscope.aliyuncs.com/compatible-mode/v1" + ) + + # Generate a response based on the file ID + completion = client.chat.completions.create( + model="qwen-long", + # top_p=0.5, + temperature=0.5, + messages=[ + { + 'role': 'system', + 'content': f'fileid://{file_id}' + }, + { + 'role': 'user', + 'content': user_query + } + ], + stream=False + ) + + # Return the response content + return completion.choices[0].message.content if __name__ == "__main__": # Example file path - replace with your actual file path diff --git a/flask_app/old_version/不得存在及禁止投标情形.py b/flask_app/old_version/不得存在及禁止投标情形.py index 5088bdf..9a4fe1b 100644 --- a/flask_app/old_version/不得存在及禁止投标情形.py +++ b/flask_app/old_version/不得存在及禁止投标情形.py @@ -4,7 +4,7 @@ import re from PyPDF2 import PdfWriter, PdfReader -from flask_app.general.通义千问long import upload_file, qianwen_long, qianwen_long_text +from flask_app.general.通义千问long import 
upload_file, qianwen_long from flask_app.general.通用功能函数 import process_string_list @@ -144,7 +144,7 @@ def find_forbidden(qualification=""): "该招标文件规定的投标人不得存在的其他情形有哪些,请以列表给我提供信息,形如[xx,xx,...]," "请你不要回答有关\"信誉要求\"的内容,若原文未提及,返回[]。" ) - qianwen_forbidden_str = qianwen_long_text(file_id, user_query_forbidden) + qianwen_forbidden_str = qianwen_long(file_id, user_query_forbidden) else: qianwen_forbidden_str = "[]" diff --git a/flask_app/old_version/无效标和废标和禁止投标整合.py b/flask_app/old_version/无效标和废标和禁止投标整合.py index 19b20ff..20fdb94 100644 --- a/flask_app/old_version/无效标和废标和禁止投标整合.py +++ b/flask_app/old_version/无效标和废标和禁止投标整合.py @@ -5,7 +5,7 @@ import time import re from flask_app.general.format_change import pdf2docx -from flask_app.general.通义千问long import upload_file, qianwen_long_text +from flask_app.general.通义千问long import upload_file, qianwen_long from concurrent.futures import ThreadPoolExecutor from flask_app.general.table_content_extraction import extract_tables_main @@ -345,7 +345,7 @@ def handle_query(file_path, user_query, output_file, result_key, keywords, trunc file_id = upload_file(output_file) # qianwen_ans = qianwen_long(file_id, user_query) - qianwen_ans = qianwen_long_text(file_id, user_query) + qianwen_ans = qianwen_long(file_id, user_query) num_list = process_string_list(qianwen_ans) print(result_key + "选中的序号:" + str(num_list)) diff --git a/flask_app/货物标/评分标准提取main.py b/flask_app/货物标/评分标准提取main.py index a73c42e..c422e8d 100644 --- a/flask_app/货物标/评分标准提取main.py +++ b/flask_app/货物标/评分标准提取main.py @@ -3,7 +3,7 @@ import json import re import time from collections import defaultdict -from flask_app.general.通义千问long import upload_file, qianwen_long, qianwen_long_text +from flask_app.general.通义千问long import upload_file, qianwen_long def combine_technical_and_business(data, target_values): @@ -211,7 +211,7 @@ def combine_evaluation_standards(truncate_file): ) # 应对竞争性谈判这种无评分要求的情况 # 执行查询 - judge_res = qianwen_long_text(file_id, user_query1) + judge_res = 
qianwen_long(file_id, user_query1) # 默认 judge 为 True judge = True @@ -268,7 +268,7 @@ def combine_evaluation_standards(truncate_file): """ ) # 执行第二个查询 - evaluation_res = qianwen_long_text(file_id, user_query) #有些重复的键名,只有qianwen_long_text能保留 + evaluation_res = qianwen_long(file_id, user_query) # NOTE(review): some key names are duplicated; previously only qianwen_long_text preserved them, so confirm qianwen_long still does # print(evaluation_res) # 清理和处理响应 cleaned_evaluation_res = parse_json_with_duplicates(evaluation_res) #处理重复键名的情况 diff --git a/requirements.txt b/requirements.txt index 55e8bf7..d2cda5d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,5 @@ dashscope==1.19.2 PyMuPDF==1.24.1 openai==1.33.0 pathlib==1.0.1 -alibabacloud_bailian20231229==1.7.0 \ No newline at end of file +alibabacloud_bailian20231229==1.7.0 +ratelimit==2.2.1 \ No newline at end of file