2024-08-29 17:30:49 +08:00
|
|
|
|
from flask_app.main.按页读取pdf import extract_text_by_page
|
2024-08-29 16:37:09 +08:00
|
|
|
|
|
|
|
|
|
def check_strings_in_pdf(file_path, judge_list=None):
    """Build one follow-up question for each keyword that occurs in a PDF.

    Args:
        file_path: Path of the PDF, passed unchanged to
            ``extract_text_by_page``.
        judge_list: Optional iterable of keyword strings to look for. When
            omitted, the two built-in keywords
            ``['施工机械设备', '企业信息登记']`` are used.

    Returns:
        A list of question strings, one per keyword found in the extracted
        text and in the same order as ``judge_list``; ``None`` when no
        keyword is found.
    """
    if judge_list is None:
        judge_list = ['施工机械设备', '企业信息登记']

    # NOTE(review): assumes extract_text_by_page returns the PDF text as a
    # string (or an iterable of strings) -- confirm against that helper.
    text = extract_text_by_page(file_path)

    # Newlines and spaces are stripped so a keyword that was split across a
    # line break or padded with spaces in the extracted text still matches.
    full_text = ''.join(text).replace('\n', '').replace(' ', '')

    ques_list = [
        f"该招标文件对于'{keyword}'的要求是怎样的,请按json格式给我提供信息,键名为'{keyword}',若存在未知信息,在对应的键值中填'未知'。"
        for keyword in judge_list
        if keyword in full_text
    ]

    # Callers receive None (not an empty list) when nothing matched.
    return ques_list or None
|
|
|
|
|
|
|
|
|
|
# Test cases or example usage
|
|
|
|
|
if __name__ == '__main__':
    # Replace with your actual PDF file path.
    file_path = 'C:/Users/Administrator/Desktop/zbtest18_39-45.pdf'

    # check_strings_in_pdf applies its built-in keyword list when called with
    # only a path, and returns None when no keyword is found; fall back to an
    # empty list so the loop below does not fail on None.
    questions = check_strings_in_pdf(file_path) or []

    for question in questions:
        print(question)
|