zbparse/flask_app/main/资格评审前判断.py

30 lines
1.4 KiB
Python
Raw Normal View History

2024-08-29 17:30:49 +08:00
from flask_app.main.按页读取pdf import extract_text_by_page
2024-08-29 16:37:09 +08:00
def check_strings_in_pdf(file_path, judge_list=None):
    """Build one follow-up question for every keyword found in a PDF.

    The text returned by ``extract_text_by_page`` is joined into a single
    string and stripped of newlines and ASCII spaces, so matching ignores
    line breaks and spacing in the extracted text.

    Args:
        file_path: Path of the PDF, passed straight to
            ``extract_text_by_page``.
        judge_list: Keywords to look for. When omitted, the default pair
            ``['施工机械设备', '企业信息登记']`` is used.

    Returns:
        A list of question strings, one per keyword found, in the order the
        keywords appear in ``judge_list``; ``None`` when no keyword is found.
    """
    if judge_list is None:
        # Build the default per call so it is never shared between calls.
        judge_list = ['施工机械设备', '企业信息登记']

    # NOTE(review): assumes extract_text_by_page returns a str or an iterable
    # of str covering the whole PDF -- confirm against its implementation.
    text = extract_text_by_page(file_path)
    full_text = ''.join(text).replace('\n', '').replace(' ', '')

    # One question per keyword that occurs anywhere in the cleaned text.
    ques_list = [
        f"该招标文件对于'{keyword}'的要求是怎样的请按json格式给我提供信息键名为'{keyword}',若存在未知信息,在对应的键值中填'未知'"
        for keyword in judge_list
        if keyword in full_text
    ]

    # Callers receive None (not an empty list) when nothing matched.
    return ques_list or None
# Example usage when this module is run directly as a script.
if __name__ == '__main__':
    file_path = 'C:/Users/Administrator/Desktop/zbtest18_39-45.pdf'  # Replace with your actual PDF file path
    # Called with the path only, so the function's built-in keyword list
    # ('施工机械设备', '企业信息登记') is the one that gets checked.
    questions = check_strings_in_pdf(file_path)
    # check_strings_in_pdf returns None when no keyword is found; fall back
    # to an empty list so the loop does not fail on None.
    for question in questions or []:
        print(question)