zbparse/flask_app/main/资格评审前判断.py
2024-08-29 16:37:09 +08:00

30 lines
1.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from 按页读取pdf import extract_text_by_page
def check_strings_in_pdf(file_path, judge_list=None):
    """Build follow-up questions for keywords that occur in a PDF's text.

    The value returned by ``extract_text_by_page(file_path)`` is joined into
    one string, newlines and spaces are removed, and each keyword is looked
    up in the result with a plain substring test.

    Args:
        file_path: Path of the PDF; passed unchanged to
            ``extract_text_by_page``.
        judge_list: Keywords to look for. When omitted or ``None`` the
            built-in pair ``['施工机械设备', '企业信息登记']`` is used, so
            single-argument callers behave as before.

    Returns:
        A list with one question string per keyword found, in ``judge_list``
        order, or ``None`` when no keyword occurs in the text.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if judge_list is None:
        judge_list = ['施工机械设备', '企业信息登记']

    # NOTE(review): assumes extract_text_by_page yields the PDF's text as a
    # string or an iterable of strings -- confirm against its definition.
    text = extract_text_by_page(file_path)

    # Line breaks and spaces can split a keyword in extracted text, so they
    # are stripped before matching.
    full_text = ''.join(text).replace('\n', '').replace(' ', '')

    # One question per keyword that is present, for any number of keywords.
    ques_list = [
        f"该招标文件对于'{keyword}'的要求是怎样的请按json格式给我提供信息键名为'{keyword}',若存在未知信息,在对应的键值中填'未知'"
        for keyword in judge_list
        if keyword in full_text
    ]

    # Existing callers expect None, not an empty list, when nothing matched.
    return ques_list or None
# Example usage: print the questions generated for a sample PDF.
if __name__ == '__main__':
    file_path = 'C:/Users/Administrator/Desktop/zbtest18_39-45.pdf'  # Replace with your actual PDF file path
    # Only the path is passed: check_strings_in_pdf supplies the keywords
    # ('施工机械设备', '企业信息登记') itself, and passing a second positional
    # argument to its one-parameter form raises TypeError.
    # It returns None when no keyword is found, so fall back to an empty
    # list to keep the loop below from failing on a non-iterable.
    questions = check_strings_in_pdf(file_path) or []
    for question in questions:
        print(question)