30 lines
1.4 KiB
Python
30 lines
1.4 KiB
Python
from 按页读取pdf import extract_text_by_page
|
||
|
||
def check_strings_in_pdf(file_path, judge_list=None):
    """Build follow-up questions for the keywords that occur in a PDF.

    The text returned by ``extract_text_by_page(file_path)`` is joined into a
    single string, newlines and spaces are removed, and every keyword is
    looked up in the result as a plain substring. One question is generated
    per keyword found, in the order the keywords are given.

    Args:
        file_path: Path of the PDF; passed unchanged to
            ``extract_text_by_page``.
        judge_list: Optional iterable of keyword strings to search for.
            When omitted, the two built-in keywords
            ``'施工机械设备'`` and ``'企业信息登记'`` are used.

    Returns:
        A list of question strings, or ``None`` when none of the keywords
        occurs in the text.
    """
    # Build the default inside the call so no list object is shared between calls.
    if judge_list is None:
        judge_list = ['施工机械设备', '企业信息登记']

    # NOTE(review): assumes extract_text_by_page returns an iterable of
    # strings that can be joined — confirm against that helper.
    text = extract_text_by_page(file_path)

    # Drop newlines and spaces so a keyword interrupted by them still matches.
    full_text = ''.join(text).replace('\n', '').replace(' ', '')

    # One question per keyword present in the text; keyword order is kept.
    ques_list = [
        f"该招标文件对于'{keyword}'的要求是怎样的,请按json格式给我提供信息,键名为'{keyword}',若存在未知信息,在对应的键值中填'未知'。"
        for keyword in judge_list
        if keyword in full_text
    ]

    # Callers receive None (not an empty list) when nothing matched.
    return ques_list or None
|
||
|
||
# Test cases or example usage
|
||
if __name__ == '__main__':
    # Example usage: print the questions generated for a sample PDF.
    file_path = 'C:/Users/Administrator/Desktop/zbtest18_39-45.pdf'  # Replace with your actual PDF file path
    judge_list = ['施工机械设备', '企业信息登记']  # List of strings to check in the PDF

    questions = check_strings_in_pdf(file_path, judge_list)

    # check_strings_in_pdf returns None when no keyword is found, so fall
    # back to an empty list instead of iterating over None.
    for question in questions or []:
        print(question)
|