from flask_app.general.读取文件.按页读取pdf import extract_text_by_page


def check_strings_in_pdf(file_path, judge_list=None):
    """Build follow-up questions for each keyword that occurs in a PDF.

    The PDF text is obtained via ``extract_text_by_page``, all newlines and
    spaces are stripped, and every keyword in ``judge_list`` is looked up as
    a plain substring of the cleaned text. One question string is produced
    per keyword found, in the order the keywords are given.

    Args:
        file_path: Path of the PDF file, passed straight through to
            ``extract_text_by_page``.
        judge_list: Optional iterable of keyword strings to look for.
            When omitted (or ``None``), the two built-in keywords
            '施工机械设备' and '企业信息登记' are used.

    Returns:
        A list of question strings, or ``None`` when none of the keywords
        occurs in the document.
    """
    if judge_list is None:
        # Built-in keywords, used when the caller does not supply any.
        judge_list = ['施工机械设备', '企业信息登记']

    # NOTE(review): assumes extract_text_by_page returns a string or an
    # iterable of strings (both work with ''.join) -- confirm against helper.
    text = extract_text_by_page(file_path)

    # Strip newlines and spaces so a keyword that was broken across lines or
    # padded with spaces by the extraction can still match.
    full_text = ''.join(text).replace('\n', '').replace(' ', '')

    ques_list = [
        f"该招标文件对于'{keyword}'的要求是怎样的,请按json格式给我提供信息,键名为'{keyword}',若存在未知信息,在对应的键值中填'未知'。"
        for keyword in judge_list
        if keyword in full_text
    ]

    # Callers receive None (not an empty list) when nothing matched.
    return ques_list or None


# Example usage
if __name__ == '__main__':
    file_path = 'C:/Users/Administrator/Desktop/zbtest18_39-45.pdf'  # Replace with your actual PDF file path
    judge_list = ['施工机械设备', '企业信息登记']  # List of strings to check in the PDF
    questions = check_strings_in_pdf(file_path, judge_list)
    # check_strings_in_pdf returns None when nothing matched; iterate safely.
    for question in questions or []:
        print(question)