zbparse/flask_app/main/资格评审前判断.py

from 按页读取pdf import extract_text_by_page

def check_strings_in_pdf(file_path, judge_list=None):
    """Build follow-up questions for the keywords that appear in a PDF.

    The text extracted from ``file_path`` is stripped of newlines and
    spaces, then searched for each keyword. One question string is
    generated per keyword that is found, in the order of ``judge_list``.

    Args:
        file_path: Path of the PDF, passed straight to
            ``extract_text_by_page``.
        judge_list: Optional list of keyword strings to look for. When
            omitted (or ``None``) the built-in default keywords
            '施工机械设备' and '企业信息登记' are used.

    Returns:
        A list of question strings, or ``None`` when no keyword occurs
        in the document.
    """
    if judge_list is None:
        # Build the default per call so no mutable default is shared.
        judge_list = ['施工机械设备', '企业信息登记']

    # NOTE(review): extract_text_by_page presumably returns the document text
    # as one string (or an iterable of strings); ''.join() accepts both.
    text = extract_text_by_page(file_path)
    # Remove newlines and spaces so a keyword broken up by PDF layout
    # whitespace can still be matched as one contiguous substring.
    full_text = ''.join(text).replace('\n', '').replace(' ', '')

    # One question per keyword present in the text. Empty keywords are
    # skipped because '' is a substring of every string.
    ques_list = [
        f"该招标文件对于'{keyword}'的要求是怎样的，请按json格式给我提供信息，键名为'{keyword}'，若存在未知信息，在对应的键值中填'未知'。"
        for keyword in judge_list
        if keyword and keyword in full_text
    ]

    # Existing contract: None (not an empty list) means "nothing found".
    return ques_list or None

# Example usage: run this module directly to print the generated questions.
if __name__ == '__main__':
    file_path = 'C:/Users/Administrator/Desktop/zbtest18_39-45.pdf'  # Replace with your actual PDF file path

    # Pass only the path: the function searches for its default keywords
    # ('施工机械设备', '企业信息登记') when no keyword list is supplied.
    questions = check_strings_in_pdf(file_path)

    # check_strings_in_pdf returns None when no keyword is found, so fall
    # back to an empty list instead of iterating over None.
    for question in questions or []:
        print(question)