2024.8.1
This commit is contained in:
parent
bda2c87625
commit
bc540c7be8
@ -53,7 +53,7 @@ def process_table(table_rows):
|
||||
results["行政处罚决定"] = clean_text(str(table_rows[7].find_all('td')[1]))
|
||||
results["作出处罚决定的机关名称"] = clean_text(str(table_rows[8].find_all('td')[1]))
|
||||
results["作出处罚决定的日期"] = clean_text(str(table_rows[9].find_all('td')[1]))
|
||||
|
||||
#TODO:键固定,值动态
|
||||
else:
|
||||
temp_dict = {}
|
||||
for row in table_rows:
|
||||
@ -178,7 +178,7 @@ def process_in_batches(urls, batch_size=100):
|
||||
with pd.ExcelWriter('output_data2.xlsx', engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
|
||||
combined_data.to_excel(writer, index=False, sheet_name='Sheet1')
|
||||
|
||||
|
||||
#TODO:初始创建表头字段
|
||||
# 读取URL列表
|
||||
with open('url2.txt', 'r') as file:
|
||||
urls = [line.strip() for line in file if line.strip()]
|
||||
|
9
urls.txt
9
urls.txt
@ -1,9 +0,0 @@
|
||||
https://www.cbirc.gov.cn/cn/view/pages/ItemDetail.html?docId=1144537&itemId=4115&generaltype=9
|
||||
https://www.cbirc.gov.cn/cn/view/pages/ItemDetail.html?docId=1148013&itemId=4115&generaltype=9
|
||||
https://www.cbirc.gov.cn/cn/view/pages/ItemDetail.html?docId=1138922&itemId=4115&generaltype=9
|
||||
https://www.cbirc.gov.cn/cn/view/pages/ItemDetail.html?docId=1138462&itemId=4115&generaltype=9
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user