2024.8.1
This commit is contained in:
parent
bda2c87625
commit
bc540c7be8
@ -53,7 +53,7 @@ def process_table(table_rows):
|
|||||||
results["行政处罚决定"] = clean_text(str(table_rows[7].find_all('td')[1]))
|
results["行政处罚决定"] = clean_text(str(table_rows[7].find_all('td')[1]))
|
||||||
results["作出处罚决定的机关名称"] = clean_text(str(table_rows[8].find_all('td')[1]))
|
results["作出处罚决定的机关名称"] = clean_text(str(table_rows[8].find_all('td')[1]))
|
||||||
results["作出处罚决定的日期"] = clean_text(str(table_rows[9].find_all('td')[1]))
|
results["作出处罚决定的日期"] = clean_text(str(table_rows[9].find_all('td')[1]))
|
||||||
|
#TODO:键固定,值动态
|
||||||
else:
|
else:
|
||||||
temp_dict = {}
|
temp_dict = {}
|
||||||
for row in table_rows:
|
for row in table_rows:
|
||||||
@ -178,7 +178,7 @@ def process_in_batches(urls, batch_size=100):
|
|||||||
with pd.ExcelWriter('output_data2.xlsx', engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
|
with pd.ExcelWriter('output_data2.xlsx', engine='openpyxl', mode='a', if_sheet_exists='overlay') as writer:
|
||||||
combined_data.to_excel(writer, index=False, sheet_name='Sheet1')
|
combined_data.to_excel(writer, index=False, sheet_name='Sheet1')
|
||||||
|
|
||||||
|
#TODO:初始创建表头字段
|
||||||
# 读取URL列表
|
# 读取URL列表
|
||||||
with open('url2.txt', 'r') as file:
|
with open('url2.txt', 'r') as file:
|
||||||
urls = [line.strip() for line in file if line.strip()]
|
urls = [line.strip() for line in file if line.strip()]
|
||||||
|
9
urls.txt
9
urls.txt
@ -1,9 +0,0 @@
|
|||||||
https://www.cbirc.gov.cn/cn/view/pages/ItemDetail.html?docId=1144537&itemId=4115&generaltype=9
|
|
||||||
https://www.cbirc.gov.cn/cn/view/pages/ItemDetail.html?docId=1148013&itemId=4115&generaltype=9
|
|
||||||
https://www.cbirc.gov.cn/cn/view/pages/ItemDetail.html?docId=1138922&itemId=4115&generaltype=9
|
|
||||||
https://www.cbirc.gov.cn/cn/view/pages/ItemDetail.html?docId=1138462&itemId=4115&generaltype=9
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user