0

将bs4抓取数据转换为excel时,只有最后一个数据作为输出

如果能给出带有 # 注释的示例代码,或者附上相应的解释,也可以。

网站链接

import requests
import pandas as pd

# Script from the question: query the doctor-search endpoint and try to save
# the results to products.csv. Only the last doctor ends up in the file; the
# notes inside the loop explain why.

# Request headers; the Authorization value is a hard-coded token string.
headers = {'Authorization' : 'eyJhbGciOiJodHRwOi8vd3d3LnczLm9yZy8yMDAxLzA0L3htbGRzaWctbW9yZSNobWFjLXNoYTI1NiIsInR5cCI6IkpXVCJ9.eyJodHRwOi8vc2NoZW1hcy54bWxzb2FwLm9yZy93cy8yMDA1LzA1L2lkZW50aXR5L2NsYWltcy9uYW1lIjoiYWRtaW4iLCJleHAiOjIxMjcwNDQ1MTcsImlzcyI6Imh0dHBzOi8vZGV2ZWxvcGVyLmhlYWx0aHBvc3QuY29tIiwiYXVkIjoiaHR0cHM6Ly9kZXZlbG9wZXIuaGVhbHRocG9zdC5jb20ifQ.zNvR3WpI17CCMC7rIrHQCrnJg_6qGM21BvTP_ed_Hj8'}    
# Search payload sent as the JSON body; the specialty filter is "Gastroenterology".
# "start"/"rows" presumably control paging (offset 0, 10 results) -- TODO confirm against the API.
json_post = {"query":"","start":0,"rows":10,"selectedFilters":{"availability":[],"clinicalInterest":[],"distance":[20],"gender":["Both"],"hasOnlineScheduling":False,"insurance":[],"isMHMG":False,"language":[],"locationType":[],"lonlat":[-95.36,29.76],"onlineScheduling":["Any"],"specialty":["Gastroenterology"]}}
# POST the payload and decode the response body as JSON.
req = requests.post("https://api.memorialhermann.org/api/doctorsearch", json=json_post, headers=headers)
data = req.json()

# Iterate over every record under the 'docs' key of the response.
for doctor in data['docs']:
    # The f-strings turn each field into its string form.
    doctor_name = (f"{doctor['Name']}")
    specialty = (f"{doctor['PrimarySpecialty']}")
    facility = (f"{doctor['PrimaryFacility']}")
    # Used only by the print() at the end of the loop body.
    info = [doctor_name, specialty, facility]
    #print(info)
    #info = (f"{doctor['Name']:30}  {doctor['PrimarySpecialty']:20}  {doctor['PrimaryFacility']}")
    #print(info)
    # NOTE: a new one-row DataFrame is created on every iteration, so `df`
    # never holds more than the current doctor. The 'Price' and 'Rating'
    # columns actually receive the specialty and the facility.
    df = pd.DataFrame({'Doctor Name':doctor_name,'Price':specialty,'Rating':facility}, index=['0'])
    # NOTE: this is the cause of the problem in the question -- to_csv is called
    # inside the loop on the same path, so each iteration replaces the file
    # written by the previous one and only the last doctor remains.
    df.to_csv('products.csv', index=['0'], encoding='utf-8')
    print(info)

1

4

2 回答 2

2

因为构造 DataFrame 和保存到 csv 这两步都写在循环里面,每次迭代都会重新写入同一个文件并覆盖上一次的内容,所以最终文件里只剩下最后一行。实际上你并不需要循环:

import requests
import pandas as pd

# Query the doctor-search endpoint and write the whole result set to
# products.csv in a single call -- no per-record loop is needed because
# pandas can build a DataFrame straight from the list of records.

# Request headers; the Authorization value is a hard-coded token string.
headers = {'Authorization' : 'eyJhbGciOiJodHRwOi8vd3d3LnczLm9yZy8yMDAxLzA0L3htbGRzaWctbW9yZSNobWFjLXNoYTI1NiIsInR5cCI6IkpXVCJ9.eyJodHRwOi8vc2NoZW1hcy54bWxzb2FwLm9yZy93cy8yMDA1LzA1L2lkZW50aXR5L2NsYWltcy9uYW1lIjoiYWRtaW4iLCJleHAiOjIxMjcwNDQ1MTcsImlzcyI6Imh0dHBzOi8vZGV2ZWxvcGVyLmhlYWx0aHBvc3QuY29tIiwiYXVkIjoiaHR0cHM6Ly9kZXZlbG9wZXIuaGVhbHRocG9zdC5jb20ifQ.zNvR3WpI17CCMC7rIrHQCrnJg_6qGM21BvTP_ed_Hj8'}
# Search payload sent as the JSON body; the specialty filter is "Gastroenterology".
json_post = {"query":"","start":0,"rows":10,"selectedFilters":{"availability":[],"clinicalInterest":[],"distance":[20],"gender":["Both"],"hasOnlineScheduling":False,"insurance":[],"isMHMG":False,"language":[],"locationType":[],"lonlat":[-95.36,29.76],"onlineScheduling":["Any"],"specialty":["Gastroenterology"]}}
# The timeout keeps the script from blocking forever on a stalled connection.
req = requests.post("https://api.memorialhermann.org/api/doctorsearch", json=json_post, headers=headers, timeout=30)
# Fail immediately with an HTTPError on a 4xx/5xx reply instead of hitting a
# confusing JSON-decoding error or KeyError further down.
req.raise_for_status()
data = req.json()
print(data['docs'])
# Every record under 'docs' becomes one row; the columns come from the record keys.
df = pd.DataFrame(data['docs'])

# Written exactly once, after all rows are in the frame.
df.to_csv('products.csv', encoding='utf-8')
于 2022-01-31T16:04:43.563 回答
2
import requests
import pandas as pd

# Query the doctor-search endpoint, keep three fields per doctor, and write
# all of them to products.csv. The rows are collected first and the file is
# written once, after the collection is complete.

# Request headers; the Authorization value is a hard-coded token string.
headers = {'Authorization' : 'eyJhbGciOiJodHRwOi8vd3d3LnczLm9yZy8yMDAxLzA0L3htbGRzaWctbW9yZSNobWFjLXNoYTI1NiIsInR5cCI6IkpXVCJ9.eyJodHRwOi8vc2NoZW1hcy54bWxzb2FwLm9yZy93cy8yMDA1LzA1L2lkZW50aXR5L2NsYWltcy9uYW1lIjoiYWRtaW4iLCJleHAiOjIxMjcwNDQ1MTcsImlzcyI6Imh0dHBzOi8vZGV2ZWxvcGVyLmhlYWx0aHBvc3QuY29tIiwiYXVkIjoiaHR0cHM6Ly9kZXZlbG9wZXIuaGVhbHRocG9zdC5jb20ifQ.zNvR3WpI17CCMC7rIrHQCrnJg_6qGM21BvTP_ed_Hj8'}
# Search payload sent as the JSON body; the specialty filter is "Gastroenterology".
json_post = {"query":"","start":0,"rows":10,"selectedFilters":{"availability":[],"clinicalInterest":[],"distance":[20],"gender":["Both"],"hasOnlineScheduling":False,"insurance":[],"isMHMG":False,"language":[],"locationType":[],"lonlat":[-95.36,29.76],"onlineScheduling":["Any"],"specialty":["Gastroenterology"]}}
# The timeout keeps the script from blocking forever on a stalled connection.
req = requests.post("https://api.memorialhermann.org/api/doctorsearch", json=json_post, headers=headers, timeout=30)
# Fail immediately with an HTTPError on a 4xx/5xx reply instead of hitting a
# confusing JSON-decoding error or KeyError further down.
req.raise_for_status()
data = req.json()

# One dict per doctor. str() keeps the values stringified, as the original
# f-strings did. The 'Price' and 'Rating' headers are kept from the question
# for compatibility, although they hold the specialty and the facility.
data_all = [
    {
        'Doctor Name': str(doctor['Name']),
        'Price': str(doctor['PrimarySpecialty']),
        'Rating': str(doctor['PrimaryFacility']),
    }
    for doctor in data['docs']
]
# Build the frame from all rows at once and write the file a single time.
df = pd.DataFrame(data_all)
df.to_csv('products.csv',encoding='utf-8')
于 2022-01-31T16:05:20.233 回答