0

我有一个嵌套for循环,它调用 spotipy 和歌词天才 API,目的是构建一个包含歌曲信息及其相关歌词的数据框。然而,在提取前十位艺术家的信息后,对 spotipy api 的持续请求会导致“请求异常超时错误”。

这是代码:

import requests
import pandas as pd
from lyricsgenius import genius
from requests.models import ReadTimeoutError
import json
import time
from collections import defaultdict
from pathlib import Path
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyOAuth
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
#import os
import json
import re 




#leveraging the client ID to pull Genius data
api = genius.Genius("******")


#bring in spotipy api
client_credentials_manager = SpotifyClientCredentials(...,requests_timeout=10000)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

sample_artists = ['Saigon','Rich Boy','Gorilla Zoe','Lupe Fiasco'...]
lyric_read = pd.read_csv('XXLFreshmanList.csv')



query_number = 0
tracks = []
for artist in sample_artists:
    query_number += 1
    artists = []
    titles = [] 
    year =  []
    lyrics = []
    popularity = []
    print('\nQuery number:', query_number)
    #was dealing with the lyricgenius API, where it would time out after a few seconds, this solution continues to try the API call until it works 
    time.sleep(0.01)
    while True:
        try:
            artist = api.search_artist(artist, max_songs=10, sort = 'popularity', allow_name_change=True, include_features=False)
            break
        except:
            pass
    songs = artist.songs
    song_number = 0
    for song in songs:
        if song is not None:
            song_number += 1
            print('\nSong number:', song_number)
            print('\nNow adding: Artist')
            artists.append(song.artist)
            thisong = song.artist
            print('Now adding: Title')
            titles.append(song.title)
            thistitle = song.title
            print('Now adding: Lyrics')
            lyrics.append(song.lyrics)
            search_query = "artist:" + thisong + " track:" + thistitle
            try:
                sp_search = sp.search(q = search_query, limit= 1, offset = 0, market= 'US', type = 'track')
            except requests.exceptions.Timeout:
                print("Timeout occured")
            json_obj1 = json.dumps(sp_search)
            json_obj2 = json.loads(json_obj1)
            tracknum = 0
            for track in json_obj2:
                try:
                    popularity_json = json_obj2["tracks"]["items"][tracknum]["popularity"]
                    tracknum += 1
                except IndexError:
                    print("index error")
                    popularity_json = 0
                except ConnectionResetError:
                    print("connection reset error")
                    popularity_json = 0
                if popularity_json is not None:
                    popularity.append(popularity_json)
                
    a = {'artist':artists,  'title':titles, 'lyrics':lyrics, 'popularity': popularity}
     ..Dataframe stuff...

这是错误(出于隐私考虑删除了某些部分):

Traceback (most recent call last):
  File "c:\...", line 98, in <module>
    sp_search = sp.search(q = search_query, limit= 1, offset = 0, market= 'US', type = 'track')
  File "..._qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\spotipy\client.py", line 544, in search
    return self._get(
  File "..._qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\spotipy\client.py", line 291, in _get
    return self._internal_call("GET", url, payload, kwargs)
  File "..._qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\spotipy\client.py", line 240, in _internal_call
    response = self._session.request(
  File "..._qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\requests\sessions.py", line 542, in request
    resp = self.send(prep, **send_kwargs)
  File "..._qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\requests\sessions.py", line 655, in send
    r = adapter.send(request, **kwargs)
  File "..._qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\requests\adapters.py", line 498, in send
    raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))

当出现上述连接错误时,有什么方法可以修改代码以继续运行?

4

0 回答 0