0

我有一个具有以下结构的 JSON 文件(以下是 的内容retweets[:2]):

[{'lang': 'en',
  'author_id': '1076979440372965377',
  'reply_settings': 'everyone',
  'entities': {'mentions': [{'start': 3,
     'end': 17,
     'username': 'Terry81987010',
     'url': '',
     'location': 'Florida',
     'entities': {'description': {'hashtags': [{'start': 29,
         'end': 32,
         'tag': '2A'}]}},
     'created_at': '2019-02-01T23:01:11.000Z',
     'protected': False,
     'public_metrics': {'followers_count': 520,
      'following_count': 567,
      'tweet_count': 34376,
      'listed_count': 1},
     'name': "Terry's Take",
     'verified': False,
     'id': '1091471553437593605',
     'description': 'Less government more Freedom #2A is a constitutional right. Trump2020, common sense rules, God bless America! Vet 82nd Airborne F/A, proud Republican',
     'profile_image_url': 'https://pbs.twimg.com/profile_images/1289626661911134208/WfztLkr1_normal.jpg'},
    {'start': 19,
     'end': 32,
     'username': 'DineshDSouza',
     'location': 'United States',
     'entities': {'url': {'urls': [{'start': 0,
         'end': 23,
         'expanded_url': 'https://podcasts.apple.com/us/podcast/the-dinesh-dsouza-podcast/id1547827376',
         'display_url': 'podcasts.apple.com/us/podcast/the…'}]},
      'description': {'urls': [{'start': 80,
         'end': 103,
         'expanded_url': 'https://podcasts.apple.com/us/podcast/the-dinesh-dsouza-podcast/id1547827376',
         'display_url': 'podcasts.apple.com/us/podcast/the…'}]}},
     'created_at': '2009-11-22T22:32:41.000Z',
     'protected': False,
     'public_metrics': {'followers_count': 1748832,
      'following_count': 5355,
      'tweet_count': 65674,
      'listed_count': 6966},
     'name': "Dinesh D'Souza",
     'verified': True,
     'pinned_tweet_id': '1393309917239562241',
     'id': '91882544',
     'description': "I am an author, filmmaker, and host of the Dinesh D'Souza Podcast.\n\nSubscribe: ",
     'profile_image_url': 'https://pbs.twimg.com/profile_images/890967538292711424/8puyFbiI_normal.jpg'}]},
  'conversation_id': '1253462541881106433',
  'created_at': '2020-04-23T23:15:32.000Z',
  'id': '1253462541881106433',
  'possibly_sensitive': False,
  'referenced_tweets': [{'type': 'retweeted',
    'id': '1253052684489437184',
    'in_reply_to_user_id': '91882544',
    'attachments': {'media_keys': ['3_1253052312144293888',
      '3_1253052620937277442'],
     'media': [{}, {}]},
    'entities': {'annotations': [{'start': 126,
       'end': 128,
       'probability': 0.514,
       'type': 'Organization',
       'normalized_text': 'CDC'},
      {'start': 145,
       'end': 146,
       'probability': 0.5139,
       'type': 'Place',
       'normalized_text': 'NY'}],
     'mentions': [{'start': 0,
       'end': 13,
       'username': 'DineshDSouza',
       'location': 'United States',
       'entities': {'url': {'urls': [{'start': 0,
           'end': 23,
           'expanded_url': 'https://podcasts.apple.com/us/podcast/the-dinesh-dsouza-podcast/id1547827376',
           'display_url': 'podcasts.apple.com/us/podcast/the…'}]},
        'description': {'urls': [{'start': 80,
           'end': 103,
           'expanded_url': 'https://podcasts.apple.com/us/podcast/the-dinesh-dsouza-podcast/id1547827376',
           'display_url': 'podcasts.apple.com/us/podcast/the…'}]}},
       'created_at': '2009-11-22T22:32:41.000Z',
       'protected': False,
       'public_metrics': {'followers_count': 1748832,
        'following_count': 5355,
        'tweet_count': 65674,
        'listed_count': 6966},
       'name': "Dinesh D'Souza",
       'verified': True,
       'pinned_tweet_id': '1393309917239562241',
       'id': '91882544',
       'description': "I am an author, filmmaker, and host of the Dinesh D'Souza Podcast.\n\nSubscribe:  ",
       'profile_image_url': 'https://pbs.twimg.com/profile_images/890967538292711424/8puyFbiI_normal.jpg'}],
     'urls': [{'start': 187,
       'end': 210,
       'expanded_url': 'https://twitter.com/Terry81987010/status/1253052684489437184/photo/1',
       'display_url': 'pic.twitter.com/H4NpN5ZMkW'},
      {'start': 187,
       'end': 210,
       'expanded_url': 'https://twitter.com/Terry81987010/status/1253052684489437184/photo/1',
       'display_url': 'pic.twitter.com/H4NpN5ZMkW'}]},
    'lang': 'en',
    'author_id': '1091471553437593605',
    'reply_settings': 'everyone',
    'conversation_id': '1253050942716551168',
    'created_at': '2020-04-22T20:06:55.000Z',
    'possibly_sensitive': False,
    'referenced_tweets': [{'type': 'replied_to', 'id': '1253050942716551168'}],
    'public_metrics': {'retweet_count': 208,
     'reply_count': 57,
     'like_count': 402,
     'quote_count': 38},
    'source': 'Twitter Web App',
    'text': "@DineshDSouza Here's some proof of artificially inflating the cv deaths. Noone is dying of pneumonia anymore according to the CDC. And of course NY getting paid for each cv death $60,000",
    'context_annotations': [{'domain': {'id': '10',
       'name': 'Person',
       'description': 'Named people in the world like Nelson Mandela'},
      'entity': {'id': '1138120064119369729', 'name': "Dinesh D'Souza"}},
     {'domain': {'id': '35',
       'name': 'Politician',
       'description': 'Politicians in the world, like Joe Biden'},
      'entity': {'id': '1138120064119369729', 'name': "Dinesh D'Souza"}}],
    'author': {'url': '',
     'username': 'Terry81987010',
     'location': 'Florida',
     'entities': {'description': {'hashtags': [{'start': 29,
         'end': 32,
         'tag': '2A'}]}},
     'created_at': '2019-02-01T23:01:11.000Z',
     'protected': False,
     'public_metrics': {'followers_count': 520,
      'following_count': 567,
      'tweet_count': 34376,
      'listed_count': 1},
     'name': "Terry's Take",
     'verified': False,
     'id': '1091471553437593605',
     'description': 'Less government more Freedom #2A is a constitutional right. Trump2020, common sense rules, God bless America! Vet 82nd Airborne F/A, proud Republican',
     'profile_image_url': 'https://pbs.twimg.com/profile_images/1289626661911134208/WfztLkr1_normal.jpg'},
    'in_reply_to_user': {'username': 'DineshDSouza',
     'location': 'United States',
     'entities': {'url': {'urls': [{'start': 0,
         'end': 23,
         'expanded_url': 'https://podcasts.apple.com/us/podcast/the-dinesh-dsouza-podcast/id1547827376',
         'display_url': 'podcasts.apple.com/us/podcast/the…'}]},
      'description': {'urls': [{'start': 80,
         'end': 103,
         'expanded_url': 'https://podcasts.apple.com/us/podcast/the-dinesh-dsouza-podcast/id1547827376',
         'display_url': 'podcasts.apple.com/us/podcast/the…'}]}},
     'created_at': '2009-11-22T22:32:41.000Z',
     'protected': False,
     'public_metrics': {'followers_count': 1748832,
      'following_count': 5355,
      'tweet_count': 65674,
      'listed_count': 6966},
     'name': "Dinesh D'Souza",
     'verified': True,
     'pinned_tweet_id': '1393309917239562241',
     'id': '91882544',
     'description': "I am an author, filmmaker, and host of the Dinesh D'Souza Podcast.\n\nSubscribe:  ",
     'profile_image_url': 'https://pbs.twimg.com/profile_images/890967538292711424/8puyFbiI_normal.jpg'}}],
  'public_metrics': {'retweet_count': 208,
   'reply_count': 0,
   'like_count': 0,
   'quote_count': 0},
  'source': 'Twitter for iPhone',
  'text': "RT @Terry81987010: @DineshDSouza Here's some proof of artificially inflating the cv deaths. Noone is dying of pneumonia anymore according t…",
  'context_annotations': [{'domain': {'id': '10',
     'name': 'Person',
     'description': 'Named people in the world like Nelson Mandela'},
    'entity': {'id': '1138120064119369729', 'name': "Dinesh D'Souza"}},
   {'domain': {'id': '35',
     'name': 'Politician',
     'description': 'Politicians in the world, like Joe Biden'},
    'entity': {'id': '1138120064119369729', 'name': "Dinesh D'Souza"}}],
  'author': {'url': '',
   'username': 'set1952',
   'location': 'Etats-Unis',
   'created_at': '2018-12-23T23:14:42.000Z',
   'protected': False,
   'public_metrics': {'followers_count': 103,
    'following_count': 44,
    'tweet_count': 44803,
    'listed_count': 0},
   'name': 'SunSet1952',
   'verified': False,
   'id': '1076979440372965377',
   'description': '',
   'profile_image_url': 'https://abs.twimg.com/sticky/default_profile_images/default_profile_normal.png'},
  '__twarc': {'url': 'https://api.twitter.com/2/tweets/search/all?expansions=author_id%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id%2Centities.mentions.username%2Cattachments.poll_ids%2Cattachments.media_keys%2Cgeo.place_id&user.fields=created_at%2Cdescription%2Centities%2Cid%2Clocation%2Cname%2Cpinned_tweet_id%2Cprofile_image_url%2Cprotected%2Cpublic_metrics%2Curl%2Cusername%2Cverified%2Cwithheld&tweet.fields=attachments%2Cauthor_id%2Ccontext_annotations%2Cconversation_id%2Ccreated_at%2Centities%2Cgeo%2Cid%2Cin_reply_to_user_id%2Clang%2Cpublic_metrics%2Ctext%2Cpossibly_sensitive%2Creferenced_tweets%2Creply_settings%2Csource%2Cwithheld&media.fields=duration_ms%2Cheight%2Cmedia_key%2Cpreview_image_url%2Ctype%2Curl%2Cwidth%2Cpublic_metrics&poll.fields=duration_minutes%2Cend_datetime%2Cid%2Coptions%2Cvoting_status&place.fields=contained_within%2Ccountry%2Ccountry_code%2Cfull_name%2Cgeo%2Cid%2Cname%2Cplace_type&max_results=500&query=retweets_of%3ATerry81987010&start_time=2020-03-09T00%3A00%3A00%2B00%3A00&end_time=2020-04-24T00%3A00%3A00%2B00%3A00',
   'version': '2.0.8',
   'retrieved_at': '2021-05-17T17:13:17+00:00'}},
 {'attachments': {'media_keys': ['3_1253366603061878792'],
   'media': [{'media_key': '3_1253366603061878792',
     'type': 'photo',
     'height': 1920,
     'width': 1080,
     'url': 'https://pbs.twimg.com/media/EWTaSOzXYAgBEqO.jpg'}]},
  'lang': 'en',
  'author_id': '157054921',
  'reply_settings': 'everyone',
  'entities': {'mentions': [{'start': 3,
     'end': 17,
     'username': 'Terry81987010',
     'url': '',
     'location': 'Florida',
     'entities': {'description': {'hashtags': [{'start': 29,
         'end': 32,
         'tag': '2A'}]}},
     'created_at': '2019-02-01T23:01:11.000Z',
     'protected': False,
     'public_metrics': {'followers_count': 520,
      'following_count': 567,
      'tweet_count': 34376,
      'listed_count': 1},
     'name': "Terry's Take",
     'verified': False,
     'id': '1091471553437593605',
     'description': 'Less government more Freedom #2A is a constitutional right. Trump2020, common sense rules, God bless America! Vet 82nd Airborne F/A, proud Republican',
     'profile_image_url': 'https://pbs.twimg.com/profile_images/1289626661911134208/WfztLkr1_normal.jpg'},
    {'start': 19, 'end': 31, 'username': 'mitchellvii'}],
   'urls': [{'start': 60,
     'end': 83,
     'expanded_url': 'https://twitter.com/Terry81987010/status/1253366701112070147/photo/1',
     'display_url': 'pic.twitter.com/mQLE5TZviE'}]},
  'conversation_id': '1253446311753453576',
  'created_at': '2020-04-23T22:11:03.000Z',
  'id': '1253446311753453576',
  'possibly_sensitive': False,
  'referenced_tweets': [{'type': 'retweeted',
    'id': '1253366701112070147',
    'in_reply_to_user_id': '17980523',
    'attachments': {'media_keys': ['3_1253366603061878792'],
     'media': [{'media_key': '3_1253366603061878792',
       'type': 'photo',
       'height': 1920,
       'width': 1080,
       'url': 'https://pbs.twimg.com/media/EWTaSOzXYAgBEqO.jpg'}]},
    'lang': 'en',
    'author_id': '1091471553437593605',
    'reply_settings': 'everyone',
    'entities': {'mentions': [{'start': 0,
       'end': 12,
       'username': 'mitchellvii'}],
     'urls': [{'start': 41,
       'end': 64,
       'expanded_url': 'https://twitter.com/Terry81987010/status/1253366701112070147/photo/1',
       'display_url': 'pic.twitter.com/mQLE5TZviE'}]},
    'conversation_id': '1253364124416978951',
    'created_at': '2020-04-23T16:54:42.000Z',
    'possibly_sensitive': False,
    'referenced_tweets': [{'type': 'replied_to', 'id': '1253364124416978951'}],
    'public_metrics': {'retweet_count': 4,
     'reply_count': 1,
     'like_count': 4,
     'quote_count': 0},
    'source': 'Twitter Web App',
    'text': '@mitchellvii Is it really a coincidence?',
    'context_annotations': [{'domain': {'id': '10',
       'name': 'Person',
       'description': 'Named people in the world like Nelson Mandela'},
      'entity': {'id': '1138048048972517376',
       'name': 'Bill Mitchell',
       'description': 'Professor'}}],
    'author': {'url': '',
     'username': 'Terry81987010',
     'location': 'Florida',
     'entities': {'description': {'hashtags': [{'start': 29,
         'end': 32,
         'tag': '2A'}]}},
     'created_at': '2019-02-01T23:01:11.000Z',
     'protected': False,
     'public_metrics': {'followers_count': 520,
      'following_count': 567,
      'tweet_count': 34376,
      'listed_count': 1},
     'name': "Terry's Take",
     'verified': False,
     'id': '1091471553437593605',
     'description': 'Less government more Freedom #2A is a constitutional right. Trump2020, common sense rules, God bless America! Vet 82nd Airborne F/A, proud Republican',
     'profile_image_url': 'https://pbs.twimg.com/profile_images/1289626661911134208/WfztLkr1_normal.jpg'},
    'in_reply_to_user': {}}],
  'public_metrics': {'retweet_count': 4,
   'reply_count': 0,
   'like_count': 0,
   'quote_count': 0},
  'source': 'Twitter for iPhone',
  'text': 'RT @Terry81987010: @mitchellvii Is it really a coincidence?',
  'context_annotations': [{'domain': {'id': '10',
     'name': 'Person',
     'description': 'Named people in the world like Nelson Mandela'},
    'entity': {'id': '1138048048972517376',
     'name': 'Bill Mitchell',
     'description': 'Professor'}}],
  'author': {'url': '',
   'username': 'BillyBoysDaddy',
   'entities': {'description': {'hashtags': [{'start': 93,
       'end': 98,
       'tag': 'MAGA'}]}},
   'created_at': '2010-06-18T18:29:28.000Z',
   'protected': False,
   'public_metrics': {'followers_count': 10510,
    'following_count': 9001,
    'tweet_count': 444455,
    'listed_count': 171},
   'name': 'Sonlight ',
   'verified': False,
   'id': '157054921',
   'description': 'The way that it is ... is ... the way that it is ...... and it could not be otherwise ...... #MAGA',
   'profile_image_url': 'https://pbs.twimg.com/profile_images/3464880142/bb34db1c7344eca5bd8ce7196ff0e003_normal.jpeg'},
  '__twarc': {'url': 'https://api.twitter.com/2/tweets/search/all?expansions=author_id%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id%2Centities.mentions.username%2Cattachments.poll_ids%2Cattachments.media_keys%2Cgeo.place_id&user.fields=created_at%2Cdescription%2Centities%2Cid%2Clocation%2Cname%2Cpinned_tweet_id%2Cprofile_image_url%2Cprotected%2Cpublic_metrics%2Curl%2Cusername%2Cverified%2Cwithheld&tweet.fields=attachments%2Cauthor_id%2Ccontext_annotations%2Cconversation_id%2Ccreated_at%2Centities%2Cgeo%2Cid%2Cin_reply_to_user_id%2Clang%2Cpublic_metrics%2Ctext%2Cpossibly_sensitive%2Creferenced_tweets%2Creply_settings%2Csource%2Cwithheld&media.fields=duration_ms%2Cheight%2Cmedia_key%2Cpreview_image_url%2Ctype%2Curl%2Cwidth%2Cpublic_metrics&poll.fields=duration_minutes%2Cend_datetime%2Cid%2Coptions%2Cvoting_status&place.fields=contained_within%2Ccountry%2Ccountry_code%2Cfull_name%2Cgeo%2Cid%2Cname%2Cplace_type&max_results=500&query=retweets_of%3ATerry81987010&start_time=2020-03-09T00%3A00%3A00%2B00%3A00&end_time=2020-04-24T00%3A00%3A00%2B00%3A00',
   'version': '2.0.8',
   'retrieved_at': '2021-05-17T17:13:17+00:00'}}]

我需要什么: 我必须将 JSON 文件导入到 Pandas 数据框中,以便每个字段都有一个单独的列(即,完全展平嵌套列表)。例如,不应只有一列referenced_tweets包含typeid等,而应为每一列分别设置。

这是我的代码:

retweets = []
for line in open('Data/usersRetweetsFlatten_sample.json', 'r'):
    retweets.append(json.loads(line))

df_retweets = json_normalize(
    retweets, 
    record_path=['referenced_tweets'], 
    meta_prefix=".", 
    meta= ["text", "created_at", "author_id", "conversation_id", "id"], 
    errors='ignore'
)

df_retweets[['author.username', 'type', 'id', '.id', 'referenced_tweets']].head()

问题:df_retweets.head()当我运行最后一行id代码(例如,该列的值始终为,而 referenced_tweets 列中的类型为!!!! 请参阅所附结果数据框的屏幕截图。typereferenced_tweetstyperetweetedreplied_to

在此处输入图像描述

4

1 回答 1

1

输出与预期一致,并且与您的 json 结构一致,该结构非常复杂(多级)。

“类型”列从一个级别派生它的值。

>>> retweets[0]["referenced_tweets"][0]["type"]
"retweeted"

虽然“referenced_tweets”列来自更嵌套(即更深)的级别:

>>> retweets[0]["referenced_tweets"][0]["referenced_tweets"][0]["type"]
'replied_to'
于 2021-08-05T20:08:12.763 回答