python - 从嵌套字典到 python Dataframe

Question

我有一个嵌套字典的示例，如下所示：

# Sample payload: a list holding one nested result record.
# Numeric emission values are Decimal instances, the rest are plain Python types.
data =  [{
         'resultInfo': {
             'load': None,
             'unload': {
                 'weight': 59.0,
                 'unit': 'ton',
                 'tonsPerTeu': None,
                 'tonsPerFeu': None,
                 'freightId': None,
                 'showEmissionsAtResponse': True
             },
             'location': 'zip:63937',
             'freightId': None,
             'emissionPercentage': 1.0,
             'directDistance': 767.71
         },
         'emissions': {
             'primaryEnergy': {
                 'rail': None,
                 'sea': None,
                 'air': None,
                 'inlandWaterways': None,
                 'road': {
                     '_value_1': Decimal('70351.631210000000'),
                     'wellToTank': Decimal('13412'),
                     'tankToWheel': Decimal('56939')
                 },
                 'logisticsite': None,
                 'transfer': None,
                 'unit': 'MegaJoule'
             },
             'carbonDioxide': {
                 'rail': None,
                 'sea': None,
                 'air': None,
                 'inlandWaterways': None,
                 'road': {
                     '_value_1': Decimal('4.866239643000'),
                     'wellToTank': Decimal('0.902'),
                     'tankToWheel': Decimal('3.963')
                 }
             }
         }
    }]

type(data) 的结果是一个列表（list）。

我想把它转换成 DataFrame 格式，预期的输出如下：

primaryEnergy_wellToTank    primaryEnergy_tankToWheel   carbonDioxide_wellToTank    carbonDioxide_tankToWheel
                   13412                        56939                      0.902                        3.963

我尝试过用 pd.DataFrame 做一些转换：

# Build one column per key of mydict, wrapping each value in a pandas Series.
df = pd.DataFrame(dict([ (k,pd.Series(v)) for k,v in mydict.items() ]))

但到目前为止，这些尝试都没有成功。

怎样才能做到这一点？

以下是我使用 df = pd.json_normalize(data) 时遇到的错误：

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\zeep\xsd\valueobjects.py in __getattribute__(self, key)
    142         try:
--> 143             return self.__values__[key]
    144         except KeyError:

KeyError: 'values'

During handling of the above exception, another exception occurred:

AttributeError                            Traceback (most recent call last)
<ipython-input-180-cc2694b5448e> in <module>
----> 1 df = pd.json_normalize(result.result)

~\AppData\Roaming\Python\Python37\site-packages\pandas\io\json\_normalize.py in _json_normalize(data, record_path, meta, meta_prefix, record_prefix, errors, sep, max_level)
    272 
    273     if record_path is None:
--> 274         if any([isinstance(x, dict) for x in y.values()] for y in data):
    275             # naive normalization, this is idempotent for flat records
    276             # and potentially will inflate the data considerably for

~\AppData\Roaming\Python\Python37\site-packages\pandas\io\json\_normalize.py in <genexpr>(.0)
    272 
    273     if record_path is None:
--> 274         if any([isinstance(x, dict) for x in y.values()] for y in data):
    275             # naive normalization, this is idempotent for flat records
    276             # and potentially will inflate the data considerably for

~\AppData\Local\Continuum\anaconda3\lib\site-packages\zeep\xsd\valueobjects.py in __getattribute__(self, key)
    144         except KeyError:
    145             raise AttributeError(
--> 146                 "%s instance has no attribute '%s'" % (self.__class__.__name__, key)
    147             )
    148 

AttributeError: DistributionLoadResult instance has no attribute 'values'

我可以通过使用 serialize_object 函数来解决这个问题。

score 4 · Accepted Answer

如果这个 list 是一个由字典组成的列表（list of dicts），其中 resultInfo 结构重复出现（参见本文底部的数据），那么可以使用 json_normalize。
创建 df 之后，可以用 pandas.DataFrame.drop 删除不需要的列。

import pandas as pd

# Flatten the nested dicts: each record in `data` becomes one row, and nested
# keys are joined with '.' into column names (e.g. resultInfo.unload.weight).
df = pd.json_normalize(data)

# display(df)
  resultInfo.load  resultInfo.unload.weight resultInfo.unload.unit resultInfo.unload.tonsPerTeu resultInfo.unload.tonsPerFeu resultInfo.unload.freightId  resultInfo.unload.showEmissionsAtResponse resultInfo.location resultInfo.freightId  resultInfo.emissionPercentage  resultInfo.directDistance emissions.primaryEnergy.rail emissions.primaryEnergy.sea emissions.primaryEnergy.air emissions.primaryEnergy.inlandWaterways  emissions.primaryEnergy.road._value_1  emissions.primaryEnergy.road.wellToTank  emissions.primaryEnergy.road.tankToWheel emissions.primaryEnergy.logisticsite emissions.primaryEnergy.transfer emissions.primaryEnergy.unit emissions.carbonDioxide.rail emissions.carbonDioxide.sea emissions.carbonDioxide.air emissions.carbonDioxide.inlandWaterways  emissions.carbonDioxide.road._value_1  emissions.carbonDioxide.road.wellToTank  emissions.carbonDioxide.road.tankToWheel
0            None                      59.0                    ton                         None                         None                        None                                       True           zip:63937                 None                            1.0                     767.71                         None                        None                        None                                    None                            70351.63121                                    13412                                      5693                                 None                             None                    MegaJoule                         None                        None                        None                                    None                                4.86624                                    0.902                                      3.96
1            None                      59.0                    ton                         None                         None                        None                                       True           zip:63937                 None                            1.0                     767.71                         None                        None                        None                                    None                            70351.63121                                    13412                                      5693                                 None                             None                    MegaJoule                         None                        None                        None                                    None                                4.86624                                    0.902                                      3.96
2            None                      59.0                    ton                         None                         None                        None                                       True           zip:63937                 None                            1.0                     767.71                         None                        None                        None                                    None                            70351.63121                                    13412                                      5693                                 None                             None                    MegaJoule                         None                        None                        None                                    None                                4.86624                                    0.902                                      3.96

数据

def _sample_record():
    """Build one sample result record as a fresh nested dict.

    Every call creates a new dict tree, so the records placed in ``data``
    are equal in value but share no mutable state.
    """
    unload = {
        'weight': 59.0,
        'unit': 'ton',
        'tonsPerTeu': None,
        'tonsPerFeu': None,
        'freightId': None,
        'showEmissionsAtResponse': True,
    }
    result_info = {
        'load': None,
        'unload': unload,
        'location': 'zip:63937',
        'freightId': None,
        'emissionPercentage': 1.0,
        'directDistance': 767.71,
    }
    primary_energy = {
        'rail': None,
        'sea': None,
        'air': None,
        'inlandWaterways': None,
        'road': {
            '_value_1': 70351.631210000000,
            'wellToTank': 13412,
            'tankToWheel': 5693,
        },
        'logisticsite': None,
        'transfer': None,
        'unit': 'MegaJoule',
    }
    carbon_dioxide = {
        'rail': None,
        'sea': None,
        'air': None,
        'inlandWaterways': None,
        'road': {
            '_value_1': 4.866239643000,
            'wellToTank': 0.902,
            'tankToWheel': 3.96,
        },
    }
    return {
        'resultInfo': result_info,
        'emissions': {
            'primaryEnergy': primary_energy,
            'carbonDioxide': carbon_dioxide,
        },
    }


# Three identical records, matching the three rows in the displayed output.
data = [_sample_record() for _ in range(3)]

python - 从嵌套字典到 python Dataframe

1 回答 1

数据

Related

Reference