Python with DataFrame merge of aggregations ...error: '' is both an index level and a column label, which is ambiguous

import pandas as pd

data1 = [
{'File': '7396ee8aea09.json', 'Last': '2019-04-25 20:24:28+00:00', 'Size': 78796, 'COLA': 'STANDARD'}, 
{'File': '7777682e4/ip/done', 'Last': '2019-04-24 21:45:01+00:00', 'Size': 4, 'COLA': 'CUSTOM'}, 
{'File': 'erparameters.json', 'Last': '2019-04-24 21:45:01+00:00', 'Size': 351, 'COLA': 'STANDARD'}
]
data2 = [
{'File': '3343e4/ip/ip.json', 'Last': '2019-04-24 21:45:01+00:00', 'Size': 20, 'COLA': 'STANDARD'}, 
{'File': 'ta-00000-of-00001', 'Last': '2019-04-24 22:43:17+00:00', 'Size': 140216236, 'COLA': 'STANDARD'}, 
{'File': '-20514.ckpt.index', 'Last': '2019-04-24 22:43:17+00:00', 'Size': 3168, 'COLA': 'STANDARD'}, 
]
# right now 2 for the prova but could be much more..
datas = [data1, data2]
results = None 
for data in datas:
  df = pd.DataFrame(data, columns=['COLA','Size','Last'])\
              .groupby(['COLA'])\
              .agg({
                  'COLA': [('COLA', 'max'), ('Count', 'count')],
                  'Size': [('Size', 'sum')],
                  'Last': [('Last', 'max')]
                  })
  df.columns = ['COLA', 'Count', 'Size', 'Last']

  if results is None:
    results = df
  else:
    results = results.merge(df,
                   on=['COLA', 'Count', 'Size', 'Last'],
                   how='outer',
                   )\
             .groupby(['COLA'], as_index=False)\
             .agg({
                   'COLA': [('COLA', 'max'), ('Count', 'sum')],
                   'Size': [('Size', 'sum')],
                   'Last': [('Last', 'max')]
                 })

print (results)

expected results is in that format:

COLA have all unique COLA labels
Size is the sum of all 'Size' for that COLA label group across all sets.
Count is the total count of that COLA label group across all sets.
Last has the greatest date of that COLA label group across all sets.

Example:

results = [
     {'COLA': 'STANDARD', 'Size': 140298571, 'Count': 5, 'Last': '2019-04-25 20:24:28+00:00'}, 
     {'COLA': 'CUSTOM',   'Size': 4,         'Count': 1, 'Last': '2019-04-24 21:45:01+00:00'}
]

df = pd.concat(pd.DataFrame(d) for d in datas) (df.groupby('COLA').agg({'COLA':'count', 'Size':'max', 'Last':'max'}) .rename(columns={'COLA':'Count'}) .reset_index() .to_dict('rows') )

df = pd.concat(pd.DataFrame(d) .groupby('COLA') .agg({'COLA':'count', 'Size':'max', 'Last':'max'}) .rename(columns={'COLA':'Count'}) for data in datas ) df.groupby('COLA').agg({'Count':'sum', 'Size':'max', 'last':'max'})

Recommended topics

Hot tags