pandas: cryptic DataFrame.agg error when using dictionaries

Not sure if this is a bug. This works:

(
    pd.DataFrame({"u": [2,1,4,2,5], "a": ["a", "a", "b", "a", "b"]})
    .groupby("a")
    .agg(lambda x: np.mean(x)/np.std(x))
)

while this raises an error:

(
    pd.DataFrame({"u": [2,1,4,2,5], "a": ["a", "a", "b", "a", "b"]})
    .groupby("a")
    .agg({"blah": lambda x: np.mean(x)/np.std(x)})
)

error: KeyError: 'blah'

## LONG ERROR MESSAGE
KeyError                                  Traceback (most recent call last)
/opt/local/lib/python3.6/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   2103             try:
-> 2104                 return self._engine.get_loc(key)
   2105             except KeyError:

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4160)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4024)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13161)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13115)()

KeyError: 'blah'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-100-4f93bf630ec4> in <module>()
      2     pd.DataFrame({"u": [2,1,4,2,5], "a": ["a", "a", "b", "a", "b"]})
      3     .groupby("a")
----> 4     .agg({"blah": lambda x: np.mean(x)/np.std(x)})
      5 )

/opt/local/lib/python3.6/site-packages/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
   3697     @Appender(SelectionMixin._agg_doc)
   3698     def aggregate(self, arg, *args, **kwargs):
-> 3699         return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)
   3700 
   3701     agg = aggregate

/opt/local/lib/python3.6/site-packages/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
   3195 
   3196         _level = kwargs.pop('_level', None)
-> 3197         result, how = self._aggregate(arg, _level=_level, *args, **kwargs)
   3198         if how is None:
   3199             return result

/opt/local/lib/python3.6/site-packages/pandas/core/base.py in _aggregate(self, arg, *args, **kwargs)
    547 
    548                 try:
--> 549                     result = _agg(arg, _agg_1dim)
    550                 except SpecificationError:
    551 

/opt/local/lib/python3.6/site-packages/pandas/core/base.py in _agg(arg, func)
    498                 result = compat.OrderedDict()
    499                 for fname, agg_how in compat.iteritems(arg):
--> 500                     result[fname] = func(fname, agg_how)
    501                 return result
    502 

/opt/local/lib/python3.6/site-packages/pandas/core/base.py in _agg_1dim(name, how, subset)
    477                 aggregate a 1-dim with how
    478                 """
--> 479                 colg = self._gotitem(name, ndim=1, subset=subset)
    480                 if colg.ndim != 1:
    481                     raise SpecificationError("nested dictionary is ambiguous "

/opt/local/lib/python3.6/site-packages/pandas/core/groupby.py in _gotitem(self, key, ndim, subset)
   3724         elif ndim == 1:
   3725             if subset is None:
-> 3726                 subset = self.obj[key]
   3727             return SeriesGroupBy(subset, selection=key,
   3728                                  grouper=self.grouper)

/opt/local/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2055             return self._getitem_multilevel(key)
   2056         else:
-> 2057             return self._getitem_column(key)
   2058 
   2059     def _getitem_column(self, key):

/opt/local/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2062         # get column
   2063         if self.columns.is_unique:
-> 2064             return self._get_item_cache(key)
   2065 
   2066         # duplicate columns & possible reduce dimensionality

/opt/local/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   1384         res = cache.get(item)
   1385         if res is None:
-> 1386             values = self._data.get(item)
   1387             res = self._box_item_values(item, values)
   1388             cache[item] = res

/opt/local/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   3518 
   3519             if not isnull(item):
-> 3520                 loc = self.items.get_loc(item)
   3521             else:
   3522                 indexer = np.arange(len(self.items))[isnull(self.items)]

/opt/local/lib/python3.6/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
   2104                 return self._engine.get_loc(key)
   2105             except KeyError:
-> 2106                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2107 
   2108         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4160)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4024)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13161)()

pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13115)()

KeyError: 'blah'


<details>

INSTALLED VERSIONS
------------------
commit: None
python: 3.6.0.alpha.3
python-bits: 64
OS: Linux
OS-release: 3.14.32-xxxx-grs-ipv6-64
machine: x86_64
processor: 
byteorder: little
LC_ALL: None
LANG: en_IE.UTF-8
LOCALE: en_IE.UTF-8

pandas: 0.19.0
nose: None
pip: 8.1.2
setuptools: 28.3.0
Cython: 0.24.1
numpy: 1.11.2
scipy: 0.18.1
statsmodels: None
xarray: None
IPython: 5.1.0
sphinx: None
patsy: None
dateutil: 2.5.3
pytz: 2016.7
blosc: None
bottleneck: None
tables: None
numexpr: None
matplotlib: 2.0.0b3
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: 3.6.4
bs4: 4.5.1
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: 1.0.13
pymysql: None
psycopg2: 2.6.1 (dt dec pq3 ext lo64)
jinja2: 2.8
boto: None
pandas_datareader: None
</details>

About this issue

  • Original URL
  • State: closed
  • Created 8 years ago
  • Comments: 18 (9 by maintainers)

Most upvoted comments

Please read the docs: you can do exactly that if you use a Series groupby.