pandas: cryptic DataFrame.agg error when using dictionaries
Not sure if this is a bug. This works:
(
pd.DataFrame({"u": [2,1,4,2,5], "a": ["a", "a", "b", "a", "b"]})
.groupby("a")
.agg(lambda x: np.mean(x)/np.std(x))
)
while this raises an error:
(
pd.DataFrame({"u": [2,1,4,2,5], "a": ["a", "a", "b", "a", "b"]})
.groupby("a")
.agg({"blah": lambda x: np.mean(x)/np.std(x)})
)
Error: `KeyError: 'blah'`
## LONG ERROR MESSAGE
KeyError Traceback (most recent call last)
/opt/local/lib/python3.6/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
2103 try:
-> 2104 return self._engine.get_loc(key)
2105 except KeyError:
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4160)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4024)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13161)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13115)()
KeyError: 'blah'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-100-4f93bf630ec4> in <module>()
2 pd.DataFrame({"u": [2,1,4,2,5], "a": ["a", "a", "b", "a", "b"]})
3 .groupby("a")
----> 4 .agg({"blah": lambda x: np.mean(x)/np.std(x)})
5 )
/opt/local/lib/python3.6/site-packages/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
3697 @Appender(SelectionMixin._agg_doc)
3698 def aggregate(self, arg, *args, **kwargs):
-> 3699 return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs)
3700
3701 agg = aggregate
/opt/local/lib/python3.6/site-packages/pandas/core/groupby.py in aggregate(self, arg, *args, **kwargs)
3195
3196 _level = kwargs.pop('_level', None)
-> 3197 result, how = self._aggregate(arg, _level=_level, *args, **kwargs)
3198 if how is None:
3199 return result
/opt/local/lib/python3.6/site-packages/pandas/core/base.py in _aggregate(self, arg, *args, **kwargs)
547
548 try:
--> 549 result = _agg(arg, _agg_1dim)
550 except SpecificationError:
551
/opt/local/lib/python3.6/site-packages/pandas/core/base.py in _agg(arg, func)
498 result = compat.OrderedDict()
499 for fname, agg_how in compat.iteritems(arg):
--> 500 result[fname] = func(fname, agg_how)
501 return result
502
/opt/local/lib/python3.6/site-packages/pandas/core/base.py in _agg_1dim(name, how, subset)
477 aggregate a 1-dim with how
478 """
--> 479 colg = self._gotitem(name, ndim=1, subset=subset)
480 if colg.ndim != 1:
481 raise SpecificationError("nested dictionary is ambiguous "
/opt/local/lib/python3.6/site-packages/pandas/core/groupby.py in _gotitem(self, key, ndim, subset)
3724 elif ndim == 1:
3725 if subset is None:
-> 3726 subset = self.obj[key]
3727 return SeriesGroupBy(subset, selection=key,
3728 grouper=self.grouper)
/opt/local/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
2055 return self._getitem_multilevel(key)
2056 else:
-> 2057 return self._getitem_column(key)
2058
2059 def _getitem_column(self, key):
/opt/local/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
2062 # get column
2063 if self.columns.is_unique:
-> 2064 return self._get_item_cache(key)
2065
2066 # duplicate columns & possible reduce dimensionality
/opt/local/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
1384 res = cache.get(item)
1385 if res is None:
-> 1386 values = self._data.get(item)
1387 res = self._box_item_values(item, values)
1388 cache[item] = res
/opt/local/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
3518
3519 if not isnull(item):
-> 3520 loc = self.items.get_loc(item)
3521 else:
3522 indexer = np.arange(len(self.items))[isnull(self.items)]
/opt/local/lib/python3.6/site-packages/pandas/indexes/base.py in get_loc(self, key, method, tolerance)
2104 return self._engine.get_loc(key)
2105 except KeyError:
-> 2106 return self._engine.get_loc(self._maybe_cast_indexer(key))
2107
2108 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4160)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:4024)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13161)()
pandas/src/hashtable_class_helper.pxi in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13115)()
KeyError: 'blah'
<details>
INSTALLED VERSIONS
------------------
commit: None
python: 3.6.0.alpha.3
python-bits: 64
OS: Linux
OS-release: 3.14.32-xxxx-grs-ipv6-64
machine: x86_64
processor:
byteorder: little
LC_ALL: None
LANG: en_IE.UTF-8
LOCALE: en_IE.UTF-8
pandas: 0.19.0
nose: None
pip: 8.1.2
setuptools: 28.3.0
Cython: 0.24.1
numpy: 1.11.2
scipy: 0.18.1
statsmodels: None
xarray: None
IPython: 5.1.0
sphinx: None
patsy: None
dateutil: 2.5.3
pytz: 2016.7
blosc: None
bottleneck: None
tables: None
numexpr: None
matplotlib: 2.0.0b3
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: 3.6.4
bs4: 4.5.1
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: 1.0.13
pymysql: None
psycopg2: 2.6.1 (dt dec pq3 ext lo64)
jinja2: 2.8
boto: None
pandas_datareader: None
</details>
About this issue
- Original URL
- State: closed
- Created 8 years ago
- Comments: 18 (9 by maintainers)
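Please read the docs: when a dict is passed to `agg` on a DataFrame groupby, its keys are interpreted as the names of the columns to aggregate, which is why `'blah'` raises a `KeyError`. You can do exactly what you want if you use a Series groupby instead (i.e. select the column first, e.g. `.groupby("a")["u"]`).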