pandas: Series values cannot be accessed by numeric categorical index

Code Sample

>>> s1 = pd.Series(['a', 'b', 'c'], index=pd.Series([1, 2, 3]))
>>> s1.get(3)
'c'

>>> s2 = pd.Series(['a', 'b', 'c'], index=pd.Series([1, 2, 3], dtype='category'))
>>> s2.get(3) is None
True
>>> s2.get(0)
'a'

Problem description

In my opinion, the behavior in the second case is error-prone (when the positional index and the categorical index overlap) and inconvenient (it forces the user to use the positional index).

Expected Output

>>> s2.get(3)
'c'

Output of pd.show_versions()

INSTALLED VERSIONS

commit: None python: 3.5.2.final.0 python-bits: 64 OS: Darwin OS-release: 16.1.0 machine: x86_64 processor: i386 byteorder: little LC_ALL: en_US.UTF-8 LANG: en_US.UTF-8 LOCALE: en_US.UTF-8

pandas: 0.19.1 nose: None pip: 9.0.1 setuptools: 27.2.0 Cython: None numpy: 1.11.2 scipy: 0.18.1 statsmodels: None xarray: None IPython: 5.1.0 sphinx: None patsy: None dateutil: 2.6.0 pytz: 2016.7 blosc: None bottleneck: None tables: None numexpr: None matplotlib: 1.5.3 openpyxl: None xlrd: None xlwt: None xlsxwriter: None lxml: None bs4: None html5lib: None httplib2: None apiclient: None sqlalchemy: None pymysql: None psycopg2: None jinja2: 2.8 boto: None pandas_datareader: None

About this issue

  • Original URL
  • State: closed
  • Created 8 years ago
  • Comments: 24 (14 by maintainers)

Most upvoted comments

I think that’s correct (though @jreback will know better). This should also fix this error, which I think is a bug:


In [1]: import pandas as pd

In [2]: s = pd.Series(['a', 'b', 'c'], index=pd.CategoricalIndex([1, 2, 3]))

In [3]: s
Out[3]:
1    a
2    b
3    c
dtype: object

In [4]: s.loc[1]
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-4-609ae8a0f3fe> in <module>()
----> 1 s.loc[1]

~/Envs/pandas-dev/lib/python3.6/site-packages/pandas/pandas/core/indexing.py in __getitem__(self, key)
   1371
   1372             maybe_callable = com._apply_if_callable(key, self.obj)
-> 1373             return self._getitem_axis(maybe_callable, axis=axis)
   1374
   1375     def _is_scalar_access(self, key):

~/Envs/pandas-dev/lib/python3.6/site-packages/pandas/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1624
   1625         # fall thru to straight lookup
-> 1626         self._has_valid_type(key, axis)
   1627         return self._get_label(key, axis=axis)
   1628

~/Envs/pandas-dev/lib/python3.6/site-packages/pandas/pandas/core/indexing.py in _has_valid_type(self, key, axis)
   1502
   1503             try:
-> 1504                 key = self._convert_scalar_indexer(key, axis)
   1505                 if not ax.contains(key):
   1506                     error()

~/Envs/pandas-dev/lib/python3.6/site-packages/pandas/pandas/core/indexing.py in _convert_scalar_indexer(self, key, axis)
    254         ax = self.obj._get_axis(min(axis, self.ndim - 1))
    255         # a scalar
--> 256         return ax._convert_scalar_indexer(key, kind=self.name)
    257
    258     def _convert_slice_indexer(self, key, axis):

~/Envs/pandas-dev/lib/python3.6/site-packages/pandas/pandas/core/indexes/category.py in _convert_scalar_indexer(self, key, kind)
    573
    574         return super(CategoricalIndex, self)._convert_scalar_indexer(
--> 575             key, kind=kind)
    576
    577     @Appender(_index_shared_docs['_convert_list_indexer'])

~/Envs/pandas-dev/lib/python3.6/site-packages/pandas/pandas/core/indexes/base.py in _convert_scalar_indexer(self, key, kind)
   1391             elif kind in ['loc'] and is_integer(key):
   1392                 if not self.holds_integer():
-> 1393                     return self._invalid_indexer('label', key)
   1394
   1395         return key

~/Envs/pandas-dev/lib/python3.6/site-packages/pandas/pandas/core/indexes/base.py in _invalid_indexer(self, form, key)
   1575                         "indexers [{key}] of {kind}".format(
   1576                             form=form, klass=type(self), key=key,
-> 1577                             kind=type(key)))
   1578
   1579     def get_duplicates(self):

TypeError: cannot do label indexing on <class 'pandas.core.indexes.category.CategoricalIndex'> with these indexers [1] of <class 'int'>

That call (`s.loc[1]`) should return 'a'.