xarray: dask compute on reduction fails with ValueError
I’m doing a reduction like `mean` on a dask-backed `DataArray`, and passing it to `dask.compute`:
In [3]: from dask import compute
...: import numpy as np
...: import xarray as xr
...:
In [4]: data = xr.DataArray(np.random.random(size=(10, 2)),
...: dims=['samples', 'features']).chunk((5, 2))
...:
In [5]: compute(data.mean(axis=0))
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-5-47605102585c> in <module>()
----> 1 compute(data.mean(axis=0))
~/Envs/dask-dev/lib/python3.6/site-packages/dask/dask/base.py in compute(*args, **kwargs)
334 results_iter = iter(results)
335 return tuple(a if f is None else f(next(results_iter), *a)
--> 336 for f, a in postcomputes)
337
338
~/Envs/dask-dev/lib/python3.6/site-packages/dask/dask/base.py in <genexpr>(.0)
334 results_iter = iter(results)
335 return tuple(a if f is None else f(next(results_iter), *a)
--> 336 for f, a in postcomputes)
337
338
~/Envs/dask-dev/lib/python3.6/site-packages/xarray/xarray/core/dataarray.py in _dask_finalize(results, func, args, name)
607 @staticmethod
608 def _dask_finalize(results, func, args, name):
--> 609 ds = func(results, *args)
610 variable = ds._variables.pop(_THIS_ARRAY)
611 coords = ds._variables
~/Envs/dask-dev/lib/python3.6/site-packages/xarray/xarray/core/dataset.py in _dask_postcompute(results, info, *args)
551 func, args2 = v
552 r = results2.pop()
--> 553 result = func(r, *args2)
554 else:
555 result = v
~/Envs/dask-dev/lib/python3.6/site-packages/xarray/xarray/core/variable.py in _dask_finalize(results, array_func, array_args, dims, attrs, encoding)
389 results = {k: v for k, v in results.items() if k[0] == name} # cull
390 data = array_func(results, *array_args)
--> 391 return Variable(dims, data, attrs=attrs, encoding=encoding)
392
393 @property
~/Envs/dask-dev/lib/python3.6/site-packages/xarray/xarray/core/variable.py in __init__(self, dims, data, attrs, encoding, fastpath)
267 """
268 self._data = as_compatible_data(data, fastpath=fastpath)
--> 269 self._dims = self._parse_dimensions(dims)
270 self._attrs = None
271 self._encoding = None
~/Envs/dask-dev/lib/python3.6/site-packages/xarray/xarray/core/variable.py in _parse_dimensions(self, dims)
431 raise ValueError('dimensions %s must have the same length as the '
432 'number of data dimensions, ndim=%s'
--> 433 % (dims, self.ndim))
434 return dims
435
ValueError: dimensions ('features',) must have the same length as the number of data dimensions, ndim=0
The expected output is the same as that of the `.compute()` version, which works correctly:
In [7]: data.mean(axis=0).compute()
Out[7]:
<xarray.DataArray (features: 2)>
array([0.535643, 0.459406])
Dimensions without coordinates: features
In [6]: xr.show_versions()
INSTALLED VERSIONS
------------------
commit: c2b205f29467a4431baa80b5c07fe31bda67fbef
python: 3.6.1.final.0
python-bits: 64
OS: Darwin
OS-release: 16.7.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8
xarray: 0.10.0-5-gc2b205f
pandas: 0.22.0.dev0+118.g4c6387520
numpy: 1.14.0.dev0+2995e6a
scipy: 1.1.0.dev0+b6fd544
netCDF4: 1.3.1
h5netcdf: None
Nio: None
bottleneck: None
cyordereddict: None
dask: 0.16.0+15.gcbc62fbef
matplotlib: 2.1.0
cartopy: None
seaborn: 0.8.1
setuptools: 36.7.2
pip: 10.0.0.dev0
conda: None
pytest: 3.2.3
IPython: 6.2.1
sphinx: 1.6.5
Apologies if I’m doing something silly here, I don’t know xarray 😃
About this issue
- Original URL
- State: closed
- Created 7 years ago
- Comments: 17 (15 by maintainers)
Also worth pointing out that this is likely the kind of bug that would have been caught with static typing
On Mon, Dec 4, 2017 at 6:55 PM, Stephan Hoyer notifications@github.com wrote: