kerchunk: xarray dimensions error when opening hdf5 reference with groups
Here is a zip of the data file and a reference JSON to the same file in Azure.
Opening the attached file works locally with xarray, provided the group is specified:
import xarray as xr

xr.open_dataset(
    "./VNP14A1.A2020001.h08v04.001.2020003132203.h5",
    group="HDFEOS/GRIDS/VNP14A1_Grid/Data Fields",
)
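For context, a reference file like test.json would typically be produced with kerchunk's SingleHdf5ToZarr translator. A minimal sketch of that step, assuming a local copy of the file; the az:// URL is a placeholder, since the actual container path on the 'modissa' account is not given here:

import json
from kerchunk.hdf import SingleHdf5ToZarr

local_path = "./VNP14A1.A2020001.h08v04.001.2020003132203.h5"
# Placeholder remote URL; the real container/path is not stated in the issue.
remote_url = "az://container/VNP14A1.A2020001.h08v04.001.2020003132203.h5"

# Scan the HDF5 metadata and chunk locations, emitting a zarr-style
# reference set that points byte ranges at the remote copy.
with open(local_path, "rb") as f:
    refs = SingleHdf5ToZarr(f, remote_url).translate()

with open("test.json", "w") as out:
    json.dump(refs, out)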
But opening a reference to this file on Azure (test.json)

import fsspec

fs1 = fsspec.filesystem(
    'reference',
    fo='test.json',
    remote_protocol='az',
    remote_options={'account_name': 'modissa'},
)
ds = xr.open_dataset(fs1.get_mapper("HDFEOS/GRIDS/VNP14A1_Grid/Data Fields"), engine='zarr')
yields the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-37-a8b2cd409cf2> in <module>
4 })
5
----> 6 ds = xr.open_dataset(fs1.get_mapper("HDFEOS/GRIDS/VNP14A1_Grid/Data Fields"), engine='zarr')
/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/api.py in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, backend_kwargs, *args, **kwargs)
494
495 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 496 backend_ds = backend.open_dataset(
497 filename_or_obj,
498 drop_variables=drop_variables,
/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/zarr.py in open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, consolidate_on_close, chunk_store, storage_options, lock)
745 store_entrypoint = StoreBackendEntrypoint()
746 with close_on_error(store):
--> 747 ds = store_entrypoint.open_dataset(
748 store,
749 mask_and_scale=mask_and_scale,
/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/store.py in open_dataset(self, store, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta)
20 decode_timedelta=None,
21 ):
---> 22 vars, attrs = store.load()
23 encoding = store.get_encoding()
24
/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/common.py in load(self)
122 """
123 variables = FrozenDict(
--> 124 (_decode_variable_name(k), v) for k, v in self.get_variables().items()
125 )
126 attributes = FrozenDict(self.get_attrs())
/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/zarr.py in get_variables(self)
376
377 def get_variables(self):
--> 378 return FrozenDict(
379 (k, self.open_store_variable(k, v)) for k, v in self.ds.arrays()
380 )
/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/core/utils.py in FrozenDict(*args, **kwargs)
444
445 def FrozenDict(*args, **kwargs) -> Frozen:
--> 446 return Frozen(dict(*args, **kwargs))
447
448
/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/zarr.py in <genexpr>(.0)
377 def get_variables(self):
378 return FrozenDict(
--> 379 (k, self.open_store_variable(k, v)) for k, v in self.ds.arrays()
380 )
381
/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/zarr.py in open_store_variable(self, name, zarr_array)
373 attributes["_FillValue"] = zarr_array.fill_value
374
--> 375 return Variable(dimensions, data, attributes, encoding)
376
377 def get_variables(self):
/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/core/variable.py in __init__(self, dims, data, attrs, encoding, fastpath)
313 """
314 self._data = as_compatible_data(data, fastpath=fastpath)
--> 315 self._dims = self._parse_dimensions(dims)
316 self._attrs = None
317 self._encoding = None
/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/core/variable.py in _parse_dimensions(self, dims)
572 dims = tuple(dims)
573 if len(dims) != self.ndim:
--> 574 raise ValueError(
575 f"dimensions {dims} must have the same length as the "
576 f"number of data dimensions, ndim={self.ndim}"
ValueError: dimensions () must have the same length as the number of data dimensions, ndim=2
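For context on what the error means: xarray's zarr backend takes dimension names from each array's _ARRAY_DIMENSIONS attribute, so "dimensions ()" against "ndim=2" suggests that attribute is missing or empty in the generated references. A minimal diagnostic sketch that bypasses xarray and inspects the group with plain zarr, assuming the same fs1 reference filesystem as above:

import zarr

# Open the group directly and check whether each array carries the
# _ARRAY_DIMENSIONS attribute that xarray relies on for dimension names.
grp = zarr.open_group(fs1.get_mapper("HDFEOS/GRIDS/VNP14A1_Grid/Data Fields"), mode="r")
for name, arr in grp.arrays():
    print(name, arr.shape, arr.attrs.get("_ARRAY_DIMENSIONS"))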
About this issue
- State: closed
- Created 3 years ago
- Comments: 27 (13 by maintainers)
I think you are checking for the case when there are dimensions (i.e., a non-empty shape), but _get_array_dims doesn't populate any names at all.

OK, so the task is to find out why ._get_array_dims failed in this case. Perhaps this is because the file isn't one netCDF, but several netCDFs stored in the hierarchy - I think this is the first such example. I would breakpoint in ._get_array_dims to figure out why ["phony_dim_0", "phony_dim_1"] are not being found.
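(The phony_dim_* names are the netCDF-style fallback names assigned to HDF5 axes that have no dimension scales attached.) A minimal sketch of that debugging route, assuming a local copy of the file and a recent kerchunk install; the file:// URL and breakpoint placement are illustrative:

import pdb
from kerchunk.hdf import SingleHdf5ToZarr

# Re-run the translation locally under the debugger; from there, step
# down into _get_array_dims for the "Data Fields" arrays to see why
# no phony_dim_* names come back.
with open("./VNP14A1.A2020001.h08v04.001.2020003132203.h5", "rb") as f:
    h5chunks = SingleHdf5ToZarr(f, "file://VNP14A1.A2020001.h08v04.001.2020003132203.h5")
    pdb.runcall(h5chunks.translate)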