kerchunk: xarray dimensions error when opening hdf5 reference with groups

Here is a zip of the data file and a reference JSON pointing to the same file in Azure.

Opening the attached file works locally with xarray, provided the group is specified:

import xarray as xr

xr.open_dataset(
    "./VNP14A1.A2020001.h08v04.001.2020003132203.h5",
    group="HDFEOS/GRIDS/VNP14A1_Grid/Data Fields"
)

But opening a reference to this file on Azure (test.json)

import fsspec

fs1 = fsspec.filesystem('reference', fo='test.json',
                        remote_protocol='az', remote_options={
                            'account_name': 'modissa'
                        })

ds = xr.open_dataset(fs1.get_mapper("HDFEOS/GRIDS/VNP14A1_Grid/Data Fields"), engine='zarr')

yields the following error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-37-a8b2cd409cf2> in <module>
      4                         })
      5 
----> 6 ds = xr.open_dataset(fs1.get_mapper("HDFEOS/GRIDS/VNP14A1_Grid/Data Fields"), engine='zarr')

/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/api.py in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, backend_kwargs, *args, **kwargs)
    494 
    495     overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 496     backend_ds = backend.open_dataset(
    497         filename_or_obj,
    498         drop_variables=drop_variables,

/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/zarr.py in open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, synchronizer, consolidated, consolidate_on_close, chunk_store, storage_options, lock)
    745         store_entrypoint = StoreBackendEntrypoint()
    746         with close_on_error(store):
--> 747             ds = store_entrypoint.open_dataset(
    748                 store,
    749                 mask_and_scale=mask_and_scale,

/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/store.py in open_dataset(self, store, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta)
     20         decode_timedelta=None,
     21     ):
---> 22         vars, attrs = store.load()
     23         encoding = store.get_encoding()
     24 

/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/common.py in load(self)
    122         """
    123         variables = FrozenDict(
--> 124             (_decode_variable_name(k), v) for k, v in self.get_variables().items()
    125         )
    126         attributes = FrozenDict(self.get_attrs())

/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/zarr.py in get_variables(self)
    376 
    377     def get_variables(self):
--> 378         return FrozenDict(
    379             (k, self.open_store_variable(k, v)) for k, v in self.ds.arrays()
    380         )

/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/core/utils.py in FrozenDict(*args, **kwargs)
    444 
    445 def FrozenDict(*args, **kwargs) -> Frozen:
--> 446     return Frozen(dict(*args, **kwargs))
    447 
    448 

/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/zarr.py in <genexpr>(.0)
    377     def get_variables(self):
    378         return FrozenDict(
--> 379             (k, self.open_store_variable(k, v)) for k, v in self.ds.arrays()
    380         )
    381 

/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/backends/zarr.py in open_store_variable(self, name, zarr_array)
    373             attributes["_FillValue"] = zarr_array.fill_value
    374 
--> 375         return Variable(dimensions, data, attributes, encoding)
    376 
    377     def get_variables(self):

/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/core/variable.py in __init__(self, dims, data, attrs, encoding, fastpath)
    313         """
    314         self._data = as_compatible_data(data, fastpath=fastpath)
--> 315         self._dims = self._parse_dimensions(dims)
    316         self._attrs = None
    317         self._encoding = None

/srv/conda/envs/notebook/lib/python3.8/site-packages/xarray/core/variable.py in _parse_dimensions(self, dims)
    572         dims = tuple(dims)
    573         if len(dims) != self.ndim:
--> 574             raise ValueError(
    575                 f"dimensions {dims} must have the same length as the "
    576                 f"number of data dimensions, ndim={self.ndim}"

ValueError: dimensions () must have the same length as the number of data dimensions, ndim=2
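
For context, xarray's zarr backend takes each variable's dimension names from the array's _ARRAY_DIMENSIONS attribute, so an empty dimensions tuple on a 2-D array suggests that attribute is missing from the generated references. A minimal diagnostic sketch, reusing the filesystem setup above:

import fsspec
import zarr

fs1 = fsspec.filesystem('reference', fo='test.json',
                        remote_protocol='az',
                        remote_options={'account_name': 'modissa'})

# Open the same mapper with zarr directly and print each array's
# _ARRAY_DIMENSIONS attribute; None here would explain the error above.
grp = zarr.open_group(fs1.get_mapper("HDFEOS/GRIDS/VNP14A1_Grid/Data Fields"), mode="r")
for name, arr in grp.arrays():
    print(name, arr.shape, dict(arr.attrs).get("_ARRAY_DIMENSIONS"))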

About this issue

  • State: closed
  • Created 3 years ago
  • Comments: 27 (13 by maintainers)

Most upvoted comments

I think you are checking for the case when there are dimensions (i.e., a non-empty shape), but _get_array_dims doesn’t populate any names at all.

OK, so the task is to find out why ._get_array_dims failed in this case. Perhaps this is because the file isn't a single netCDF but several netCDFs stored in a hierarchy; I think this is the first such example. A sketch for reproducing the translate step locally follows.
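
A rough sketch for stepping through that locally; the SingleHdf5ToZarr call is kerchunk's documented entry point, and the filename is the one from the report:

import fsspec
from kerchunk.hdf import SingleHdf5ToZarr

# Regenerate the references from the local file so the translate step can
# be run under a debugger (e.g. with a breakpoint inside
# SingleHdf5ToZarr._get_array_dims, as suggested in the next comment).
local = "./VNP14A1.A2020001.h08v04.001.2020003132203.h5"
with fsspec.open(local, "rb") as f:
    refs = SingleHdf5ToZarr(f, local).translate()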

I would set a breakpoint in ._get_array_dims to figure out why ["phony_dim_0", "phony_dim_1"] are not being found.
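
Alternatively, the generated references can be inspected directly, without a debugger. A quick sketch, assuming test.json follows the kerchunk reference spec (version 0 is a flat mapping, version 1 nests it under a "refs" key) with metadata values stored as JSON strings:

import json

with open("test.json") as f:
    spec = json.load(f)
refs = spec.get("refs", spec)  # handles both version-0 and version-1 layouts

# Flag every array whose .zattrs lacks _ARRAY_DIMENSIONS; these are the
# variables that would reach xarray with an empty dims tuple.
for key in refs:
    if key.endswith("/.zarray"):
        attrs = json.loads(refs.get(key[:-len(".zarray")] + ".zattrs", "{}"))
        if "_ARRAY_DIMENSIONS" not in attrs:
            print("missing _ARRAY_DIMENSIONS:", key.rsplit("/", 1)[0])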