xarray: Problem opening unstructured grid ocean forecasts with 4D vertical coordinates

We can’t open the IOOS New England triangular mesh ocean forecasts with Xarray because it doesn’t understand their more complex CF vertical coordinate system.

import xarray as xr
url='http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc'
xr.open_dataset(url)

fails with:

MissingDimensionsError: 'siglay' has more than 1-dimension and the same name as one of its dimensions ('siglay', 'node'). xarray disallows such variables because they conflict with the coordinates used to label dimensions.

If you open this dataset with nc = netCDF4.Dataset(url) you can see what the data variables (e.g. temp) and coordinates (e.g. siglay) look like:

print(nc['temp'])

<class 'netCDF4._netCDF4.Variable'>
float32 temp(time, siglay, node)
    long_name: temperature
    standard_name: sea_water_potential_temperature
    units: degrees_C
    coordinates: time siglay lat lon
    type: data
    coverage_content_type: modelResult
    mesh: fvcom_mesh
    location: node
unlimited dimensions: time
current shape = (145, 40, 53087)

print(nc['siglay'])

<class 'netCDF4._netCDF4.Variable'>
float32 siglay(siglay, node)
    long_name: Sigma Layers
    standard_name: ocean_sigma_coordinate
    positive: up
    valid_min: -1.0
    valid_max: 0.0
    formula_terms: sigma: siglay eta: zeta depth: h
unlimited dimensions: 
current shape = (40, 53087)

So the siglay variable in this dataset specifies the fraction of water column contained in the layer and because this fraction changes over the grid, it has dimensions of siglay and node. The variable siglay is just one of the variables used in the calculation of this CF-compliant vertical coordinate. The actual vertical coordinate (after computation via formula_terms) ends up being 4D.

While we understand that there is no way to represent the vertical coordinate with a one-dimensional coordinate that xarray would like, it would be nice if there were a way to at least load the variable array data like temp into xarray. We tried:

ds = xr.open_dataset(url,decode_times=False, decode_coords=False, decode_cf=False)

and we get the same error.

Is there any workaround for this?

---------------------------------------------------------------------------
MissingDimensionsError                    Traceback (most recent call last)
<ipython-input-18-723c5c460db2> in <module>()
----> 1 xr.open_dataset(url)

~/miniconda3/envs/pangeo/lib/python3.6/site-packages/xarray/backends/api.py in open_dataset(filename_or_obj, group, decode_cf, mask_and_scale, decode_times, autoclose, concat_characters, decode_coords, engine, chunks, lock, cache, drop_variables, backend_kwargs)
    344             lock = _default_lock(filename_or_obj, engine)
    345         with close_on_error(store):
--> 346             return maybe_decode_store(store, lock)
    347     else:
    348         if engine is not None and engine != 'scipy':

~/miniconda3/envs/pangeo/lib/python3.6/site-packages/xarray/backends/api.py in maybe_decode_store(store, lock)
    256             store, mask_and_scale=mask_and_scale, decode_times=decode_times,
    257             concat_characters=concat_characters, decode_coords=decode_coords,
--> 258             drop_variables=drop_variables)
    259 
    260         _protect_dataset_variables_inplace(ds, cache)

~/miniconda3/envs/pangeo/lib/python3.6/site-packages/xarray/conventions.py in decode_cf(obj, concat_characters, mask_and_scale, decode_times, decode_coords, drop_variables)
    428         vars, attrs, concat_characters, mask_and_scale, decode_times,
    429         decode_coords, drop_variables=drop_variables)
--> 430     ds = Dataset(vars, attrs=attrs)
    431     ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars))
    432     ds._file_obj = file_obj

~/miniconda3/envs/pangeo/lib/python3.6/site-packages/xarray/core/dataset.py in __init__(self, data_vars, coords, attrs, compat)
    363             coords = {}
    364         if data_vars is not None or coords is not None:
--> 365             self._set_init_vars_and_dims(data_vars, coords, compat)
    366         if attrs is not None:
    367             self.attrs = attrs

~/miniconda3/envs/pangeo/lib/python3.6/site-packages/xarray/core/dataset.py in _set_init_vars_and_dims(self, data_vars, coords, compat)
    381 
    382         variables, coord_names, dims = merge_data_and_coords(
--> 383             data_vars, coords, compat=compat)
    384 
    385         self._variables = variables

~/miniconda3/envs/pangeo/lib/python3.6/site-packages/xarray/core/merge.py in merge_data_and_coords(data, coords, compat, join)
    363     indexes = dict(extract_indexes(coords))
    364     return merge_core(objs, compat, join, explicit_coords=explicit_coords,
--> 365                       indexes=indexes)
    366 
    367 

~/miniconda3/envs/pangeo/lib/python3.6/site-packages/xarray/core/merge.py in merge_core(objs, compat, join, priority_arg, explicit_coords, indexes)
    433     coerced = coerce_pandas_values(objs)
    434     aligned = deep_align(coerced, join=join, copy=False, indexes=indexes)
--> 435     expanded = expand_variable_dicts(aligned)
    436 
    437     coord_names, noncoord_names = determine_coords(coerced)

~/miniconda3/envs/pangeo/lib/python3.6/site-packages/xarray/core/merge.py in expand_variable_dicts(list_of_variable_dicts)
    209                     var_dicts.append(coords)
    210 
--> 211                 var = as_variable(var, name=name)
    212                 sanitized_vars[name] = var
    213 

~/miniconda3/envs/pangeo/lib/python3.6/site-packages/xarray/core/variable.py in as_variable(obj, name)
    112                 'dimensions %r. xarray disallows such variables because they '
    113                 'conflict with the coordinates used to label '
--> 114                 'dimensions.' % (name, obj.dims))
    115         obj = obj.to_index_variable()
    116 

MissingDimensionsError: 'siglay' has more than 1-dimension and the same name as one of its dimensions ('siglay', 'node'). xarray disallows such variables because they conflict with the coordinates used to label dimensions.

About this issue

  • Original URL
  • State: closed
  • Created 6 years ago
  • Reactions: 1
  • Comments: 15 (8 by maintainers)

Commits related to this issue

Most upvoted comments

I’ve seen this issue come up a few more times recently, so I wanted to ask: in lieu of a “full fix” (a la https://github.com/pydata/xarray/pull/2405, with deep data model changes holding up the PR), would there be support for a partial coordinate-renaming-based fix? I’d imagine something like the following:

To read in netCDF like the following,

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    Conventions: CDM-Extended-CF
    history: Written by CFPointWriter
    title: Extract Points data from Grid file /data/ldm/pub/native/grid/NCEP/RR/CONUS_13km/RR_CONUS_13km_20210219_1400.grib2.ncx3#LambertConformal_337X451-40p01N-98p14W
    featureType: timeSeriesProfile
    time_coverage_start: 2021-02-19T16:00:00Z
    time_coverage_end: 2021-02-19T16:00:00Z
    geospatial_lat_min: 42.0295
    geospatial_lat_max: 42.0305
    geospatial_lon_min: -93.6405
    geospatial_lon_max: -93.6395
    dimensions(sizes): profile(1), station(1), isobaric(37), station_name_strlen(10), station_description_strlen(34)
    variables(dimensions): float32 isobaric(station, profile, isobaric), float32 u-component_of_wind_isobaric(station, profile, isobaric), float32 v-component_of_wind_isobaric(station, profile, isobaric), float32 Temperature_isobaric(station, profile, isobaric), float32 Relative_humidity_isobaric(station, profile, isobaric), |S1 station_name(station, station_name_strlen), |S1 station_description(station, station_description_strlen), float64 latitude(station), float64 longitude(station), float64 time(station, profile)
    groups: 

(note the problematic isobaric(station, profile, isobaric)), one could specify a kwarg to xr.open_dataset to rename it,

ds = xr.open_dataset("my_problematic_file.nc", rename_vars={'isobaric': 'isobaric_coord'})

thus giving

<xarray.Dataset>
Dimensions:                       (isobaric: 37, profile: 1, station: 1)
Dimensions without coordinates: isobaric, profile, station
Data variables:
    u-component_of_wind_isobaric  (station, profile, isobaric) float32 ...
    v-component_of_wind_isobaric  (station, profile, isobaric) float32 ...
    Temperature_isobaric          (station, profile, isobaric) float32 ...
    Relative_humidity_isobaric    (station, profile, isobaric) float32 ...
    station_name                  (station) |S10 ...
    station_description           (station) |S34 ...
    latitude                      (station) float64 ...
    longitude                     (station) float64 ...
    time                          (station, profile) datetime64[ns] ...
    isobaric_coord                (station, profile, isobaric) float32 ...
Attributes:
    Conventions:          CDM-Extended-CF
    history:              Written by CFPointWriter
    title:                Extract Points data from Grid file /data/ldm/pub/na...
    featureType:          timeSeriesProfile
    time_coverage_start:  2021-02-19T16:00:00Z
    time_coverage_end:    2021-02-19T16:00:00Z
    geospatial_lat_min:   42.0295
    geospatial_lat_max:   42.0305
    geospatial_lon_min:   -93.6405
    geospatial_lon_max:   -93.6395

This is the second follow-up item in https://github.com/pydata/xarray/issues/6293

I think we could definitely experiment with relaxing this constraint now, although ideally we would continue to check off auditing all of the methods in that long list first.

I had to work around this issue by not using xarray, and using pandas or plain netCDF4 instead:

nc = netCDF4.Dataset(input_file); h = nc.variables[vname]; times = nc.variables['time']; jd = netCDF4.num2date(times[:], times.units); hs = pd.Series(h[:, station], index=jd)

I would love to know if there is a way to do it over xarray since it is so nice to use. Best regards

It is not ideal, but you can work around that by dropping the siglay variable.

ds = xr.open_dataset(url, drop_variables='siglay')

Hi

For now, I found a workaround loading and renaming the problematic coordinates with netCDF4.Dataset(). Soon I will post this and other solutions for this model output in iuryt/FVCOMpy.

For now, you could try:

import xarray as xr
from netCDF4 import Dataset

# define year and month to be read
year = 2019
month = 5
# we could use this to run a loop through the years/months we need


# list problematic coordinates
drop_variables = ['siglay','siglev']

# base url for openDAP server
url = "".join(["http://www.smast.umassd.edu:8080/thredds/dodsC/models/fvcom/",
        f"NECOFS/Archive/NECOFS_GOM/{year}/gom4_{year}{month:02d}.nc?"])

# lazy load of the data
ds = xr.open_dataset(url,drop_variables=drop_variables,decode_times=False)

# load data with netCDF4
nc = Dataset(url)
# load the problematic coordinates
coords = {name:nc[name] for name in drop_variables}
# function to extract ncattrs from `Dataset()`
get_attrs = lambda name: {attr:coords[name].getncattr(attr) for attr in coords[name].ncattrs()}
# function to convert from `Dataset()` to `xr.DataArray()`
nc2xr = lambda name: xr.DataArray(coords[name],attrs=get_attrs(name),name=f'{name}_coord',dims=(f'{name}','node'))
# merge `xr.DataArray()` objects
coords = xr.merge([nc2xr(name) for name in coords.keys()])
# reassign to the main `xr.Dataset()`
ds = ds.assign_coords(coords)

Leaving it here in case someone falls into the same problem.