xarray: Opendap access failure error
Hi all,
We are having some trouble with OPeNDAP access to our own THREDDS server. I’ve found that when I access small subsets of small variable arrays, OPeNDAP works perfectly, but I get the error shown below when trying to access some large ERA5 netCDF files (~8 GB) with xarray. See my example code below:
import xarray as xr
import os
from datetime import datetime, timedelta
import pandas as pd
import shutil
import numpy as np
import time
# Start a wall-clock timer for the whole script (elapsed time measured elsewhere).
tic = time.perf_counter()
# ------------------------------------------------------------------------------------------------------------------
# Inputs Example:
# Bounding box as [lon_min, lon_max, lat_min, lat_max], degrees.
lonlat_box = [-4.5, -2.5, 44, 45]
# Requested time window (inclusive), naive datetimes.
date_ini = datetime(1998,5,28,12)
date_end = datetime(1998,6,1,12)
# Local folder where results will be written (created below if missing).
output_path = r'test_inputs\ERA5\data'
# OPeNDAP base URL of the THREDDS dataset; monthly file names are appended later.
source_path = r'http://193.144.213.180:8080/thredds/dodsC/Wind/Wind_ERA5/Global'
# source_path = r'D:\2020_REPSOL\Codigos_input_TESEO\raw'
# Margin (degrees) added around the bounding box when subsetting.
dl = 0.5
# ------------------------------------------------------------------------------------------------------------------
# Create results folders and paths.
# os.makedirs(exist_ok=True) replaces the exists()/pass/else pattern and is
# race-free (no gap between the existence check and the creation).
os.makedirs(output_path, exist_ok=True)
# Change to (-180, 180) if longitudes were given in the (0, 360) convention
if lonlat_box[0] > 180:
    lonlat_box[0] -= 360
if lonlat_box[1] > 180:
    lonlat_box[1] -= 360
# Check coordinates against the database's spatial coverage
if lonlat_box[0] < -19 or lonlat_box[1] > 5 or lonlat_box[2] < 26 or lonlat_box[3] > 56:
    # Message fixed: the closing latitude bound was missing its degree sign.
    print("Invalid coordinates! coordinates must be Lon:(-19º,5º) and Lat:(26º,56º)")
    exit()
# Check time range against the database's temporal coverage
if date_ini < datetime(1992, 1, 1, 0) or date_end > datetime(2017, 12, 31, 23):
    # Message fixed: "provide" -> "provides".
    print("Invalid time range! This database provides data from 01/1992 to 12/2017")
    exit()
# Store Lon/Lat bounds as immutable tuples and drop the raw input list
Lon = (lonlat_box[0], lonlat_box[1])
Lat = (lonlat_box[2], lonlat_box[3])
del lonlat_box
# Create date list of files to be loaded
dates = pd.date_range(start=date_ini, end=date_end, closed=None, freq='D')
file_list = []
for date in dates:
p = list([source_path + '/Wind_ERA5_Global_' + date.strftime("%Y") + '.' + date.strftime("%m") + '.nc'])
file_list = file_list + p
# Delete repeated elements
file_list = list(dict.fromkeys(file_list))
print('Loading files: \n{}\n'.format("\n".join(file_list)))
# Load data lazily (dask-backed) across all monthly files
ds = xr.open_mfdataset(file_list)
# Select only the wind components
ds = ds[['u', 'v']]
# from 0º,360º to -180º,180º
ds['lon'] = (ds.lon + 180) % 360 - 180
# BUG FIX: Dataset.sortby takes a *list* of variables as its first argument;
# the original ds.sortby('lon', 'lat') passed 'lat' as the `ascending`
# parameter, so only lon was actually sorted.
ds = ds.sortby(['lon', 'lat'])
# Select spatial subset [lon, lat]. Using label slices with .sel keeps the
# OPeNDAP request small (only the subset is transferred); .where(..., drop=True)
# can force the backend to fetch the full arrays, which is what triggers
# "NetCDF: Access failure" on multi-GB remote files.
ds = ds.sel(lon=slice(Lon[0] - dl, Lon[1] + dl),
            lat=slice(Lat[0] - dl, Lat[1] + dl))
# Select temporal subset (inclusive on both ends, like the original .where)
ds = ds.sel(time=slice(np.datetime64(date_ini), np.datetime64(date_end)))
# Create depth-layers file for 2D simulation
winds_list = []
# From xarray to dataframe — this is the step that triggers the actual
# download/compute of the (now small) subset
df = ds.to_dataframe()
Problem Description
If I run the process with local data the code runs perfect and there is no problem at all. I previously downloaded to my local PC two files to perform this test.
But when I used the opendap to generalize the process for any date using the opendap url
source_path = r'http://193.144.213.180:8080/thredds/dodsC/Wind/Wind_ERA5/Global'
I found this error
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
d:\2020_REPSOL\Codigos_input_TESEO\draft_code.py in
82
83 # From xarray to dataframe
---> 84 df = ds.to_dataframe()
85
86 df = df.reset_index()
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\dataset.py in to_dataframe(self)
3335 this dataset's indices.
3336 """
-> 3337 return self._to_dataframe(self.dims)
3338
3339 @classmethod
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\dataset.py in _to_dataframe(self, ordered_dims)
3324 columns = [k for k in self.variables if k not in self.dims]
3325 data = [self._variables[k].set_dims(ordered_dims).values.reshape(-1)
-> 3326 for k in columns]
3327 index = self.coords.to_index(ordered_dims)
3328 return pd.DataFrame(OrderedDict(zip(columns, data)), index=index)
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\dataset.py in (.0)
3324 columns = [k for k in self.variables if k not in self.dims]
3325 data = [self._variables[k].set_dims(ordered_dims).values.reshape(-1)
-> 3326 for k in columns]
3327 index = self.coords.to_index(ordered_dims)
3328 return pd.DataFrame(OrderedDict(zip(columns, data)), index=index)
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\variable.py in values(self)
390 def values(self):
391 """The variable's data as a numpy.ndarray"""
--> 392 return _as_array_or_item(self._data)
393
394 @values.setter
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\variable.py in _as_array_or_item(data)
211 TODO: remove this (replace with np.asarray) once these issues are fixed
212 """
--> 213 data = np.asarray(data)
214 if data.ndim == 0:
215 if data.dtype.kind == 'M':
~\AppData\Local\Continuum\miniconda3\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
536
537 """
--> 538 return array(a, dtype, copy=False, order=order)
539
540
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\array\core.py in __array__(self, dtype, **kwargs)
996
997 def __array__(self, dtype=None, **kwargs):
--> 998 x = self.compute()
999 if dtype and x.dtype != dtype:
1000 x = x.astype(dtype)
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\base.py in compute(self, **kwargs)
154 dask.base.compute
155 """
--> 156 (result,) = compute(self, traverse=False, **kwargs)
157 return result
158
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\base.py in compute(*args, **kwargs)
396 keys = [x.__dask_keys__() for x in collections]
397 postcomputes = [x.__dask_postcompute__() for x in collections]
--> 398 results = schedule(dsk, keys, **kwargs)
399 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
400
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
74 results = get_async(pool.apply_async, len(pool._pool), dsk, result,
75 cache=cache, get_id=_thread_get_id,
---> 76 pack_exception=pack_exception, **kwargs)
77
78 # Cleanup pools associated to dead threads
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
460 _execute_task(task, data) # Re-execute locally
461 else:
--> 462 raise_exception(exc, tb)
463 res, worker_id = loads(res_info)
464 state['cache'][key] = res
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\compatibility.py in reraise(exc, tb)
110 if exc.__traceback__ is not tb:
111 raise exc.with_traceback(tb)
--> 112 raise exc
113
114 import pickle as cPickle
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
228 try:
229 task, data = loads(task_info)
--> 230 result = _execute_task(task, data)
231 id = get_id()
232 result = dumps((result, id))
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
116 elif istask(arg):
117 func, args = arg[0], arg[1:]
--> 118 args2 = [_execute_task(a, cache) for a in args]
119 return func(*args2)
120 elif not ishashable(arg):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in (.0)
116 elif istask(arg):
117 func, args = arg[0], arg[1:]
--> 118 args2 = [_execute_task(a, cache) for a in args]
119 return func(*args2)
120 elif not ishashable(arg):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
116 elif istask(arg):
117 func, args = arg[0], arg[1:]
--> 118 args2 = [_execute_task(a, cache) for a in args]
119 return func(*args2)
120 elif not ishashable(arg):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in (.0)
116 elif istask(arg):
117 func, args = arg[0], arg[1:]
--> 118 args2 = [_execute_task(a, cache) for a in args]
119 return func(*args2)
120 elif not ishashable(arg):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
116 elif istask(arg):
117 func, args = arg[0], arg[1:]
--> 118 args2 = [_execute_task(a, cache) for a in args]
119 return func(*args2)
120 elif not ishashable(arg):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in (.0)
116 elif istask(arg):
117 func, args = arg[0], arg[1:]
--> 118 args2 = [_execute_task(a, cache) for a in args]
119 return func(*args2)
120 elif not ishashable(arg):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk)
117 func, args = arg[0], arg[1:]
118 args2 = [_execute_task(a, cache) for a in args]
--> 119 return func(*args2)
120 elif not ishashable(arg):
121 return arg
~\AppData\Local\Continuum\miniconda3\lib\site-packages\dask\array\core.py in getter(a, b, asarray, lock)
80 c = a[b]
81 if asarray:
---> 82 c = np.asarray(c)
83 finally:
84 if lock:
~\AppData\Local\Continuum\miniconda3\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
536
537 """
--> 538 return array(a, dtype, copy=False, order=order)
539
540
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\indexing.py in __array__(self, dtype)
602
603 def __array__(self, dtype=None):
--> 604 return np.asarray(self.array, dtype=dtype)
605
606 def __getitem__(self, key):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
536
537 """
--> 538 return array(a, dtype, copy=False, order=order)
539
540
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\indexing.py in __array__(self, dtype)
508 def __array__(self, dtype=None):
509 array = as_indexable(self.array)
--> 510 return np.asarray(array[self.key], dtype=None)
511
512 def transpose(self, order):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
536
537 """
--> 538 return array(a, dtype, copy=False, order=order)
539
540
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\coding\variables.py in __array__(self, dtype)
66
67 def __array__(self, dtype=None):
---> 68 return self.func(self.array)
69
70 def __repr__(self):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\coding\variables.py in _scale_offset_decoding(data, scale_factor, add_offset, dtype)
182
183 def _scale_offset_decoding(data, scale_factor, add_offset, dtype):
--> 184 data = np.array(data, dtype=dtype, copy=True)
185 if scale_factor is not None:
186 data *= scale_factor
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\coding\variables.py in __array__(self, dtype)
66
67 def __array__(self, dtype=None):
---> 68 return self.func(self.array)
69
70 def __repr__(self):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\coding\variables.py in _apply_mask(data, encoded_fill_values, decoded_fill_value, dtype)
133 ) -> np.ndarray:
134 """Mask all matching values in a NumPy arrays."""
--> 135 data = np.asarray(data, dtype=dtype)
136 condition = False
137 for fv in encoded_fill_values:
~\AppData\Local\Continuum\miniconda3\lib\site-packages\numpy\core\numeric.py in asarray(a, dtype, order)
536
537 """
--> 538 return array(a, dtype, copy=False, order=order)
539
540
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\indexing.py in __array__(self, dtype)
508 def __array__(self, dtype=None):
509 array = as_indexable(self.array)
--> 510 return np.asarray(array[self.key], dtype=None)
511
512 def transpose(self, order):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\backends\netCDF4_.py in __getitem__(self, key)
62 return indexing.explicit_indexing_adapter(
63 key, self.shape, indexing.IndexingSupport.OUTER,
---> 64 self._getitem)
65
66 def _getitem(self, key):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\core\indexing.py in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
776 """
777 raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
--> 778 result = raw_indexing_method(raw_key.tuple)
779 if numpy_indices.tuple:
780 # index the loaded np.ndarray
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\backends\netCDF4_.py in _getitem(self, key)
73 with self.datastore.lock:
74 original_array = self.get_array(needs_lock=False)
---> 75 array = getitem(original_array, key)
76 except IndexError:
77 # Catch IndexError in netCDF4 and return a more informative
~\AppData\Local\Continuum\miniconda3\lib\site-packages\xarray\backends\common.py in robust_getitem(array, key, catch, max_retries, initial_delay)
53 for n in range(max_retries + 1):
54 try:
---> 55 return array[key]
56 except catch:
57 if n == max_retries:
netCDF4\_netCDF4.pyx in netCDF4._netCDF4.Variable.__getitem__()
netCDF4\_netCDF4.pyx in netCDF4._netCDF4.Variable._get()
netCDF4\_netCDF4.pyx in netCDF4._netCDF4._ensure_nc_success()
RuntimeError: NetCDF: Access failure
We thought this could be related to the OPeNDAP service configuration in the THREDDS server, so we tried raising these parameters by x100 and even x1000.
<Opendap>
<ascLimit>50</ascLimit>
<binLimit>500</binLimit>
<serverVersion>opendap/3.7</serverVersion>
</Opendap>
The result of these changes is that now the error at the end says: RuntimeError: NetCDF: file not found
We do not know how to properly configure OPeNDAP access to this data, so any help is highly appreciated.
Thank you in advance!!
About this issue
- Original URL
- State: closed
- Created 4 years ago
- Comments: 17 (8 by maintainers)
This depends entirely on the TDS server configuration. See comment in https://github.com/Unidata/netcdf-c/issues/1667#issuecomment-597372065. The default limit appears to be 500 MB.
It’s important to note that none of this has to do with xarray. Xarray is simply the top layer of a very deep software stack. If the TDS server could deliver larger data requests, and the netCDF4-python library could accept them, xarray would have no problem.