datasets-server: Raise specific errors (and error_code) instead of UnexpectedError

The following query on the production database gives the number of datasets with at least one cache entry with error_code “UnexpectedError”, grouped by the underlying “cause_exception”.

For the most common ones (DatasetGenerationError, HfHubHTTPError, OSError, etc.), we would benefit from raising a specific error with its own error code. This would allow us to:

  • retry automatically, if needed
  • show an adequate error message to the users
  • even: adapt the way we show the dataset viewer on the Hub

A null _id in the results below means that the cache entries have no details.cause_exception. These cache entries should be inspected more closely. See https://github.com/huggingface/datasets-server/issues/1123 in particular, which is one of the cases where no cause exception is reported.

db.cachedResponsesBlue.aggregate([
    {$match: {error_code: "UnexpectedError"}},
    {$group: {_id: {cause: "$details.cause_exception", dataset: "$dataset"}, count: {$sum: 1}}},
    {$group: {_id: "$_id.cause", count: {$sum: 1}}},
    {$sort: {count: -1}}
])
{ _id: 'DatasetGenerationError', count: 1964 }
{ _id: null, count: 1388 }
{ _id: 'HfHubHTTPError', count: 1154 }
{ _id: 'OSError', count: 433 }
{ _id: 'FileNotFoundError', count: 242 }
{ _id: 'FileExistsError', count: 198 }
{ _id: 'ValueError', count: 186 }
{ _id: 'TypeError', count: 160 }
{ _id: 'ConnectionError', count: 146 }
{ _id: 'RuntimeError', count: 86 }
{ _id: 'NonMatchingSplitsSizesError', count: 83 }
{ _id: 'FileSystemError', count: 62 }
{ _id: 'ClientResponseError', count: 52 }
{ _id: 'ArrowInvalid', count: 45 }
{ _id: 'ParquetResponseEmptyError', count: 43 }
{ _id: 'RepositoryNotFoundError', count: 41 }
{ _id: 'ManualDownloadError', count: 39 }
{ _id: 'IndexError', count: 28 }
{ _id: 'AttributeError', count: 16 }
{ _id: 'KeyError', count: 15 }
{ _id: 'GatedRepoError', count: 13 }
{ _id: 'NotImplementedError', count: 11 }
{ _id: 'ExpectedMoreSplits', count: 9 }
{ _id: 'PermissionError', count: 8 }
{ _id: 'BadRequestError', count: 7 }
{ _id: 'NonMatchingChecksumError', count: 6 }
{ _id: 'AssertionError', count: 4 }
{ _id: 'NameError', count: 4 }
{ _id: 'UnboundLocalError', count: 3 }
{ _id: 'JSONDecodeError', count: 3 }
{ _id: 'ZeroDivisionError', count: 3 }
{ _id: 'InvalidDocument', count: 3 }
{ _id: 'DoesNotExist', count: 3 }
{ _id: 'EOFError', count: 3 }
{ _id: 'ImportError', count: 3 }
{ _id: 'NotADirectoryError', count: 2 }
{ _id: 'RarCannotExec', count: 2 }
{ _id: 'ReadTimeout', count: 2 }
{ _id: 'ChunkedEncodingError', count: 2 }
{ _id: 'ExpectedMoreDownloadedFiles', count: 2 }
{ _id: 'InvalidConfigName', count: 2 }
{ _id: 'ModuleNotFoundError', count: 2 }
{ _id: 'Exception', count: 2 }
{ _id: 'MissingBeamOptions', count: 2 }
{ _id: 'HTTPError', count: 1 }
{ _id: 'BadZipFile', count: 1 }
{ _id: 'OverflowError', count: 1 }
{ _id: 'HFValidationError', count: 1 }
{ _id: 'IsADirectoryError', count: 1 }
{ _id: 'OperationalError', count: 1 }

About this issue

  • Original URL
  • State: open
  • Created a year ago
  • Comments: 16 (7 by maintainers)

Most upvoted comments

Current state:

db.cachedResponsesBlue.aggregate([
    {$match: {error_code: "UnexpectedError"}},
    {$group: {_id: {cause: "$details.cause_exception", dataset: "$dataset"}, count: {$sum: 1}}},
    {$group: {_id: "$_id.cause", count: {$sum: 1}}},
    {$sort: {count: -1}}
])
{ _id: 'DatasetGenerationError', count: 2767 }
{ _id: 'HfHubHTTPError', count: 795 }
{ _id: 'TypeError', count: 633 }
{ _id: 'ZeroDivisionError', count: 621 }
{ _id: 'IOException', count: 514 }
{ _id: 'ReadTimeout', count: 245 }
{ _id: 'OSError', count: 151 }
{ _id: 'BinderException', count: 127 }
{ _id: 'ConnectionError', count: 119 }
{ _id: 'ValueError', count: 103 }
{ _id: 'ParserException', count: 91 }
{ _id: 'EntryNotFoundError', count: 66 }
{ _id: 'NotImplementedError', count: 66 }
{ _id: 'FileNotFoundError', count: 60 }
{ _id: 'NonMatchingSplitsSizesError', count: 43 }
{ _id: 'BrokenPipeError', count: 39 }
{ _id: 'InvalidInputException', count: 36 }
{ _id: 'IndexError', count: 30 }
{ _id: 'OutOfRangeException', count: 30 }
{ _id: 'HTTPException', count: 21 }
{ _id: 'LocationParseError', count: 17 }
{ _id: 'RuntimeError', count: 15 }
{ _id: 'KeyError', count: 13 }
{ _id: 'BadZipFile', count: 9 }
{ _id: 'Error', count: 7 }
{ _id: 'ExpectedMoreSplits', count: 5 }
{ _id: 'ArrowInvalid', count: 5 }
{ _id: 'ConversionException', count: 4 }
{ _id: 'NameError', count: 4 }
{ _id: 'AssertionError', count: 4 }
{ _id: 'AttributeError', count: 3 }
{ _id: 'ModuleNotFoundError', count: 3 }
{ _id: 'PermissionError', count: 3 }
{ _id: 'NotPrimaryError', count: 3 }
{ _id: 'ParserError', count: 3 }
{ _id: 'ChunkedEncodingError', count: 2 }
{ _id: 'LocalEntryNotFoundError', count: 2 }
{ _id: 'RepositoryNotFoundError', count: 2 }
{ _id: 'UnboundLocalError', count: 2 }
{ _id: 'Exception', count: 2 }
{ _id: 'TypeMismatchException', count: 2 }
{ _id: 'ClientResponseError', count: 2 }
{ _id: 'JSONDecodeError', count: 1 }
{ _id: 'InvalidConfigName', count: 1 }
{ _id: 'GatedRepoError', count: 1 }
{ _id: 'CachedArtifactNotFoundError', count: 1 }
{ _id: 'HFValidationError', count: 1 }
{ _id: 'RarCannotExec', count: 1 }
{ _id: 'OutOfMemoryException', count: 1 }
{ _id: 'ImportError', count: 1 }
{ _id: 'NonStreamableDatasetError', count: 1 }
{ _id: 'OperationalError', count: 1 }
{ _id: 'SyntaxError', count: 1 }
{ _id: 'UnicodeDecodeError', count: 1 }
{ _id: 'EOFError', count: 1 }

After manually performing some cache maintenance (removing obsolete records whose config or split no longer exists), this is the updated list; mostly the AttributeError and ClientResponseError counts were reduced:

[
  { _id: { cause: 'DatasetGenerationError' }, count: 3791 },
  { _id: { cause: 'TypeError' }, count: 2222 },
  { _id: { cause: 'ParserException' }, count: 2095 },
  { _id: { cause: 'InvalidInputException' }, count: 1750 },
  { _id: { cause: 'FileNotFoundError' }, count: 1393 },
  { _id: { cause: 'ZeroDivisionError' }, count: 1224 },
  { _id: { cause: 'HfHubHTTPError' }, count: 1128 },
  { _id: { cause: 'NonMatchingSplitsSizesError' }, count: 1116 },
  { _id: { cause: 'IOException' }, count: 1035 },
  { _id: { cause: 'CachedArtifactNotFoundError' }, count: 745 },
  { _id: { cause: 'HTTPException' }, count: 526 },
  { _id: { cause: 'NotImplementedError' }, count: 493 },
  { _id: { cause: 'BinderException' }, count: 462 },
  { _id: { cause: 'KeyError' }, count: 454 },
  { _id: { cause: 'ReadTimeout' }, count: 311 },
  { _id: { cause: 'ParquetResponseEmptyError' }, count: 292 },
  { _id: { cause: 'ConnectionError' }, count: 201 },
  { _id: { cause: 'ValueError' }, count: 187 },
  { _id: { cause: 'AttributeError' }, count: 127 },
  { _id: { cause: 'IndexError' }, count: 107 },
  { _id: { cause: 'OSError' }, count: 102 },
  { _id: { cause: 'ClientResponseError' }, count: 94 },
  { _id: { cause: 'EntryNotFoundError' }, count: 92 },
  { _id: { cause: 'AssertionError' }, count: 84 },
  { _id: { cause: 'BadZipFile' }, count: 61 },
  { _id: { cause: 'OutOfRangeException' }, count: 46 },
  { _id: { cause: 'ModuleNotFoundError' }, count: 43 },
  { _id: { cause: 'LocationParseError' }, count: 29 },
  { _id: { cause: 'ArrowInvalid' }, count: 28 },
  { _id: { cause: 'CatalogException' }, count: 26 },
  { _id: { cause: 'LocalEntryNotFoundError' }, count: 19 },
  { _id: { cause: 'Error' }, count: 16 },
  { _id: { cause: 'ServerDisconnectedError' }, count: 9 },
  { _id: { cause: 'SyntaxError' }, count: 8 },
  { _id: { cause: 'InvalidOperation' }, count: 8 },
  { _id: { cause: 'RuntimeError' }, count: 7 },
  { _id: { cause: 'PermissionError' }, count: 6 },
  { _id: { cause: 'UnboundLocalError' }, count: 6 },
  { _id: { cause: 'NameError' }, count: 5 },
  { _id: { cause: 'NonStreamableDatasetError' }, count: 3 },
  { _id: { cause: 'Exception' }, count: 3 },
  { _id: { cause: 'ChunkedEncodingError' }, count: 3 },
  { _id: { cause: 'SSLError' }, count: 3 },
  { _id: { cause: 'ExpectedMoreSplits' }, count: 2 },
  { _id: { cause: 'ConversionException' }, count: 2 },
  { _id: { cause: null }, count: 2 },
  { _id: { cause: 'ParserError' }, count: 2 },
  { _id: { cause: 'RepositoryNotFoundError' }, count: 2 },
  { _id: { cause: 'OperationalError' }, count: 1 },
  { _id: { cause: 'UnicodeDecodeError' }, count: 1 },
  { _id: { cause: 'TransactionException' }, count: 1 },
  { _id: { cause: 'OutOfMemoryException' }, count: 1 },
  { _id: { cause: 'DoesNotExist' }, count: 1 },
  { _id: { cause: 'ImportError' }, count: 1 },
  { _id: { cause: 'HFValidationError' }, count: 1 },
  { _id: { cause: 'JSONDecodeError' }, count: 1 },
  { _id: { cause: 'EOFError' }, count: 1 },
  { _id: { cause: 'TypeMismatchException' }, count: 1 },
  { _id: { cause: 'InternalException' }, count: 1 }
]