boto3: ValueError: Invalid timestamp "": Unknown string format

code

# Submit a 7-day restore request for every GLACIER object in the bucket that
# does not already report a restore status.
for obj_sum in bucket.objects.all():
    if obj_sum.storage_class == 'GLACIER':
        obj = s3.Object(obj_sum.bucket_name, obj_sum.key)
        # Reading obj.restore lazily loads the object's metadata; that load is
        # where the ValueError in the traceback below is raised when a
        # timestamp header on the object cannot be parsed.
        if not obj.restore:
            print('Submitting restoration request: %s' % obj.key)
            obj.restore_object(RestoreRequest={"Days": 7})

stacktrace:

Traceback (most recent call last):
  File "/Users/user/app/app-api/restore_s3.py", line 21, in <module>
    temp_restore()
  File "/Users/user/app/app-api/restore_s3.py", line 11, in temp_restore
    if not obj.restore:
  File "/Users/user/app/venv2/lib/python2.7/site-packages/boto3/resources/factory.py", line 339, in property_loader
    self.load()
  File "/Users/user/app/venv2/lib/python2.7/site-packages/boto3/resources/factory.py", line 505, in do_action
    response = action(self, *args, **kwargs)
  File "/Users/user/app/venv2/lib/python2.7/site-packages/boto3/resources/action.py", line 83, in __call__
    response = getattr(parent.meta.client, operation_name)(**params)
  File "/Users/user/app/venv2/lib/python2.7/site-packages/botocore/client.py", line 159, in _api_call
    return self._make_api_call(operation_name, kwargs)
  File "/Users/user/app/venv2/lib/python2.7/site-packages/botocore/client.py", line 483, in _make_api_call
    operation_model, request_dict)
  File "/Users/user/app/venv2/lib/python2.7/site-packages/botocore/endpoint.py", line 141, in make_request
    return self._send_request(request_dict, operation_model)
  File "/Users/user/app/venv2/lib/python2.7/site-packages/botocore/endpoint.py", line 168, in _send_request
    request, operation_model, attempts)
  File "/Users/user/app/venv2/lib/python2.7/site-packages/botocore/endpoint.py", line 233, in _get_response
    response_dict, operation_model.output_shape)
  File "/Users/user/app/venv2/lib/python2.7/site-packages/botocore/parsers.py", line 211, in parse
    parsed = self._do_parse(response, shape)
  File "/Users/user/app/venv2/lib/python2.7/site-packages/botocore/parsers.py", line 602, in _do_parse
    member_shapes, final_parsed)
  File "/Users/user/app/venv2/lib/python2.7/site-packages/botocore/parsers.py", line 658, in _parse_non_payload_attrs
    member_shape, headers[header_name])
  File "/Users/user/app/venv2/lib/python2.7/site-packages/botocore/parsers.py", line 258, in _parse_shape
    return handler(shape, node)
  File "/Users/user/app/venv2/lib/python2.7/site-packages/botocore/parsers.py", line 149, in _get_text_content
    return func(self, shape, text)
  File "/Users/user/app/venv2/lib/python2.7/site-packages/botocore/parsers.py", line 412, in _handle_timestamp
    return self._timestamp_parser(text)
  File "/Users/user/app/venv2/lib/python2.7/site-packages/botocore/utils.py", line 336, in parse_timestamp
    raise ValueError('Invalid timestamp "%s": %s' % (value, e))
ValueError: Invalid timestamp "Thu,%2031%20Dec%202099%2020:00:00%20GMT": Unknown string format

tested on: python 2.7 and 3.5 python-dateutil==2.5.3 and python-dateutil==2.4.2 boto==2.39.0 and latest boto3 1.4.0

About this issue

  • Original URL
  • State: closed
  • Created 8 years ago
  • Reactions: 1
  • Comments: 18 (2 by maintainers)

Most upvoted comments

Problem

Any un-parseable Expires header will mean you cannot retrieve the object with boto3.

Proposed Solution

In botocore.parsers.ResponseParser, _handle_timestamp should return None, or just the original text, if the value cannot be converted to a datetime (possibly wrapped in a custom type, i.e. Unparseable(data)).

Or, the spec for “GetObjectResponse” should note a default value if “Expires” fails to parse.

Details

@kyleknap Here’s a reproduction from an internal issue we had with migrating from boto to boto3.

Some versions of boto would URL-encode any unicode values passed in headers, resulting in objects that cannot be retrieved by boto3.

It’s not present in 2.45.0, but I can confirm it is present in boto==2.38.0

import boto, six, datetime, os
from boto.s3.key import Key

# Target bucket, a random hex key name, and a random 16-byte payload.
bucket_name = 'my-bucket-to-test'
# NOTE: .encode('hex') on the urandom result is a Python 2 idiom.
key_name = os.urandom(8).encode('hex')
data = os.urandom(16)
hdr_expires = u'Tue, 24 Jan 2017 07:52:32 GMT' # this being a unicode string causes the issue

# Upload the payload with the unicode Expires value passed as a request header.
conn = boto.connect_s3()
bucket = conn.get_bucket(bucket_name, validate=False)
key = bucket.new_key(key_name)
key.set_contents_from_file(
    six.BytesIO(data),
    headers={'Expires': hdr_expires})

# boto won't expose the Expires header without a subclass
class Key2(Key):
    """Key subclass that remembers the raw 'expires' header it is handed.

    The value is stored on ``_expires_header`` so the reproduction can print
    it after a download.
    """

    def __init__(self, bucket=None, name=None):
        super(Key2, self).__init__(bucket=bucket, name=name)
        # Stays an empty dict until an 'expires' header has been seen.
        self._expires_header = {}

    def handle_addl_headers(self, headers):
        """Record the value of any 'expires' entry in *headers*.

        *headers* is iterated as (name, value) pairs; presumably boto passes
        the response headers here -- confirm against boto's Key.
        """
        for header_name, header_value in headers:
            if header_name != 'expires':
                continue
            self._expires_header = header_value

# Download the object through Key2 so the 'expires' header value is captured.
out = six.BytesIO()
fetched = Key2(bucket, key_name)
fetched.get_contents_to_file(out)
# Print the captured header value (Python 2 print statement).
print 'Boto2 Expires: %r' % fetched._expires_header

with boto==2.38.0 this prints

Boto2 Expires: 'Tue,%2024%20Jan%202017%2007:52:32%20GMT'

with boto==2.45.0 this prints

Boto2 Expires: 'Tue, 24 Jan 2017 07:52:32 GMT'

The result - objects affected by that bug cannot be read by boto3.

I hit this issue after upgrading an unmaintained wagtail (which is django under the hood) site from an ancient version, which also meant migrating from boto to boto3.

All of the static files and media previously uploaded by boto had the same url-encoded value in the ‘Expires’ metadata:

image

This caused my manage.py collectstatic to fail when botocore.utils.parse_timestamp() encountered that value.

Building on the excellent workarounds suggested above, I came up with something slightly simpler:

"""boto_boto3_migration_patch.py"""

import botocore.utils
from urllib.parse import unquote


def patch_boto3_metadata_parsing():
    """Make boto3 able to parse url-encoded S3 metadata timestamps set by boto.

    Replaces ``botocore.utils.parse_timestamp`` with a wrapper that
    percent-decodes any string value containing ``%`` (for example
    ``"Tue,%2024%20Jan%202017%2007:52:32%20GMT"``) before delegating to the
    original parser. Values without ``%`` and non-string values are passed
    through untouched.

    NOTE(review): this only rebinds the name in ``botocore.utils``; modules
    that copied the function before the patch ran may keep the original --
    call this as early as possible and confirm it takes effect in your setup.

    https://github.com/boto/boto3/issues/808
    """

    original_func = botocore.utils.parse_timestamp

    def new_func(value):
        # Only strings can be url-encoded, and a well-formed timestamp string
        # does not contain '%', so unquoting is limited to the broken values.
        if isinstance(value, str) and "%" in value:
            value = unquote(value)
        return original_func(value)

    botocore.utils.parse_timestamp = new_func

I just call patch_boto3_metadata_parsing() at the top of my django settings/production.py and collectstatic now works 😃

@nathan-muir added another line that resolved this issue for me. In the code patch I added:

botocore.parsers.DEFAULT_TIMESTAMP_PARSER = _parse_timestamp

import botocore.session
from boto3 import setup_default_session
from boto3.session import Session
from botocore import parsers
from botocore.utils import parse_timestamp


def _parse_timestamp(value):
    """Parse *value* with botocore's ``parse_timestamp``, tolerating failures.

    :return: whatever ``parse_timestamp`` returns, or ``None`` when it raises
        ``ValueError``.
    """
    try:
        parsed = parse_timestamp(value)
    except ValueError:
        # An unparseable timestamp is reported as "no value" instead of failing.
        parsed = None
    return parsed


def get_session(**kwargs):
    """Return a boto3 ``Session`` that uses the tolerant timestamp parser.

    Besides building the session, this also makes the customised botocore
    session the boto3 default session and overwrites
    ``parsers.DEFAULT_TIMESTAMP_PARSER`` with ``_parse_timestamp``.

    :param kwargs: passed through unchanged to ``Session``.
    :return: a ``Session`` bound to the customised botocore session.
    """
    factory = parsers.ResponseParserFactory()
    factory.set_parser_defaults(timestamp_parser=_parse_timestamp)

    core_session = botocore.session.get_session()
    core_session.register_component('response_parser_factory', factory)
    setup_default_session(botocore_session=core_session)

    # Also swap the module-level default parser in botocore.parsers.
    parsers.DEFAULT_TIMESTAMP_PARSER = _parse_timestamp

    return Session(botocore_session=core_session, **kwargs)

I then called get_session in the __init__.py of my Django project. This patches the timestamp parser across the entire project. Thanks for all the help 👍

@Dansong00 You can override the timestamp_parser so it won’t fail (but you won’t be able to read the value)… I used something like this:


import botocore.session
from boto3.session import Session
from botocore.parsers import ResponseParserFactory
from botocore.utils import parse_timestamp

def _parse_timestamp(value):
    """Return the parsed timestamp for *value*, or ``None`` if it is invalid.

    Delegates to botocore's ``parse_timestamp`` and converts a ``ValueError``
    into a ``None`` result.
    """
    result = None
    try:
        result = parse_timestamp(value)
    except ValueError:
        # Leave result as None: the value could not be parsed.
        pass
    return result

def get_session(**kwargs):
    """Return a boto3 ``Session`` whose responses use ``_parse_timestamp``.

    A ``ResponseParserFactory`` configured with the tolerant timestamp parser
    is registered on a botocore session, which then backs the returned
    ``Session``.

    :param kwargs: passed through unchanged to ``Session``.
    """
    factory = ResponseParserFactory()
    factory.set_parser_defaults(timestamp_parser=_parse_timestamp)

    core_session = botocore.session.get_session()
    core_session.register_component('response_parser_factory', factory)

    return Session(botocore_session=core_session, **kwargs)

You can use the module as follows:

from .mymodule import get_session

# Build the session, then create the S3 client and resource from it.
s = get_session()
client = s.client('s3')
resource =s.resource('s3')