aiobotocore: [tracking aiohttp] Possible Memory Leak with SSL Sessions

Hello,

I’m calling S3 using:

- Python 3.6.3
- aiobotocore 0.6.0
- Ubuntu 17.10

I've noticed that when I run a loop of async calls to retrieve data, my memory usage continually goes up, even though I expect it to stay roughly constant. Here is a test case I was able to come up with:

import asyncio
import aiobotocore
from botocore.client import Config
import boto3
from botocore.exceptions import ClientError as BotoClientError
from itertools import islice, chain

class FileService(object):
    def __init__(self, loop=None):
        config = Config(signature_version='s3v4')
        self._loop = loop or asyncio.get_event_loop()

        self._session = boto3.Session()
        self._asession = aiobotocore.get_session(loop=self._loop)
        self._config = config

    def _get_s3_client(self):
        # Use the credentials created by boto3 session.
        creds = self._session.get_credentials()
        client = self._asession.create_client('s3', region_name="us-east-1",
                                              aws_secret_access_key=creds.secret_key,
                                              aws_access_key_id=creds.access_key,
                                              aws_session_token=creds.token,
                                              config=self._config)
        return client

    def readfiles(self, paths):
        return self._loop.run_until_complete(self.readfiles_async(paths))

    def readfile(self, path, client=None):
        return self._loop.run_until_complete(self.readfile_async(path, client=client))

    async def readfiles_async(self, paths):
        async with self._get_s3_client() as client:
            futures = [self.readfile_async(path, client=client) for path in paths]
            return await asyncio.gather(*futures)

    async def readfile_async(self, path, client=None):
        data = await self._get_object_async(client, path)
        return data

    async def _get_object_async(self, client, path):
        bucket, key = parse_s3_path(path)
        try:
            response = await client.get_object(Bucket=bucket, Key=key)
            async with response['Body'] as stream:
                return await stream.read()
        except BotoClientError as e:
            # Treat a missing key as "no data"; re-raise anything else.
            if e.response['Error']['Code'] == 'NoSuchKey':
                return None
            raise


def parse_s3_path(path):
    # "s3://bucket/key" -> ("bucket", "key"); the key may contain further slashes.
    s3_tokens = path.split('/', 3)
    try:
        return s3_tokens[2], s3_tokens[3]
    except IndexError:
        return s3_tokens[2], ''


def chunked(iterable, size=None):
    if not size:
        yield iterable
    else:
        it = iter(iterable)
        while True:
            s = islice(it, size)
            try:
                first = next(s)
            except StopIteration:
                # Source iterator exhausted; end the generator cleanly (PEP 479).
                return
            yield chain([first], s)


service = FileService()

ids = range(0, 10000)
for chunk in chunked(ids, 250):
    file_paths = ["s3://mybucket/%s.file" % x for x in chunk]
    all_data = service.readfiles(file_paths)
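
To make the growth easier to see, the loop can print the process RSS after each chunk. This is just a sketch reusing the definitions above (current_rss_mib is a helper I'm making up here for illustration, and reading /proc/self/status is Linux-only, which is fine on Ubuntu):

def current_rss_mib():
    # Linux-only: current resident set size of this process, in MiB.
    with open('/proc/self/status') as f:
        for line in f:
            if line.startswith('VmRSS:'):
                return int(line.split()[1]) / 1024.0  # reported in kB
    return 0.0

for i, chunk in enumerate(chunked(range(0, 10000), 250)):
    file_paths = ["s3://mybucket/%s.file" % x for x in chunk]
    all_data = service.readfiles(file_paths)
    print("chunk %d: RSS %.1f MiB" % (i, current_rss_mib()))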

Each chunk increases the memory usage by a noticeable margin, and it does not seem to be released on subsequent chunks.

I am not an expert at debugging memory issues in Python, but I tried using objgraph to see which objects were being created on each subsequent loop iteration, and the majority appear to be SSL contexts.
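
For anyone who wants to reproduce that check, something along these lines works with objgraph (a sketch reusing the definitions above, not my exact script); objgraph.show_growth() prints the object types whose counts increased since its previous call:

import objgraph

service = FileService()

for i, chunk in enumerate(chunked(range(0, 10000), 250)):
    file_paths = ["s3://mybucket/%s.file" % x for x in chunk]
    service.readfiles(file_paths)
    print("--- after chunk %d ---" % i)
    # Show the top 10 object types that grew since the previous call.
    objgraph.show_growth(limit=10)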

Thanks, Tim

About this issue

  • Original URL
  • State: closed
  • Created 6 years ago
  • Comments: 18 (1 by maintainers)

Most upvoted comments

yay, I think this is now fixed