aiobotocore: [tracking aiohttp] Possible Memory Leak with SSL Sessions
Hello,
I’m calling S3 using:
- Python 3.6.3
- aiobotocore 0.6.0
- Ubuntu 17.10
I've noticed that when I run a loop of async calls to retrieve data, memory usage keeps climbing, even though I expect it to stay roughly constant. Here is a test case I was able to come up with:
import asyncio
from itertools import islice, chain

import aiobotocore
import boto3
from botocore.client import Config
from botocore.exceptions import ClientError as BotoClientError


class FileService(object):
    def __init__(self, loop=None):
        config = Config(signature_version='s3v4')
        self._loop = loop or asyncio.get_event_loop()
        self._session = boto3.Session()
        self._asession = aiobotocore.get_session(loop=self._loop)
        self._config = config

    def _get_s3_client(self):
        # Use the credentials created by the boto3 session.
        creds = self._session.get_credentials()
        client = self._asession.create_client(
            's3', region_name="us-east-1",
            aws_secret_access_key=creds.secret_key,
            aws_access_key_id=creds.access_key,
            aws_session_token=creds.token,
            config=self._config)
        return client

    def readfiles(self, paths):
        return self._loop.run_until_complete(self.readfiles_async(paths))

    def readfile(self, path, client=None):
        return self._loop.run_until_complete(self.readfile_async(path, client=client))

    async def readfiles_async(self, paths):
        # A fresh client is created for every call.
        async with self._get_s3_client() as client:
            futures = [self.readfile_async(path, client=client) for path in paths]
            return await asyncio.gather(*futures)

    async def readfile_async(self, path, client=None):
        data = await self._get_object_async(client, path)
        return data

    async def _get_object_async(self, client, path):
        bucket, key = parse_s3_path(path)
        try:
            response = await client.get_object(Bucket=bucket, Key=key)
            async with response['Body'] as stream:
                return await stream.read()
        except BotoClientError as e:
            # Missing keys are treated as empty results; other errors propagate.
            if e.response['Error']['Code'] == 'NoSuchKey':
                return None
            raise


def parse_s3_path(path):
    # "s3://bucket/key" -> ("bucket", "key")
    s3_tokens = path.split('/', 3)
    try:
        return s3_tokens[2], s3_tokens[3]
    except IndexError:
        return s3_tokens[2], ''


def chunked(iterable, size=None):
    if not size:
        yield iterable
    else:
        it = iter(iterable)
        while True:
            s = islice(it, size)
            try:
                first = next(s)
            except StopIteration:
                return
            yield chain([first], s)


service = FileService()
ids = range(0, 10000)
for chunk in chunked(ids, 250):
    file_paths = ["s3://mybucket/%s.file" % x for x in chunk]
    all_data = service.readfiles(file_paths)
Each chunk increases the process's memory usage by a noticeable margin, and that memory does not seem to be released on subsequent chunks.
I am not an expert at debugging memory issues in Python, but I used objgraph to see which objects were being created on each subsequent loop iteration, and the majority appear to be SSL contexts.
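For reference, a minimal sketch of that kind of check, assuming objgraph is installed; the loop below is illustrative (not the exact script used here), reuses the FileService and chunked code above, and the bucket/key pattern is a placeholder:

```python
# Print which object types grew between chunks; in this issue the growing
# types were SSL-related.
import objgraph

service = FileService()

for i, chunk in enumerate(chunked(range(0, 1000), 250)):
    file_paths = ["s3://mybucket/%s.file" % x for x in chunk]
    service.readfiles(file_paths)
    print("--- after chunk %d ---" % i)
    # show_growth() reports types whose instance counts increased since the
    # previous call, which makes per-chunk leaks easy to spot.
    objgraph.show_growth(limit=10)
```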
Thanks, Tim
About this issue
- Original URL
- State: closed
- Created 6 years ago
- Comments: 18 (1 by maintainers)
yay, I think this is now fixed
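For anyone stuck on a version that predates the fix, one way to keep the number of SSL contexts bounded is to open a single client and reuse it for every chunk, instead of creating a new one per readfiles() call. A minimal sketch based on the reproduction above (the read_all_chunks coroutine is hypothetical and not part of the original report):

```python
# Hypothetical variant of the reproduction code: one client, and therefore one
# SSL context, is shared across all chunks.
import asyncio

async def read_all_chunks(service, ids, chunk_size=250):
    results = []
    async with service._get_s3_client() as client:
        for chunk in chunked(ids, chunk_size):
            file_paths = ["s3://mybucket/%s.file" % x for x in chunk]
            futures = [service.readfile_async(p, client=client) for p in file_paths]
            results.extend(await asyncio.gather(*futures))
    return results

service = FileService()
all_data = service._loop.run_until_complete(
    read_all_chunks(service, range(0, 10000)))
```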