aiobotocore: [tracking aiohttp] Possible Memory Leak with SSL Sessions
Hello,
I’m calling S3 using:
- Python 3.6.3
- aiobotocore 0.6.0
- Ubuntu 17.10
I've noticed that when I run a loop of async calls to retrieve data, memory usage keeps climbing, even though I expect it to stay roughly constant. Here is a test case I was able to come up with:
import asyncio
from itertools import islice, chain

import aiobotocore
import boto3
from botocore.client import Config
from botocore.exceptions import ClientError as BotoClientError


class FileService(object):
    def __init__(self, loop=None):
        config = Config(signature_version='s3v4')
        self._loop = loop or asyncio.get_event_loop()
        self._session = boto3.Session()
        self._asession = aiobotocore.get_session(loop=self._loop)
        self._config = config

    def _get_s3_client(self):
        # Use the credentials created by the boto3 session.
        creds = self._session.get_credentials()
        client = self._asession.create_client(
            's3', region_name="us-east-1",
            aws_secret_access_key=creds.secret_key,
            aws_access_key_id=creds.access_key,
            aws_session_token=creds.token,
            config=self._config)
        return client

    def readfiles(self, paths):
        return self._loop.run_until_complete(self.readfiles_async(paths))

    def readfile(self, path, client=None):
        return self._loop.run_until_complete(self.readfile_async(path, client=client))

    async def readfiles_async(self, paths):
        # A fresh client is created for every call.
        async with self._get_s3_client() as client:
            futures = [self.readfile_async(path, client=client) for path in paths]
            return await asyncio.gather(*futures)

    async def readfile_async(self, path, client=None):
        data = await self._get_object_async(client, path)
        return data

    async def _get_object_async(self, client, path):
        bucket, key = parse_s3_path(path)
        try:
            response = await client.get_object(Bucket=bucket, Key=key)
            async with response['Body'] as stream:
                return await stream.read()
        except BotoClientError as e:
            # Missing keys are treated as empty results; other errors propagate.
            if e.response['Error']['Code'] == 'NoSuchKey':
                return None
            raise


def parse_s3_path(path):
    # "s3://bucket/key" -> ("bucket", "key")
    s3_tokens = path.split('/', 3)
    try:
        return s3_tokens[2], s3_tokens[3]
    except IndexError:
        return s3_tokens[2], ''


def chunked(iterable, size=None):
    if not size:
        yield iterable
    else:
        it = iter(iterable)
        while True:
            s = islice(it, size)
            try:
                first = next(s)
            except StopIteration:
                return
            yield chain([first], s)


service = FileService()
ids = range(0, 10000)
for chunk in chunked(ids, 250):
    file_paths = ["s3://mybucket/%s.file" % x for x in chunk]
    all_data = service.readfiles(file_paths)
Each chunk increases the process's memory usage by a noticeable margin, and that memory does not seem to be released on subsequent chunks.
I am not an expert at debugging memory issues in Python, but I used objgraph to see which objects were being created on each subsequent loop iteration, and the majority appear to be SSL contexts.
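For reference, a minimal sketch of that kind of check, assuming objgraph is installed; the loop below is illustrative (not the exact script used here), reuses the FileService and chunked code above, and the bucket/key pattern is a placeholder:

```python
# Print which object types grew between chunks; in this issue the growing
# types were SSL-related.
import objgraph

service = FileService()

for i, chunk in enumerate(chunked(range(0, 1000), 250)):
    file_paths = ["s3://mybucket/%s.file" % x for x in chunk]
    service.readfiles(file_paths)
    print("--- after chunk %d ---" % i)
    # show_growth() reports types whose instance counts increased since the
    # previous call, which makes per-chunk leaks easy to spot.
    objgraph.show_growth(limit=10)
```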
Thanks, Tim
About this issue
- Original URL
- State: closed
- Created 6 years ago
- Comments: 18 (1 by maintainers)
yay, I think this is now fixed
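For anyone stuck on a version that predates the fix, one way to keep the number of SSL contexts bounded is to open a single client and reuse it for every chunk, instead of creating a new one per readfiles() call. A minimal sketch based on the reproduction above (the read_all_chunks coroutine is hypothetical and not part of the original report):

```python
# Hypothetical variant of the reproduction code: one client, and therefore one
# SSL context, is shared across all chunks.
import asyncio

async def read_all_chunks(service, ids, chunk_size=250):
    results = []
    async with service._get_s3_client() as client:
        for chunk in chunked(ids, chunk_size):
            file_paths = ["s3://mybucket/%s.file" % x for x in chunk]
            futures = [service.readfile_async(p, client=client) for p in file_paths]
            results.extend(await asyncio.gather(*futures))
    return results

service = FileService()
all_data = service._loop.run_until_complete(
    read_all_chunks(service, range(0, 10000)))
```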