sails: Actual Memory Leak when accessing external DBs
Waterline version: 0.11.2
Node version: v6.3.1
NPM version: 3.10.5
Operating system: Ubuntu 16.04.1 LTS
Hi all,
Yep, I too roll my eyes each time someone says they’ve found a Sails/Waterline memory leak, but this time… I’ve found one. I’m pretty sure it’s “caused” by dependencies, and it only manifests itself in a production environment.
What I’m trying to do is migrate our DB from MySQL to PostgreSQL, so I’m running a loop over one model with 1M+ rows and pushing the records into another model. Each time I create a DB connection, another instance of tough-cookie is loaded into memory, and after a few thousand records Node runs out of memory. A heap dump reveals a few hundred thousand copies of tough-cookie’s TLD array: https://github.com/SalesforceEng/tough-cookie/blob/c11a2d11d12348a35ef595c809e30e641a804a7d/lib/pubsuffix.js#L95-L96
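For anyone wanting to reproduce the inspection: a heap snapshot can be written from the running Sails process and loaded into Chrome DevTools. A minimal sketch of that kind of setup, assuming the `heapdump` npm module and a made-up action name:

```js
// Sketch only: `npm install heapdump`, hit this (made-up) action, then load the
// resulting .heapsnapshot file in Chrome DevTools to inspect the retained
// tough-cookie arrays.
const heapdump = require('heapdump');

module.exports = {
  dumpHeap: function (req, res) {
    const file = '/tmp/sails-' + Date.now() + '.heapsnapshot';
    heapdump.writeSnapshot(file, (err, filename) => {
      if (err) return res.serverError(err);
      return res.send('Heap snapshot written to ' + filename);
    });
  }
};
```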
This large-scale migration runs fine locally, but as soon as I’m accessing remote DBs the problem manifests, which is an issue in production.
Memory usage in the log below is measured with process.memoryUsage().rss:
*** Starting Sails in production ***
2016-08-04T01:34:02.835Z - info: ---- TIME --- MEM --- # --- Record ID
2016-08-04T01:34:02.835Z - info: ---- 01s --- 203MB --- 50 --- kwthz1c7Zj
2016-08-04T01:34:03.781Z - info: ---- 02s --- 221MB --- 100 --- CIjVS9wtxO
2016-08-04T01:34:04.654Z - info: ---- 03s --- 238MB --- 150 --- 1FUnEpVpOv
2016-08-04T01:34:05.546Z - info: ---- 04s --- 256MB --- 200 --- UMkcytCmhz
....
2016-08-04T01:34:19.627Z - info: ---- 18s --- 466MB --- 1000 --- tpban03BNs
2016-08-04T01:34:21.024Z - info: ---- 19s --- 507MB --- 1050 --- umdBBfLEjC
2016-08-04T01:34:21.896Z - info: ---- 20s --- 524MB --- 1100 --- yEmHHxAWyv
2016-08-04T01:34:22.796Z - info: ---- 21s --- 542MB --- 1150 --- rgDK9scqum
....
2016-08-04T01:34:38.375Z - info: ---- 37s --- 676MB --- 2000 --- txRr4nf0WW
2016-08-04T01:34:39.691Z - info: ---- 38s --- 712MB --- 2050 --- exypA9PjVC
2016-08-04T01:34:40.582Z - info: ---- 39s --- 729MB --- 2100 --- q4yUjZ4ct5
2016-08-04T01:34:41.482Z - info: ---- 40s --- 746MB --- 2150 --- 5AoIUcFSK1
....
2016-08-04T01:37:20.393Z - info: ---- 3m 19s --- 2531MB --- 9450 --- T7Pbquad
2016-08-04T01:37:21.305Z - info: ---- 3m 20s --- 2541MB --- 9500 --- hRsfquad
2016-08-04T01:37:22.337Z - info: ---- 3m 21s --- 2553MB --- 9550 --- Z4Ikquad
2016-08-04T01:37:23.296Z - info: ---- 3m 22s --- 2564MB --- 9600 --- rVGiquad
2016-08-04T01:37:24.372Z - info: ---- 3m 23s --- 2577MB --- 9650 --- T1nXquad
2016-08-04T01:37:25.366Z - info: ---- 3m 24s --- 2588MB --- 9700 --- oMsyquad
2016-08-04T01:37:26.381Z - info: ---- 3m 25s --- 2599MB --- 9750 --- PNzMquad
2016-08-04T01:37:27.397Z - info: ---- 3m 26s --- 2611MB --- 9800 --- rsjsquad
2016-08-04T01:37:28.377Z - info: ---- 3m 27s --- 2622MB --- 9850 --- nkJBquad
2016-08-04T01:37:29.434Z - info: ---- 3m 28s --- 2634MB --- 9900 --- lAT5quad
2016-08-04T01:37:30.463Z - info: ---- 3m 29s --- 2651MB --- 9950 --- L5yXquad
2016-08-04T01:37:31.514Z - info: ---- 3m 30s --- 2668MB --- 10000 --- Bml9quad
The code:
```js
doMigration: function (req, res) {
  // doMigration moves records from one model to another
  //
  // Guest is a model backed by MySQL
  // GuestPostgres is a model backed by PostgreSQL to which I'm migrating
  //
  // fast === false does row by row and property by property to better highlight invalid data
  res.send('Started');
  let progress = 0;
  let timeBegin = Date.now();
  let fast = true;
  let leftPad = function (input = '', length = 5, char = ' ') {
    return (Array(length).join(char) + input).slice(-length);
  };
  let bytesToString = function (bytes) {
    if (bytes > (1024 * 1024)) {
      return Math.floor(bytes / 1048576) + 'MB';
    } else {
      return Math.floor(bytes / 1024) + 'KB';
    }
  };
  let duration = function (timeBegin) {
    let seconds = Math.floor((Date.now() - timeBegin) / 1000);
    if (seconds > 3600) {
      return Math.floor(seconds / 3600) + 'h ' + Math.floor((seconds % 3600) / 60) + 'm ' + leftPad(seconds % 60, 2, '0') + 's';
    } else if (seconds > 60) {
      return Math.floor(seconds / 60) + 'm ' + leftPad(seconds % 60, 2, '0') + 's';
    } else {
      return leftPad(seconds, 2, '0') + 's';
    }
  };
  let updateIndvProps = function (guest, callbackUpdate) {
    if (fast) {
      ///////////////////////////////////////////////////////////////////////////
      // Update whole guest (FAST METHOD)
      GuestPostgres.update(
        {
          TMP_id: guest.TMP_id,
        },
        guest,
        updateError => callbackUpdate(updateError)
      );
      ///////////////////////////////////////////////////////////////////////////
    } else {
      ///////////////////////////////////////////////////////////////////////////
      // One by one populate each column (SLOW METHOD BUT SHOWS ERRORS WELL)
      async.eachLimit(
        _.without(Object.keys(guest), 'TMP_id'),
        1,
        (prop, callbackProp) => {
          let updateObj = {};
          updateObj[prop] = guest[prop];
          GuestPostgres.update(
            {
              TMP_id: guest.TMP_id,
            },
            updateObj,
            (updateError) => {
              if (updateError) sails.log.info('error on ' + guest.pool + ' - ' + guest.TMP_id + ' - prop: ' + prop + ' - ' + JSON.stringify(updateObj));
              return callbackProp(updateError);
            }
          );
        },
        error => callbackUpdate(error)
      );
      ///////////////////////////////////////////////////////////////////////////
    }
  };
  let upsertGuest = function (guest, callbackMaster) {
    // Upsert - create if not exists
    GuestPostgres.findOne({
      TMP_id: guest.TMP_id,
    }).exec((error, guestPoorFind) => {
      if (error) return callbackMaster(error);
      if (!guestPoorFind) {
        // Doesn't exist yet in PG so first create
        if (fast) {
          // FAST
          GuestPostgres.create(guest).exec(error => callbackMaster(error));
        } else {
          // SLOW METHOD
          GuestPostgres.create({
            TMP_id: guest.TMP_id,
          }).exec(error => {
            if (error) return callbackMaster(error);
            updateIndvProps(guest, callbackMaster);
          });
        }
      } else {
        updateIndvProps(guest, callbackMaster);
      }
    });
  };
  //
  // the main meat of the migration
  //
  // Get all clients
  Client.find({}).sort('pool ASC').exec((error, clients) => {
    async.eachLimit(
      clients,
      1, // one client at a time
      (client, callbackClient) => {
        let guestsChunkSize = 1000;
        let guestsReturnedOnThisFetch = 0;
        let guestsReturnedInTotal = 0;
        async.doWhilst(
          doWhilstCB => {
            // Get all entries for that client
            Guest.find({
              pool: client.pool,
            })
              .limit(guestsChunkSize)
              .skip(guestsReturnedInTotal)
              .exec((error, guests) => {
                guestsReturnedOnThisFetch = guests.length;
                guestsReturnedInTotal += guests.length;
                // calc longest client.pool length for leftPad
                // sails.log.info(_.chain(days).map(client => client.pool.length).reduce((a, b) => a > b ? a : b).valueOf());
                async.eachLimit(
                  guests,
                  // 1, // one at a time
                  20, // twenty guests at a time
                  (guest, callbackMaster) => {
                    progress++;
                    if (progress % (fast ? 50 : 1) === 0) {
                      let memUsed = bytesToString(process.memoryUsage().rss);
                      sails.log.info('----' + leftPad(duration(timeBegin), 10) + ' --- ' + leftPad(memUsed, 6) + ' --- ' + leftPad(progress, 7) + ' --- ' + leftPad(guest.TMP_id, 40) + ' --- ' + guest.pool);
                    }
                    return upsertGuest(guest, callbackMaster);
                  },
                  error => doWhilstCB(error)
                );
              });
          },
          () => guestsReturnedOnThisFetch === guestsChunkSize,
          error => callbackClient(error)
        );
      },
      error => {
        if (error) {
          console.error(error);
        } else {
          sails.log.info('All done!');
          sails.log.info('Time taken: ' + ((Date.now() - timeBegin) / 1000) + 's');
        }
      }
    );
  });
},
```
Things I have looked into:
- Connection pooling is turned on (see the config sketch after this list).
- /myController/doMigration is triggered via a single cURL command from bash and gets an immediate res.send('Started'); back, so there are no issues with sockets / sessions.
- In config/env/production.js, the external hostnames have been tried as both hostname and IP address, with no difference.
- If I tunnel through a bastion host, so that all DBs appear to be on 'localhost', the leak persists while in production mode.
- Only when I tunnel through a bastion host AND run in development mode does the leak go away, and the 1M+ row migration can go through. There is no “leak per row”, just a few fluctuations between 200 and 450MB, which GC picks up.
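For reference, the connection config looks roughly like the sketch below. Hosts and credentials are placeholders, and the `pool` flag is from memory of the sails-mysql / sails-postgresql adapter docs, so treat this as a sketch rather than my exact config/env/production.js:

```js
// Sketch only: placeholder hosts/credentials; `pool: true` is my recollection of
// the adapter option for enabling connection pooling, not copied from the real file.
module.exports = {
  connections: {
    mysqlLegacy: {
      adapter: 'sails-mysql',
      host: 'mysql.example.internal',    // also tried the raw IP, no difference
      port: 3306,
      user: 'migrator',
      password: 'xxxxxxxx',
      database: 'guests',
      pool: true
    },
    postgresNew: {
      adapter: 'sails-postgresql',
      host: 'postgres.example.internal', // also tried the raw IP, no difference
      port: 5432,
      user: 'migrator',
      password: 'xxxxxxxx',
      database: 'guests',
      pool: true
    }
  }
};
```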
For example, with NODE_ENV=development (the last point in the list above), RAM usage after 200,000 rows is fine:
....
2016-08-05T11:36:53.315Z - info: ---- 43m 28s --- 247MB --- 200000 --- 7zd8lJNBir --- evasbackpackers
....
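If it helps narrow things down, I can also re-run with a forced collection before each sample, to separate "not collected yet" from "cannot be collected". A sketch of that change to the logging branch of the migration code above (it requires starting the app with node --expose-gc app.js):

```js
// Inside the eachLimit iteratee, just before sampling memory.
// global.gc is only defined when node is started with --expose-gc.
if (progress % (fast ? 50 : 1) === 0) {
  if (global.gc) global.gc(); // force a full collection before sampling, to rule out lazy GC
  let memUsed = bytesToString(process.memoryUsage().rss);
  sails.log.info('----' + leftPad(duration(timeBegin), 10) + ' --- ' + leftPad(memUsed, 6) + ' --- ' + leftPad(progress, 7) + ' --- ' + leftPad(guest.TMP_id, 40) + ' --- ' + guest.pool);
}
```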
A hunch suggests this could be related to https://github.com/SalesforceEng/tough-cookie/issues/48, as the heap dump shows the same data. What I can’t understand is how or why NODE_ENV=production vs. NODE_ENV=development would make a difference.
What more information can I offer? Heapdumps? Spin up some RDS instances and populate them with garbage data to replicate?
— Wulf
About this issue
- Original URL
- State: closed
- Created 8 years ago
- Comments: 16 (13 by maintainers)
I’m the tough cookie.
Seriously @sailsbot … give a guy a break