nodejs-storage: createReadStream doesn't release enough memory since version 2.2.0

Environment details

  • OS: Google AppEngine (production) / macOS 10.13.6 (development)
  • Node.js version: 10.15.3
  • npm version: 6.9.0
  • @google-cloud/storage version: 2.2.0 and up

Hi guys, I’m running into this issue when using the file.createReadStream method. It seems this problem was introduced in version 2.2.0 and is present in all versions since then (including version 2.5.0). I’m using version 2.1.0 to avoid running into this issue.

My specific use case:

  • I’m streaming files that are between 100MB and 200MB in size.
  • I’m using the node-throttle module.
  • I’m piping the stream into the HTTP response of Express.js
// Open a ranged read stream for the requested object.
const readStream = bucket.file(filename).createReadStream({ start, end });

// Socket timeouts are logged as warnings; every other failure is an error.
readStream.on("error", err => {
    const level = err.code === "ESOCKETTIMEDOUT" ? "warn" : "error";
    logger[level]("Error reading file stream", err);
});

// Rate-limit the bytes, then forward them to the HTTP response.
readStream.pipe(throttle).pipe(res);

Some measurements I made using the memory-usage module:

  • v2.1.0: Graph Google Cloud Storage v2 1 0

  • v2.2.0: Graph Google Cloud Storage v2 2 0

  • v2.4.0: Graph Google Cloud Storage v2 4 0

  • v2.5.0: Graph Google Cloud Storage v2 5 0

As you can see, in the version 2.1.0 graph memory usage drops back down, while in the later versions it increases linearly over time and the memory is never released, even after streaming has stopped.

About this issue

  • Original URL
  • State: closed
  • Created 5 years ago
  • Reactions: 1
  • Comments: 34 (19 by maintainers)

Most upvoted comments

I did a rewrite of the script to remove Express and the browser integration for the memory UI. I can reproduce what looks like memory retention when using master (internal request library: fetch via teeny-request), and see it disappear when using 2.1.0 (internal request library: request). Setting the range doesn’t have an effect.

I tested again with a larger file (100 MB), and memory management is different; however, neither version retains more than about 5 MB above its starting point.

Theories so far:

  • Node stream internals make different memory retention judgements based on demand
  • node-throttle is bringing to light that something is wrong with backpressure in some internal stream from the request library

More testing needed.

{
  "name": "gissue-685",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "test": "node --expose-gc ."
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "dependencies": {
    "@google-cloud/storage": "github:googleapis/nodejs-storage",
    "throttle": "^1.0.3"
  }
}

'use strict'

const fs = require('fs')
const {Storage} = require('@google-cloud/storage')
const Throttle = require('throttle')

// Heap usage (in MB) sampled at startup; logMemoryUsage reports growth
// relative to this value. getMemory is a function declaration, so it is
// hoisted and callable here even though it is defined further down.
const BASE_MEMORY = getMemory()

// Client created with no explicit options, then a handle to the
// 'index.csv.gz' object in the 'gcp-public-data-landsat' bucket.
const storage = new Storage()
const bucket = storage.bucket('gcp-public-data-landsat')
const file = bucket.file('index.csv.gz')

// Read the object up to byte offset 1e+8 and rate-limit it through Throttle.
const source = file.createReadStream({end: 1e+8}) // 100 MB
const throttled = source.pipe(new Throttle(24 * 1024))

// The no-op 'data' listener keeps the throttled stream flowing so the whole
// range is consumed without anything being stored.
throttled.on('data', () => {})

// Once everything has been read, wait one more second so a final memory
// sample can be printed, then stop the process.
throttled.on('end', () => {
  console.log('Ended')
  setTimeout(() => process.exit(), 1000)
})

/**
 * Reports the current V8 heap usage.
 *
 * @returns {number} `heapUsed` in megabytes (10^6 bytes), rounded to the
 *   nearest integer.
 */
function getMemory() {
  const BYTES_PER_MB = 1000000
  const {heapUsed} = process.memoryUsage()
  return Math.round(heapUsed / BYTES_PER_MB)
}

/**
 * Prints how far the heap has grown (or shrunk) relative to BASE_MEMORY,
 * in megabytes.
 */
function logMemoryUsage() {
  const deltaMb = getMemory() - BASE_MEMORY
  console.log('Memory usage:', `${deltaMb} mb`)
}

// Sample once per second for as long as the process is alive.
setInterval(logMemoryUsage, 1000)
$ npm t

Sure. Here’s the complete implementation:

const { pipeline } = require("stream");

const { Storage } = require("@google-cloud/storage");
const Throttle = require("throttle");
const express = require("express");

// Express application that serves the streaming endpoint.
const app = express();
// Storage client; "{PROJECT_ID}" is a placeholder to replace before running.
const storage = new Storage({ projectId: "{PROJECT_ID}" });
// Bucket handle; "{BUCKET_NAME}" is a placeholder to replace before running.
const bucket = storage.bucket("{BUCKET_NAME}");

/**
 * GET /?filename=<object name>
 *
 * Streams the named object from the bucket to the client, throttled to
 * 24 KiB per second.
 *
 * Responds with:
 *   - 400 when the `filename` query parameter is missing or not a string,
 *   - 500 when the object's metadata cannot be read,
 *   - 200 with the object's bytes otherwise.
 */
app.get("/", async (req, res) => {
  const { filename } = req.query;
  // Express can parse a repeated or nested query key into an array or object,
  // so accept a non-empty string only.
  if (typeof filename !== "string" || filename === "") {
    res.status(400).send("Missing required query parameter: filename");
    return;
  }

  const file = bucket.file(filename);

  // A rejection inside an async Express 4 handler is not forwarded to the
  // framework, so it must be caught here or the request would never finish.
  let fileSize;
  try {
    const [metadata] = await file.getMetadata();
    // NOTE(review): `size` presumably arrives as a numeric string — coerce it
    // so the arithmetic below is explicit rather than implicit.
    fileSize = Number(metadata.size);
  } catch (err) {
    console.error("Error reading file metadata", err);
    res.status(500).end();
    return;
  }

  const start = 0;
  const end = fileSize - 1;

  res.status(200);
  res.set({
    "Content-Length": fileSize,
    // Content-Range must start with the range unit ("bytes").
    "Content-Range": `bytes ${start}-${end}/${fileSize}`,
    "Accept-Ranges": "bytes",
    Connection: "Keep-Alive",
    "Keep-Alive": "timeout=2, max=100",
    "Content-Type": "audio/mp3",
    "X-Accel-Buffering": "no"
  });

  const throttle = new Throttle(24 * 1024);

  // pipeline() destroys every stream in the chain when any of them fails or
  // closes early. Plain .pipe() would leave the storage read stream open
  // after a client disconnect and leave the response open after a read error.
  pipeline(file.createReadStream({ start, end }), throttle, res, err => {
    if (err) {
      console.error("Error reading file stream", err);
    }
  });
});

// Listen on PORT from the environment, falling back to 3000, and report the
// port that was actually bound.
const server = app.listen(process.env.PORT || 3000, () => {
  const { port } = server.address();
  console.info(`Server running on port ${port}`);
});

You should call it with GET http://localhost:3000/?filename={FILE_NAME}.mp3 (you need to stream an MP3 file - I use 128kbps MP3s)

I hope this helps!