exceljs: Failure when writing a large amount of data to Excel even though using streams

I am using exceljs with streams, but it still fails to export a large amount of data due to a heap out of memory error. My database table contains around 3 million records which I am trying to write into an Excel file. Any help would be much appreciated. Below is the piece of code (a screenshot of the heap error was attached to the original issue).

const QueryHelper = require('../../utils/query-helper');
const stream = require('stream');
const Excel = require('exceljs');
/**
 * Provides a repository for `AuditTrail`.
 * @memberof features.audit
 */
class AuditTrailRepository {

    constructor(context) {

        /**
         * @type {base.db.DbDriver}
         */
        this.connection = context.connection;

        /**
         * @type {string}
         */
        this.tableName = 'AuditTrail';

        /**
         * @type {ServerResponse}
         */
        this.response = context.response;

        /**
         * Object of the QueryHelper class
         * @type {utils.db.QueryHelper}
         */
        this.sqlForAuditTrail = new QueryHelper(this.connection.tables.auditTrail);
    }

    /**
     * Export all AuditTrail Data.
     */
    async exportAuditTrail() {
        const query = this.sqlForAuditTrail.select().build().toParam();
        let readableStream = new stream.Readable({
            objectMode: true,
            read(size) {}
        });

        // This method just pushes JSON objects into the readable stream as it receives streamed rows from the DB
        this.connection.runQueryParamStream(query.text, query.values, true, {
            readableStream: readableStream,
            objectMode: true,
            counter: 0
        });
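        // NOTE: read() above is a no-op and runQueryParamStream pushes rows as
        // fast as the database delivers them, so no backpressure ever reaches
        // the query; rows can buffer here faster than the workbook drains them.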

        let columns = [{
            header: 'Id',
            key: 'Id'
        },
        {
            header: 'UserId',
            key: 'UserId'
        },
        {
            header: 'Date',
            key: 'Date'
        },
        {
            header: 'Object',
            key: 'Object'
        },
        {
            header: 'ObjectId',
            key: 'ObjectId'
        },
        {
            header: 'Action',
            key: 'Action'
        },
        {
            header: 'Message',
            key: 'Message'
        },
        {
            header: 'Data',
            key: 'Data'
        },
        {
            header: 'UserMessage',
            key: 'UserMessage'
        }
        ];

        this.response.writeHead(200, {
            'Content-Type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            'Content-disposition': 'attachment; filename="AuditTrail.xlsx"'
        });

        const options = {
            stream: this.response,
            useStyles: true,
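            // Caveat: with useSharedStrings enabled, every unique string is
            // held in memory until the workbook is committed, which on its own
            // can exhaust the heap on a multi-million-row export.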
            useSharedStrings: true
        };
        const workbook = new Excel.stream.xlsx.WorkbookWriter(options);
        const worksheet = workbook.addWorksheet('AuditTrail');
        worksheet.columns = columns;

        readableStream.pipe(new ExcelTransform({
            writableObjectMode: true,
            readableObjectMode: false,
            workbook: workbook,
            worksheet: worksheet
        }));
    }
}

class ExcelTransform extends stream.Transform {
    constructor(options) {
        super(options);
        const self = this;
        self.workbook = options.workbook;
        self.workbook.stream.on('readable', function () {
            const chunk = self.workbook.stream.read();
            self.push(chunk);
        });
        self.workbook.stream.on('error', function (error) {
            console.log('::::Error:::', error);
        });
        self.workbook.stream.on('close', function () {
            console.log(':::::End::::', 'ended');
        });
        self.worksheet = options.worksheet;
    }

    _transform(data, encoding, callback) {
        this.worksheet.addRow(data).commit();
        callback();
    }

    _flush(callback) {
        this.workbook.commit();
        callback();
    }
}

module.exports = AuditTrailRepository;
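For what it's worth, I suspect part of the problem is that nothing here ever applies backpressure toward the database: the Readable's read() is a no-op and runQueryParamStream pushes every row the moment it arrives. A pull-based stream would only fetch more rows once the consumer has drained the buffer. A minimal sketch of what I mean (fetchNextRows is a hypothetical batched driver API, not something our driver exposes today):

const { Readable } = require('stream');

// fetchNextRows(n) is assumed to resolve to the next batch of up to n rows,
// and to an empty array once the table is exhausted.
function makeRowStream(fetchNextRows) {
    return new Readable({
        objectMode: true,
        async read() {
            try {
                // Only fetch more rows when the consumer has drained the buffer;
                // read() is not called again until push() delivers data.
                const rows = await fetchNextRows(100);
                if (rows.length === 0) {
                    this.push(null); // end of data
                    return;
                }
                for (const row of rows) {
                    this.push(row);
                }
            } catch (err) {
                this.destroy(err);
            }
        }
    });
}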

Most upvoted comments

We have resolved the issue with large amounts of data with this:

...
const workbookWriter = new Excel.stream.xlsx.WorkbookWriter(options);
const worksheet = workbookWriter.addWorksheet(sheetName);

...

const row = worksheet.getRow(currentRowIndex);

...

// Code piece to fill row cells with values...

...
row.commit();

// A way to cope with high memory usage.
// Looks like rows written to stream are not transferred to file in time, 
// so memory consumption grows because rows have relations to other objects.
// Memory of related objects is not released while row is not transferred to the file.
if (currentRowIndex % 10 === 0) {
	await this.sleep(0);
}

...

private async sleep(timeMs: number): Promise<void> {
	return new Promise((ok) =>
		setTimeout(() => {
			ok();
		}, timeMs)
	);
}

...

The comment in the example code is only my assumption, but the timeout really does the trick. We call it every 10th row for performance reasons; if you want the best memory usage, call it on every single row.
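Pieced together, the workaround looks roughly like this as plain JavaScript (a minimal sketch; rows and filename are placeholders, and rows can be any iterable or async iterable of values accepted by addRow):

const Excel = require('exceljs');

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function exportRows(rows, filename) {
    const workbookWriter = new Excel.stream.xlsx.WorkbookWriter({ filename });
    const worksheet = workbookWriter.addWorksheet('Sheet1');

    let currentRowIndex = 0;
    for await (const values of rows) {
        worksheet.addRow(values).commit();
        currentRowIndex += 1;
        // Yield to the event loop so committed rows actually get flushed and
        // their memory released; every 10th row is a performance compromise.
        if (currentRowIndex % 10 === 0) {
            await sleep(0);
        }
    }

    await workbookWriter.commit();
}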

I have tried using streams; streams are the best solution for heap memory issues. I have used this method to write 10 lakh (1 million) rows of DB data to an xlsx file and it worked 100% (tested).

const models = require("../models");
const Excel = require("exceljs");

try {
  const { count } = await models.Companies_data.findAndCountAll();

// construct a streaming XLSX workbook writer with styles and shared strings
const options = {
  filename: `./${Date.now()}-Excel.xlsx`,
  useStyles: true,
  useSharedStrings: true,
};

const workbook = new Excel.stream.xlsx.WorkbookWriter(options);
// sheet name
const ws = workbook.addWorksheet("My Sheet");
const batch_limit = 250;
const total_avg_count = Math.ceil(count / batch_limit);
// header row of the Excel file
ws.addRow([
  "id",
  "no",
  "name",
  "rating",
  "reviews",
  "hq",
  "employee",
  "createdAt",
  "updatedAt",
]).commit();

let i = 0;
  // total number of batches
while (i < total_avg_count) {
  let table_data = await models.Companies_data.findAll({
    order: [["id", "ASC"]],
    offset: batch_limit * i,
    limit: batch_limit,
  });

  // batch loop
  table_data.forEach(values => {
    ws.addRow([
      values?.dataValues.id ?? "",
      values?.dataValues.no ?? "",
      values?.dataValues.name ?? "",
      values?.dataValues.rating ?? "",
      values?.dataValues.reviews ?? "",
      values?.dataValues.hq ?? "",
      values?.dataValues.employee ?? "",
      values?.dataValues.createdAt ?? "",
      values?.dataValues.updatedAt ?? "",
    ]).commit();
  });
  i++;
}

await workbook.commit();
// send a JSON response on success
reply.send({ message: "successfully created excel file" });

} catch (error) {
  console.log(error);
  reply.send(error);
}
};
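One caveat with the batching above: offset pagination makes the database re-scan all skipped rows on every query, so later batches get progressively slower on large tables. If the table has an indexed, monotonically increasing id (as the ORDER BY suggests), keyset pagination avoids that. A sketch against the same model, inside the same async handler (Op is Sequelize's operator set; models, batch_limit and ws come from the snippet above):

const { Op } = require('sequelize');

// Keyset pagination: filter on the last id seen instead of making the
// database skip `offset` rows on every batch.
let lastId = 0;
for (;;) {
  const batch = await models.Companies_data.findAll({
    where: { id: { [Op.gt]: lastId } },
    order: [['id', 'ASC']],
    limit: batch_limit,
  });
  if (batch.length === 0) break;

  for (const row of batch) {
    ws.addRow([row.id, row.no, row.name /* ...remaining columns */]).commit();
  }
  lastId = batch[batch.length - 1].id;
}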

My team is also experiencing this issue. In our case we only have around 30,000 rows, but heavy styling creates a lot of overhead.

Commenting to subscribe. If anyone else has ideas for heavy styling overhead, please let me know.
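One thing that may help with styling overhead (a sketch, not verified against this issue): exceljs lets you attach styles at the column level, and newly created cells inherit row/column styles, so a heavy style can exist as one shared object instead of being rebuilt for every cell:

const Excel = require('exceljs');

const workbook = new Excel.stream.xlsx.WorkbookWriter({ filename: 'styled.xlsx' });
const worksheet = workbook.addWorksheet('Report');

// One shared style object instead of a fresh object per cell; exceljs treats
// assigned styles as shared references, so mutating it later affects all
// cells that use it.
const moneyStyle = { numFmt: '#,##0.00', font: { name: 'Calibri', size: 10 } };

worksheet.columns = [
    { header: 'Name', key: 'name', width: 32 },
    // Column-level style: new cells in this column inherit it on creation.
    { header: 'Amount', key: 'amount', width: 14, style: moneyStyle },
];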