sequelize: Limit is broken for findAll and findAndCountAll with include

Consider we have a two simple models “Events” and “Tags” with many-to-many relation and we want to get all events with specific tag. And we want to limit maximum number of response items.

(Consider you are using NodeJS 7.6+ with async/await or Babel with async/await plugin)

'use strict'

const Sequelize = require('sequelize')

const dbConfig = {
    type: 'mysql', user: 'root', password: 'root',
    database: 'db', host: 'localhost'
}

;(async () => {
    const sequelize = new Sequelize(
        dbConfig.database, dbConfig.user, dbConfig.password, {
            host: dbConfig.host,
            dialect: dbConfig.type,
            pool: {max: 100, min: 0, idle: 10000},
            dialectOptions: {
                charset: 'utf8_general_ci',
            }
        }
    )
    await sequelize.authenticate()

    const Events = sequelize.define('events', {
        id: {type: Sequelize.INTEGER, primaryKey: true, autoIncrement: true},
        title: {type: Sequelize.STRING}
    }, {timestamps: false})

    const Tags = sequelize.define('tags', {
        name: {type: Sequelize.STRING}
    }, {timestamps: false})

    const EventsTags = sequelize.define('events_tags', {}, {timestamps: false})

    Events.belongsToMany(Tags, {through: EventsTags})
    Tags.belongsToMany(Events, {through: EventsTags})

    await sequelize.sync()

    await Promise.all([
        Events.bulkCreate([
            {title: 'event_1'}, {title: 'event_2'},
            {title: 'event_3'}, {title: 'event_4'}
        ]),
        Tags.bulkCreate([
            {name: 'tag_1'}, {name: 'tag_2'},
            {name: 'tag_3'}, {name: 'tag_4'},
        ])
    ])
    await EventsTags.bulkCreate([
        {eventId: 1, tagId: 1}, {eventId: 2, tagId: 2},
        {eventId: 3, tagId: 3}, {eventId: 4, tagId: 1}
    ])

    const eventsWithTag1 = await Events.findAndCountAll({
        limit: 3,
        include: [{
            model: Tags,
            where: {name: 'tag_1'}
        }]
    })

    console.log('found events with tag_1:', eventsWithTag1.rows.length) // 1
    console.log('count events with tag_1:', eventsWithTag1.count) // 2

    await sequelize.close()
})()

This query will found only one event: console.log('found events with tag_1:', eventsWithTag1.rows.length) is logging 1

but it should found two: console.log('found events with tag_1:', eventsWithTag1.rows.length) must log 2 because there are two of them and limit is 3

If we remove limit or set it to 4+ or change the sequence of EventsTags creation to

EventsTags.bulkCreate([
  {eventId: 1, tagId: 1}, {eventId: 2, tagId: 2},
  {eventId: 4, tagId: 1}, {eventId: 3, tagId: 3}
])

results in that the request will found both events console.log('found events with tag_1:', eventsWithTag1.rows.length) will log 2

So limit should limit the maximum number of records in the result, but it limits the number of records in intermediate query and final result is unpredictable.

If you put findAll instead of findAndCountAll you will face the same problem

This is generated SQL for the last select query:

SELECT
  `events`.*, `tags`.`id` AS `tags.id`, `tags`.`name` AS `tags.name`,
  `tags.events_tags`.`eventId` AS `tags.events_tags.eventId`,
  `tags.events_tags`.`tagId` AS `tags.events_tags.tagId`
FROM
  (
    SELECT
      `events`.`id`, `events`.`title`
    FROM
      `events` AS `events`
    WHERE
      (
        SELECT
          `events_tags`.`eventId`
        FROM
          `events_tags` AS `events_tags` 
        INNER JOIN
            `tags` AS `tag`
          ON
            `events_tags`.`tagId` = `tag`.`id`
        WHERE
          (
            `events`.`id` = `events_tags`.`eventId`
          ) LIMIT 1 
      ) IS NOT NULL LIMIT 3
  ) AS `events`
INNER JOIN
    (
      `events_tags` AS `tags.events_tags`
      INNER JOIN
          `tags` AS `tags`
        ON
          `tags`.`id` = `tags.events_tags`.`tagId`
    )
  ON
      `events`.`id` = `tags.events_tags`.`eventId`
    AND
      `tags`.`name` = 'tag_1';

I have try this on MySQL 5.7 and Sequelize 4.0.0-2 and 3.30.2

About this issue

  • Original URL
  • State: closed
  • Created 7 years ago
  • Reactions: 23
  • Comments: 61 (16 by maintainers)

Commits related to this issue

Most upvoted comments

I am also getting same issue with limit in findAll() even though I have added subQuery: false. Are there any ways for me to get correct number of results?

As mentioned in https://github.com/sequelize/sequelize/issues/4716 - workaround is to add subQuery: false. Also https://github.com/sequelize/sequelize/issues/5715 is looks like related

I am also encountering this issue, it’s becoming a real pain point with the current application stack I am working on, query is necessary to have the includes for proper tagging and filtering functionality but this throws off the limit param to essentially be useless and random. Removing subQuery: false will resolve the limit issues but will then introduce issues with the tag include not working and returning invalid counts. Eg. Count: 1 but no rows.

File.findAndCountAll({
		distinct: true,
		subQuery: false, // This option is required for tag (include) functionality to work, 100% undocumented in sequalize. - Austin K. Smith 10/9/17
		offset: (req.query.start ? parseInt(req.query.start, 10) : 0),
		limit: (req.query.number ? parseInt(req.query.number, 10) : 12),
		where: _where,
		order: [
			[req.query.sort, req.query.order]
		],
		include: [
			{ 
				model: FileCategory, 
				as: 'categories', 
				through: 'fileFileCategories', 
				foreignKey: 'fileId', 
				where: _filters.category ? {
					id: _filters.category.id 
				} : null },
			{ 
				model: FileTag, 
				as: 'tags', 
				through: 'fileFileTags', 
				foreignKey: 'fileId',
				where: _filters.tag ? {
					id: _filters.tag.id 
				} : null },
			{ 
				model: Role, 
				as: 'roles', 
				through: 'fileRoles', 
				foreignKey: 'fileId', 
				where: _roleWhere, 
				required: false 
			}
		]
	}).then(function (files) {
		if (!files) {
			return res.status(404).send({
				message: 'No files found'
			});
		} else {
			return res.json(files);
		}
	}).catch(function (err) {
		winston.error('files.server.controller', err);
		// not secure send only generic message
		return res.status(500).send('files.server.controller error');
	});

Sub Query false is not an acceptable solution as this also causes problems with distinct: true not working properly and thus ending up with duplicate records that I have to manually strip out of the final result set, this is both inefficient and problematic as it then makes the pagination “remaining” count inaccurate.

This is still a problem in sequelize v6 and v7, any updates on this issue ?

@sushantdhiman it looks like there are a lot of people who are stumbling on this one. Could you please add a bug label on it so when someone else would search on it he would see that this issue is really about something

Any update on the bug resolution? Any alternatives that we could take advantage of in the meantime?

Submitted a PR for this, let’s see if we can get this rolled in. I think? @Alexsey 's solution will work just fine.

As I mentioned above the problem is that limit is applying to subquery instead of query. @mickhansen if we change in your code

// Add LIMIT, OFFSET to sub or main query
const limitOrder = this.addLimitAndOffset(options, mainTable.model);
if (limitOrder && !options.groupedLimit) {
  if (subQuery) {
    subQueryItems.push(limitOrder);
  } else {
    mainQueryItems.push(limitOrder);
  }
}

to

// Add LIMIT, OFFSET to main query
const limitOrder = this.addLimitAndOffset(options, mainTable.model);
if (limitOrder && !options.groupedLimit) {
  mainQueryItems.push(limitOrder);
}

the bug goes away. Can you please comment may this be the right solution or will this lead to a new bugs?

After about a week of hell found acceptable workaround for my case. Believe it would be helpful as found a lot of unanswered topics/issues on github.

TL;DR; actual solution is at the end of post, just the last piece of code.

The main idea is that Sequelize builds correct SQL query, but when having left joins we produce carthesian product, so there will be a lot of rows as query result.

Example: A and B tables. Many to many relation. If we want to get all A joined with B we will receive A * B rows, so there will be a lot of rows for each record from A with different values from B.

CREATE TABLE IF NOT EXISTS a (
	id INTEGER PRIMARY KEY NOT NULL,
	title VARCHAR
)

CREATE TABLE IF NOT EXISTS b (
	id INTEGER PRIMARY KEY NOT NULL,
	age INTEGER
)

CREATE TABLE IF NOT EXISTS ab (
	id INTEGER PRIMARY KEY NOT NULL,
	aid INTEGER,
	bid INTEGER
)

SELECT *
FROM a
LEFT JOIN (ab JOIN b ON b.id = ab.bid) ON a.id = ab.aid

In sequelize syntax:

class A extends Model {}
A.init({
	id: {
      type: Sequelize.INTEGER,
      autoIncrement: true,
	  primaryKey: true,
	},
	title: {
      type: Sequelize.STRING,
	},
});

class B extends Model {}
B.init({
	id: {
	  type: Sequelize.INTEGER,
      autoIncrement: true,
	  primaryKey: true,
	},
	age: {
      type: Sequelize.INTEGER,
	},
});

A.belongsToMany(B, { foreignKey: ‘aid’, otherKey: ‘bid’, as: ‘ab’ });
B.belongsToMany(A, { foreignKey: ‘bid’, otherKey: ‘aid’, as: ‘ab’ });

A.findAll({
	distinct: true,
	include: [{ association: ‘ab’ }],
})

Everything works ok.

So, imagine i want to receive 10 records from A with mapped to them records from B. When we put LIMIT 10 on this query, Sequelize build correct query but LIMIT is applied to whole query and as result we receive only 10 rows , where all of them could be for only one record from A. Example:

A.findAll({
	distinct: true,
	include: [{ association: ‘ab’ }],
	limit: 10,
})

Which will be converted into:

SELECT *
FROM a
LEFT JOIN (ab JOIN b ON b.id = ab.bid) ON a.id = ab.aid
LIMIT 10

id  |  title    |   id  |  aid  |  bid  |  id   |  age
--- |  -------- | ----- | ----- | ----- | ----- | -----
1	|	first	| 	1	|	1	|	1   |	1   |	1
1	|	first	| 	2	|	1	|	2   |	2   |	2
1	|	first	| 	3	|	1	|	3   |	3   |	3
1	|	first	| 	4	|	1	|	4   |	4   |	4
1	|	first	| 	5	|   1	|	5   |	5   |	5
2	|	second	| 	6	|	2	|	5   |	5   |	5
2	|	second	| 	7	|	2	|	4   |	4   |	4
2	|	second	| 	8	|	2	|	3   |	3   |	3
2	|	second	| 	9	|	2	|	2   |	2   |	2
2	|	second	| 	10	|	2	|	1   |	1   |	1

After output is received, Seruqlize as ORM will make data mapping and over query result in code will be:

[
 {
  id: 1,
  title: 'first',
  ab: [
   { id: 1, age:1 },
   { id: 2, age:2 },
   { id: 3, age:3 },
   { id: 4, age:4 },
   { id: 5, age:5 },
  ],
 },
  {
  id: 2,
  title: 'second',
  ab: [
   { id: 5, age:5 },
   { id: 4, age:4 },
   { id: 3, age:3 },
   { id: 2, age:2 },
   { id: 1, age:1 },
  ],
 }
]

Obviously NOT what we wanted. I wanted to receive 10 records for A, but received just 2, while i know that there are more in database.

So we have correct SQL query but still received incorrect result.

Ok, i had some ideas but the easiest and most logical is:

  1. Make first request with joins, and group results by source table (table on which we are making query and to which making joins) ‘id’ property. Seems easy…

    To make so we need to provide ‘group’ property to Sequelize query options. Here we have some problems. First - Sequelize makes aliases for each table while generating SQL query. Second - Sequelize puts all columns from JOINED table into SELECT statement of its query and passing ‘attributes’ = [] won’t help. In both cases we’ll receive SQL error.

    To solve first we need to convert Model.tableName to singluar form of this word (this logic is based on Sequelize). Just use pluralize.singular(). Then compose correct property to GROUP BY:

    const tableAlias = pluralize.singular('Industries') // Industry
    
    {
     ...,
     group: [`${tableAlias}.id`]
    }
    

    To solve second (it was the hardest and the most … undocumented). We need to use undocumented property ‘includeIgnoreAttributes’ = false. This will remove all columns from SELECT statement unless we specify some manually. We should manually specify attributes = [‘id’] on root query.

  2. Now we will receive correctly output with only necessary resources ids. Then we need to compose seconf query WITHOUT limit and offset, but specify additional ‘where’ clause:

{
 ...,
 where: {
  ...,
  id: Sequelize.Op.in: [array of ids],
 }
}
  1. With query about we can produce correct query with LEFT JOINS.

Solution Method receives model and original query as arguments and returns correct query + additionally total amount of records in DB for pagination. It also correctly parse query order to provide ability to order by fields from joined tables:

/**
   *  Workaround for Sequelize illogical behavior when quering with LEFT JOINS and having LIMIT / OFFSET
   *
   *  Here we group by 'id' prop of main (source) model, abd using undocumented 'includeIgnoreAttributes'
   *  Sequelize prop (it is used in its static count() method) in order to get correct SQL request
   *  Witout usage of 'includeIgnoreAttributes' there are a lot of extra invalid columns in SELECT statement
   *
   *  Incorrect example without 'includeIgnoreAttributes'. Here we will get correct SQL query
   *  BUT useless according to business logic:
   *
   *  SELECT "Media"."id", "Solutions->MediaSolutions"."mediaId", "Industries->MediaIndustries"."mediaId",...,
   *  FROM "Medias" AS "Media"
   *  LEFT JOIN ...
   *  WHERE ...
   *  GROUP BY "Media"."id"
   *  ORDER BY ...
   *  LIMIT ...
   *  OFFSET ...
   *
   *  Correct example with 'includeIgnoreAttributes':
   *
   *  SELECT "Media"."id"
   *  FROM "Medias" AS "Media"
   *  LEFT JOIN ...
   *  WHERE ...
   *  GROUP BY "Media"."id"
   *  ORDER BY ...
   *  LIMIT ...
   *  OFFSET ...
   *
   *  @param model - Source model (necessary for getting its tableName for GROUP BY option)
   *  @param query - Parsed and ready to use query object
   */
  private async fixSequeliseQueryWithLeftJoins<C extends Model>(
    model: ModelCtor<C>, query: FindAndCountOptions,
  ): IMsgPromise<{ query: FindAndCountOptions; total?: number }> {
    const fixedQuery: FindAndCountOptions = { ...query };

    // If there is only Tenant data joined -> return original query
    if (query.include && query.include.length === 1 && (query.include[0] as IncludeOptions).model === Tenant) {
      return msg.ok({ query: fixedQuery });
    }

    // Here we need to put it to singular form,
    // because Sequelize gets singular form for models AS aliases in SQL query
    const modelAlias = singular(model.tableName);

    const firstQuery = {
      ...fixedQuery,
      group: [`${modelAlias}.id`],
      attributes: ['id'],
      raw: true,
      includeIgnoreAttributes: false,
      logging: true,
    };

    // Ordering by joined table column - when ordering by joined data need to add it into the group
    if (Array.isArray(firstQuery.order)) {
      firstQuery.order.forEach((item) => {
        if ((item as GenericObject).length === 2) {
          firstQuery.group.push(`${modelAlias}.${(item as GenericObject)[0]}`);
        } else if ((item as GenericObject).length === 3) {
          firstQuery.group.push(`${(item as GenericObject)[0]}.${(item as GenericObject)[1]}`);
        }
      });
    }

    return model.findAndCountAll<C>(firstQuery)
      .then((ids) => {
        if (ids && ids.rows && ids.rows.length) {
          fixedQuery.where = {
            ...fixedQuery.where,
            id: {
              [Op.in]: ids.rows.map((item: GenericObject) => item.id),
            },
          };
          delete fixedQuery.limit;
          delete fixedQuery.offset;
        }

        /* eslint-disable-next-line */
        const total = (ids.count as any).length || ids.count;

        return msg.ok({ query: fixedQuery, total });
      })
      .catch((err) => this.createCustomError(err));
  }

bump

Unfortunately I too am in the situation where this issue is causing headaches in our product (we’ve worked around it by doing pagination in the application after loading all rows from the DB but it won’t scale for us much longer), and while I would like to submit a PR for the fix I just don’t have enough knowledge of Sequelize under the hood.

However I would be very happy to contribute to a “bug bounty” if this might help us to get a fix in place for this. Is this something that would be of interest to any experienced Sequelize contributors? I would happily put GBP 100 into the pot. Perhaps any others facing this issue who could contribute can speak up too. Apologies in advance if this is deemed not appropriate by the maintainers!

Yeah for the record adding subQuery: false resolved my problem as well.

@mickhansen agree and knex is quite irrelevant to this issue.

For me I am perfectly fine with sequelize to only query 1:1 relationship. The problem with sequelize is it give documentation on using function that give inconsistence result. This result normally can only be found out when you are querying against a lot of pages.

As a library with so much usage, I really think it should add obvious documentation/warning message for developer (as I don’t think this is a bug that can be quickly solved).

Have encountered this issue in production code using Postgres. Seems like sequelize have problem including 1:m or m:m relationship.

IMHO, if this bug cannot be resolved soon, a warning message should be shown in the console.log whenever 1:m or m:m include is being used with limit. I’ll rather unable to include 1:m or m:m at all if the result is inconsistence.

@Alexsey subQuery: false can help, but can lead to other issues. For example, I’m noticing an incorrect count for some of my queries now.

#9940 Same Issue,

Job.belongstoMany(
    TechnicianUser,{
        as:'technicians',
        through: TechnicianJob,
        onDelete:'',
        onUpdate:'',
});

TechnicianUser.belongsToMany(
    Job,{
        as:'jobs',
        through: TechnicianJob,
        onDelete:...,
        onUpdate:...,
});

/// Find jobs
Job.findCountAll({
    limit:limit && parseInt(limit),
    offset:offset && parseInt(limit),
    distinct:true,
    col:'id',
    subquery:false,
    where:{
        '$technicians.first_name$':{[Op.subString]:keyword},
    },
    include:[
        {
            model:TechnicianUser,
            as:'technicians',
            through:{
                ....
            }
        }
    ]
});

-> Expected result

  •  count:6,
      rows:[
          {...},<!-- 1 -->
          {...},<!-- 2 -->
          {...},<!-- 3 -->
          {...},<!-- 4 -->
          {...},<!-- 5 -->
      ]
    

-> Actual result

  •  count:6,
      rows:[
          {...},<!-- 1 -->
          {...},<!-- 2 -->
          {...},<!-- 3 -->
          {...},<!-- 4 -->
      ]
    

Some fileds missing but count same

I am facing the same issue. I took a quick look at the sequelize code and it seems to me the easiest way to allow us to set a limit on the main query instead of the inner subquery is just an extra option, say isGlobalLimit: boolean.

If you take a look here

https://github.com/sequelize/sequelize/blob/master/lib/dialects/abstract/query-generator.js#L1329

notice how if subquery is true (which is the case when we have associations) we add the limit to the subQueryItems and if it’s false, we add it to mainQueryItem.

The easiest way to resolve this problem seems to just change that check to something like

if (subQuery && !options.isGlobalLimit) {
    subQueryItems.push(limitOrder);
} else {
    mainQueryItems.push(limitOrder);
}

so that if the user added isGlobalLimit: true to their options, the limit would still get applied to the main query because that’s the only possible way you can get pagination working right with sequelize if you have any non-trivial filtering and sorting conditions.

@jamiesoncj count me in as well. If this can help us get this issue resolved, I am willing to contribute $120 to towards the bug as well.

I don’t think this issue is going away anytime soon as there is no work being done on this. Don’t get me wrong, appreciate Sequelize as an open source project, but when you are using something this popular, you wouldn’t expect an issue this major to exist and be ignore. Before somebody tells me to submit a PR myself, trust me I would but I don’t have the SQL expertize require to handle this.

We’ve been working on a big API and are now testing knex.js along with Objection.js, this is a huge setback in terms of time and resources which would be spent on re-doing things the knex.js way.

If I understand well, your limit and offset is broken by the join, I think you can use group by Events.id and you will get a good result, but instead of count you will get an array of counts and the real count is the length of the array. something like this:

{ count: [ { count: '3' }, { count: '1' }, { count: '1' } ], rows: [ { dataValues: [Object], Tags: [Array] }, { dataValues: [Object], Tags: [Array] }, { dataValues: [Object], Tags: [Array] } ] }

I know I’m late to the party, but I think is the only simple way to solve this… I tried it on sequelize v5.10.0

I think I may be facing the same problem as @ruanjiayou (if I understand their post correctly)…

@SirWojtek’s idea to add distinct: true solves count problems for some of our queries, but it breaks other queries by generating SQL like this:

SELECT count(DISTINCT(*)) AS "count" FROM ...

which leads to syntax error at or near "*".

I vaguely recall another post saying that distinct: true only needed to be applied to specific queries, but I forget what that distinction was. Maybe it had to do with the presence or absence of subqueries?

EDIT: This breaks because col is set to “*” when the query doesn’t have includes set:

https://github.com/sequelize/sequelize/blob/v3/lib/model.js#L1612

So the fix is to always ensure that the includes property is set, even if it’s just an empty array.

Moved to knex.js with objection.js, findAndCountAll (with 5 asssociations included) was taking ~3000ms in sequelize is taking me ~400ms with knex.

Also, sequelize.js’s findAndCount all has distinct applied to the count query but not the select query which leads to duplicate results with options.include.

Thanks @Alexsey for finding that on Bountysource - I have added my USD 150 bounty there and see you have also added a bounty… hopefully someone will find it appetising enough to take on the issue 😄

I found another workaround working for me. I’ve just added distinct: true to my findAndCountAll query.

Hm… it looks like to fix the query we need to move AND `tags`.`name` = 'tag_1' from the tail to the inner query before the LIMIT 1 (renaming `tags` to `tag`)

Looks like this

SELECT `events`.*, `tags`.`id` AS `tags.id`, `tags`.`name` AS `tags.name`, 
            `tags.events_tags`.`eventId` AS `tags.events_tags.eventId`, 
            `tags.events_tags`.`tagId` AS `tags.events_tags.tagId` 
FROM (SELECT `events`.`id`, `events`.`title` 
    FROM `events` AS `events` WHERE (
        SELECT `events_tags`.`eventId` 
        FROM `events_tags` AS `events_tags` 
        INNER JOIN `tags` AS `tag` ON `events_tags`.`tagId` = `tag`.`id` 
        WHERE (`events`.`id` = `events_tags`.`eventId`) AND `tag`.`name` = 'tag_1' LIMIT 1 
    ) IS NOT NULL LIMIT 2
) AS `events` 
INNER JOIN (`events_tags` AS `tags.events_tags` 
    INNER JOIN `tags` AS `tags` ON `tags`.`id` = `tags.events_tags`.`tagId`)
ON `events`.`id` = `tags.events_tags`.`eventId`

is the right query. Some one who know the code may fix this. I think it is the same issue as https://github.com/sequelize/sequelize/issues/6601 and may be some others