pgx: Stack overflows on upgrading from 5.0.1 to 5.0.2

After upgrading from 5.0.1 to 5.0.2, our application panics due to a stack overflow with the following stack trace:

fatal error: stack overflow

runtime stack:
runtime.throw({0x193e3b9?, 0x25fc140?})
	runtime/panic.go:1047 +0x5d fp=0x7f0427ffecb8 sp=0x7f0427ffec88 pc=0x43a95d
runtime.newstack()
	runtime/stack.go:1103 +0x5cc fp=0x7f0427ffee70 sp=0x7f0427ffecb8 pc=0x453fec
runtime.morestack()
	runtime/asm_amd64.s:570 +0x8b fp=0x7f0427ffee78 sp=0x7f0427ffee70 pc=0x469acb

goroutine 320 [running]:
fmt.(*fmt).padString(0xc0005712f0?, {0x1948620, 0x17})
	fmt/format.go:108 +0x299 fp=0xc0261003a8 sp=0xc0261003a0 pc=0x4ec699
fmt.(*fmt).fmtS(0x0?, {0x1948620?, 0xc026100420?})
	fmt/format.go:359 +0x3f fp=0xc0261003e0 sp=0xc0261003a8 pc=0x4ed1bf
fmt.(*pp).fmtString(0x40c28d?, {0x1948620?, 0xc00e6dbdc0?}, 0x0?)
	fmt/print.go:477 +0xc5 fp=0xc026100430 sp=0xc0261003e0 pc=0x4f0005
fmt.(*pp).handleMethods(0xc0005712b0, 0x1925b90?)
	fmt/print.go:651 +0x216 fp=0xc026100680 sp=0xc026100430 pc=0x4f1356
fmt.(*pp).printArg(0xc0005712b0, {0x1771fc0?, 0xc00e6dbdc0}, 0x73)
	fmt/print.go:740 +0x69b fp=0xc026100720 sp=0xc026100680 pc=0x4f201b
fmt.(*pp).doPrintf(0xc0005712b0, {0x196cb37, 0x37}, {0xc0261008e8?, 0x5, 0x5})
	fmt/print.go:1057 +0x288 fp=0xc026100818 sp=0xc026100720 pc=0x4f4b08
fmt.Errorf({0x196cb37, 0x37}, {0xc0261008e8, 0x5, 0x5})
	fmt/errors.go:20 +0x6c fp=0xc026100880 sp=0xc026100818 pc=0x4ebe2c
github.com/jackc/pgx/v5/pgtype.newEncodeError({0x17370e0, 0xc00e6dbd90}, 0x17370e0?, 0xe6dbd90?, 0xc0?, {0x1cebf60?, 0xc00e6dbdc0})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1958 +0x205 fp=0xc026100948 sp=0xc026100880 pc=0x8c9aa5
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1d09cb0?, 0x0?, {0x17370e0, 0xc00e6dbd90}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1971 +0x11e fp=0xc026100998 sp=0xc026100948 pc=0x8c9c3e
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6dbd50, {0x18cc1a0?, 0xc01b0469c0}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1376 +0xbd fp=0xc026100a10 sp=0xc026100998 pc=0x8c46bd
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b4b6000?, 0xc0?, {0x18cc1a0, 0xc01b0469c0}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100a60 sp=0xc026100a10 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6dbc90, {0x18cc1a0?, 0xc01b046950}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100ad8 sp=0xc026100a60 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493f50?, 0xc0?, {0x18cc1a0, 0xc01b046950}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100b28 sp=0xc026100ad8 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6dbbd0, {0x18cc1a0?, 0xc01b0468f0}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100ba0 sp=0xc026100b28 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493ec0?, 0xc0?, {0x18cc1a0, 0xc01b0468f0}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100bf0 sp=0xc026100ba0 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6dbb00, {0x18cc1a0?, 0xc01b046880}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100c68 sp=0xc026100bf0 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493e30?, 0xc0?, {0x18cc1a0, 0xc01b046880}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100cb8 sp=0xc026100c68 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6dba30, {0x18cc1a0?, 0xc01b0467f0}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100d30 sp=0xc026100cb8 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493da0?, 0xc0?, {0x18cc1a0, 0xc01b0467f0}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100d80 sp=0xc026100d30 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6db960, {0x18cc1a0?, 0xc01b046780}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100df8 sp=0xc026100d80 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493d10?, 0xc0?, {0x18cc1a0, 0xc01b046780}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100e48 sp=0xc026100df8 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6db8a0, {0x18cc1a0?, 0xc01b046720}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100ec0 sp=0xc026100e48 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493c80?, 0xc0?, {0x18cc1a0, 0xc01b046720}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100f10 sp=0xc026100ec0 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6db7e0, {0x18cc1a0?, 0xc01b0466b0}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100f88 sp=0xc026100f10 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493bf0?, 0xc0?, {0x18cc1a0, 0xc01b0466b0}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100fd8 sp=0xc026100f88 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6db6f0, {0x18cc1a0?, 0xc01b046620}, {0xc000ef2100, 0x0, 0x100})
	github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026101050 sp=0xc026100fd8 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493b60?, 0xc0?, {0x18cc1a0, 0xc01b046620}, {0xc000ef2100, 0x0, 0x100})
... more of the same ...

This happens in a large application and I have not yet debugged it to a point where I can create a small reproducer.

About this issue

  • Original URL
  • State: closed
  • Created 2 years ago
  • Comments: 19 (7 by maintainers)

Commits related to this issue

Most upvoted comments

@jackc Thanks a ton! I just cut a PR with 5.0.4 and both the panic and deadlock went away.

Cheers! 🍻

I think I have found what is happening, but I’m not sure what the solution is.

Here’s how the infinite loop is occurring:

First, pgx plans how to encode the value.

  1. pgx determines that the parameter OID is uuid and decides to use the binary format for the parameter.
  2. It checks if github.com/google/uuid.UUID can be handled directly by pgtype.UUIDCodec but it can’t.
  3. It then checks if github.com/google/uuid.UUID supports driver.Valuer and it does. It decides on an encode plan that uses driver.Valuer. (https://github.com/jackc/pgx/blob/094ad9c9d830a33089b85d8dfca538c4de1dcdba/pgtype/pgtype.go#L1320)

Next the actual encoding takes place.

  1. The encode plan calls Value() and gets a string back. This needs to be converted into the binary format.
  2. It uses pgtype.UUIDCodec to scan the string.
  3. Because of the Codec registration performed by pgx-google-uuid a github.com/google/uuid.UUID is returned.
  4. It tries to encode the newly scanned value. i.e. goto 2.

The cause of this is a change / fix I made in v5.0.2 for #1311 and #1319. Previously, driver.Valuer was the last choice for encoding strategies. However, this caused problems which a type implemented driver.Valuer but also could be encoded via fmt.Stringer or via underlying type unwrapping (e.g. type foo string). driver.Valuer should be preferred in this case. So in v5.0.2 the driver.Valuer test happens before TryWrapEncodePlanFuncs.

This means the TryWrapUUIDEncodePlan from pgx-google-uuid never gets attempted because the driver.Valuer plan is found first.


This is really messy for a few reasons.

The pgx type system is dependent on being able to determine from the Go type, the PostgreSQL type, and the desired format (text or binary) how to encode any value of that type. driver.Valuer wreaks havoc with that assumption. There is no way to know the type without calling Value(). But the driver.Valuer could be nil or return calling it could return nil. It also could return different types when called on different values.

The other reason is sometimes driver.Valuer should be tried before TryWrapEncodePlanFuncs and sometimes after.


Upon further reflection, the problem can be summarized as follows:

A driver.Valuer results in a string that the Codec for the PostgreSQL type doesn’t know how to encode. It falls back to scanning that string into whatever the default type for that Codec is. That new value is encoded. If the new value is the same type as the original type than an infinite loop occurs.

c4407fb36e71015eaa2fd129d7c9a6554b038eee fixes the infinite recursion in this case by specifically checking for identical types.

It also indirectly fixes the problem by generating an error that causes the value to be encoded in text format instead of binary.

a581124dea47d1058068cc45acbfe6ff21a2534e further fixes the issue by moving the driver.Valuer path after TryWrapEncodePlanFuncs. However, all builtin TryWrapEncodePlanFuncs now skip themselves if the type is a driver.Valuer. This allows wrappers like that provided by pgx-google-uuid to take precedence over driver.Valuer or not as they see fit. And this allows the binary format to be used again.

These changes are on master now. If I can get some confirmation that it does solve the problem then a will tag a release later this week.

pgx-google-uuid author here.

Spent an evening trying to find out what is wrong with it. Found where it gets into the infinite loop, but could not get deeper. Today in the morning re-read comments here and the diff for the change that is causing the issue, and tried to get rid of the type wrapper completely, and it worked!

Modified test looks like this:

original, err := uuid.NewRandom()
require.NoError(t, err)

rows, err := conn.Query(ctx, `select $1::uuid`, original)
require.NoError(t, err)

for rows.Next() {
	var val uuid.UUID
	err := rows.Scan(&val)
	require.NoError(t, err)

	require.Equal(t, original, val)
}
require.NoError(t, rows.Err())

rows, err = conn.Query(ctx, `select $1::uuid`, nil)
require.NoError(t, err)

for rows.Next() {
	var val uuid.UUID
	err := rows.Scan(&val)
	require.NoError(t, err)

	require.Equal(t, uuid.Nil, val)
}
require.NoError(t, rows.Err())

The only thing that stopped working is:

values, err := rows.Values()
require.NoError(t, err)
// values[0] is [16]uint8 here, that is byte-representation of the UUID
v0, ok := values[0].(uuid.UUID)
require.True(t, ok)

And I’m not sure if this is a problem that I should solve. So if you’re not using Values() for Rows just remove the wrapper library and use google UUID directly as it supports stdlib sql interfaces out-of-the-box.