pgx: Stack overflows on upgrading from 5.0.1 to 5.0.2
After upgrading from 5.0.1 to 5.0.2, our application panics due to a stack overflow with the following stack trace:
fatal error: stack overflow
runtime stack:
runtime.throw({0x193e3b9?, 0x25fc140?})
runtime/panic.go:1047 +0x5d fp=0x7f0427ffecb8 sp=0x7f0427ffec88 pc=0x43a95d
runtime.newstack()
runtime/stack.go:1103 +0x5cc fp=0x7f0427ffee70 sp=0x7f0427ffecb8 pc=0x453fec
runtime.morestack()
runtime/asm_amd64.s:570 +0x8b fp=0x7f0427ffee78 sp=0x7f0427ffee70 pc=0x469acb
goroutine 320 [running]:
fmt.(*fmt).padString(0xc0005712f0?, {0x1948620, 0x17})
fmt/format.go:108 +0x299 fp=0xc0261003a8 sp=0xc0261003a0 pc=0x4ec699
fmt.(*fmt).fmtS(0x0?, {0x1948620?, 0xc026100420?})
fmt/format.go:359 +0x3f fp=0xc0261003e0 sp=0xc0261003a8 pc=0x4ed1bf
fmt.(*pp).fmtString(0x40c28d?, {0x1948620?, 0xc00e6dbdc0?}, 0x0?)
fmt/print.go:477 +0xc5 fp=0xc026100430 sp=0xc0261003e0 pc=0x4f0005
fmt.(*pp).handleMethods(0xc0005712b0, 0x1925b90?)
fmt/print.go:651 +0x216 fp=0xc026100680 sp=0xc026100430 pc=0x4f1356
fmt.(*pp).printArg(0xc0005712b0, {0x1771fc0?, 0xc00e6dbdc0}, 0x73)
fmt/print.go:740 +0x69b fp=0xc026100720 sp=0xc026100680 pc=0x4f201b
fmt.(*pp).doPrintf(0xc0005712b0, {0x196cb37, 0x37}, {0xc0261008e8?, 0x5, 0x5})
fmt/print.go:1057 +0x288 fp=0xc026100818 sp=0xc026100720 pc=0x4f4b08
fmt.Errorf({0x196cb37, 0x37}, {0xc0261008e8, 0x5, 0x5})
fmt/errors.go:20 +0x6c fp=0xc026100880 sp=0xc026100818 pc=0x4ebe2c
github.com/jackc/pgx/v5/pgtype.newEncodeError({0x17370e0, 0xc00e6dbd90}, 0x17370e0?, 0xe6dbd90?, 0xc0?, {0x1cebf60?, 0xc00e6dbdc0})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1958 +0x205 fp=0xc026100948 sp=0xc026100880 pc=0x8c9aa5
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1d09cb0?, 0x0?, {0x17370e0, 0xc00e6dbd90}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1971 +0x11e fp=0xc026100998 sp=0xc026100948 pc=0x8c9c3e
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6dbd50, {0x18cc1a0?, 0xc01b0469c0}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1376 +0xbd fp=0xc026100a10 sp=0xc026100998 pc=0x8c46bd
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b4b6000?, 0xc0?, {0x18cc1a0, 0xc01b0469c0}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100a60 sp=0xc026100a10 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6dbc90, {0x18cc1a0?, 0xc01b046950}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100ad8 sp=0xc026100a60 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493f50?, 0xc0?, {0x18cc1a0, 0xc01b046950}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100b28 sp=0xc026100ad8 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6dbbd0, {0x18cc1a0?, 0xc01b0468f0}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100ba0 sp=0xc026100b28 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493ec0?, 0xc0?, {0x18cc1a0, 0xc01b0468f0}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100bf0 sp=0xc026100ba0 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6dbb00, {0x18cc1a0?, 0xc01b046880}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100c68 sp=0xc026100bf0 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493e30?, 0xc0?, {0x18cc1a0, 0xc01b046880}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100cb8 sp=0xc026100c68 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6dba30, {0x18cc1a0?, 0xc01b0467f0}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100d30 sp=0xc026100cb8 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493da0?, 0xc0?, {0x18cc1a0, 0xc01b0467f0}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100d80 sp=0xc026100d30 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6db960, {0x18cc1a0?, 0xc01b046780}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100df8 sp=0xc026100d80 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493d10?, 0xc0?, {0x18cc1a0, 0xc01b046780}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100e48 sp=0xc026100df8 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6db8a0, {0x18cc1a0?, 0xc01b046720}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100ec0 sp=0xc026100e48 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493c80?, 0xc0?, {0x18cc1a0, 0xc01b046720}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100f10 sp=0xc026100ec0 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6db7e0, {0x18cc1a0?, 0xc01b0466b0}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026100f88 sp=0xc026100f10 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493bf0?, 0xc0?, {0x18cc1a0, 0xc01b0466b0}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1974 +0x8a fp=0xc026100fd8 sp=0xc026100f88 pc=0x8c9baa
github.com/jackc/pgx/v5/pgtype.(*encodePlanDriverValuer).Encode(0xc00e6db6f0, {0x18cc1a0?, 0xc01b046620}, {0xc000ef2100, 0x0, 0x100})
github.com/jackc/pgx/v5@v5.0.2/pgtype/pgtype.go:1393 +0x195 fp=0xc026101050 sp=0xc026100fd8 pc=0x8c4795
github.com/jackc/pgx/v5/pgtype.(*Map).Encode(0x9f07bc?, 0x1b493b60?, 0xc0?, {0x18cc1a0, 0xc01b046620}, {0xc000ef2100, 0x0, 0x100})
... more of the same ...
This happens in a large application and I have not yet debugged it to a point where I can create a small reproducer.
About this issue
- Original URL
- State: closed
- Created 2 years ago
- Comments: 19 (7 by maintainers)
Commits related to this issue
- Prevent infinite loop for driver.Valuer / Codec edge case A `driver.Valuer()` results in a `string` that the `Codec` for the PostgreSQL type doesn't know how to handle. That string is scanned into wh... — committed to jackc/pgx by jackc 2 years ago
- reflect.TypeOf can return nil. Check before using https://github.com/jackc/pgx/issues/1331 — committed to jackc/pgx by jackc 2 years ago
@jackc Thanks a ton! I just cut a PR with
5.0.4
and both the panic and deadlock went away.Cheers! 🍻
I think I have found what is happening, but I’m not sure what the solution is.
Here’s how the infinite loop is occurring:
First, pgx plans how to encode the value.
uuid
and decides to use the binary format for the parameter.github.com/google/uuid.UUID
can be handled directly bypgtype.UUIDCodec
but it can’t.github.com/google/uuid.UUID
supportsdriver.Valuer
and it does. It decides on an encode plan that usesdriver.Valuer
. (https://github.com/jackc/pgx/blob/094ad9c9d830a33089b85d8dfca538c4de1dcdba/pgtype/pgtype.go#L1320)Next the actual encoding takes place.
Value()
and gets astring
back. This needs to be converted into the binary format.pgtype.UUIDCodec
to scan thestring
.pgx-google-uuid
agithub.com/google/uuid.UUID
is returned.The cause of this is a change / fix I made in
v5.0.2
for #1311 and #1319. Previously,driver.Valuer
was the last choice for encoding strategies. However, this caused problems which a type implementeddriver.Valuer
but also could be encoded viafmt.Stringer
or via underlying type unwrapping (e.g.type foo string
).driver.Valuer
should be preferred in this case. So inv5.0.2
thedriver.Valuer
test happens beforeTryWrapEncodePlanFuncs
.This means the
TryWrapUUIDEncodePlan
frompgx-google-uuid
never gets attempted because thedriver.Valuer
plan is found first.This is really messy for a few reasons.
The pgx type system is dependent on being able to determine from the Go type, the PostgreSQL type, and the desired format (text or binary) how to encode any value of that type.
driver.Valuer
wreaks havoc with that assumption. There is no way to know the type without callingValue()
. But thedriver.Valuer
could benil
or return calling it could returnnil
. It also could return different types when called on different values.The other reason is sometimes
driver.Valuer
should be tried beforeTryWrapEncodePlanFuncs
and sometimes after.Upon further reflection, the problem can be summarized as follows:
A
driver.Valuer
results in astring
that theCodec
for the PostgreSQL type doesn’t know how to encode. It falls back to scanning that string into whatever the default type for thatCodec
is. That new value is encoded. If the new value is the same type as the original type than an infinite loop occurs.c4407fb36e71015eaa2fd129d7c9a6554b038eee fixes the infinite recursion in this case by specifically checking for identical types.
It also indirectly fixes the problem by generating an error that causes the value to be encoded in text format instead of binary.
a581124dea47d1058068cc45acbfe6ff21a2534e further fixes the issue by moving the
driver.Valuer
path afterTryWrapEncodePlanFuncs
. However, all builtinTryWrapEncodePlanFuncs
now skip themselves if the type is adriver.Valuer
. This allows wrappers like that provided bypgx-google-uuid
to take precedence overdriver.Valuer
or not as they see fit. And this allows the binary format to be used again.These changes are on master now. If I can get some confirmation that it does solve the problem then a will tag a release later this week.
pgx-google-uuid
author here.Spent an evening trying to find out what is wrong with it. Found where it gets into the infinite loop, but could not get deeper. Today in the morning re-read comments here and the diff for the change that is causing the issue, and tried to get rid of the type wrapper completely, and it worked!
Modified test looks like this:
The only thing that stopped working is:
And I’m not sure if this is a problem that I should solve. So if you’re not using
Values()
forRows
just remove the wrapper library and use google UUID directly as it supports stdlib sql interfaces out-of-the-box.