runtime: .NET built against LTTng 2.13 crashes while initializing LTTng
From https://github.com/dotnet/runtime/issues/57784#issuecomment-985409356.
.NET built on Fedora Rawhide (36) segfaults on application start while initializing LTTng.
* frame #0: 0x00007f8885a47b82 liblttng-ust.so.1`check_event_provider + 162
frame #1: 0x00007f8885a4d4d1 liblttng-ust.so.1`lttng_ust_probe_register + 33
frame #2: 0x00007f8885b007b5 libcoreclrtraceptprovider.so`lttng_ust__events_init__DotNETRuntime() at ust-tracepoint-event.h:1198:14
frame #3: 0x00007f888683fa2e ld-linux-x86-64.so.2`call_init(l=<unavailable>, argc=10, argv=0x00007ffcd00cfd88, env=0x00007ffcd00cfde0) at dl-init.c:70:3
frame #4: 0x00007f888683fb1c ld-linux-x86-64.so.2`_dl_init(main_map=0x0000556bd608a290, argc=10, argv=0x00007ffcd00cfd88, env=0x00007ffcd00cfde0) at dl-init.c:117:5
frame #5: 0x00007f88864534c5 libc.so.6`_dl_catch_exception + 229
frame #6: 0x00007f88868437de ld-linux-x86-64.so.2`dl_open_worker at dl-open.c:821:5
frame #7: 0x00007f8886453468 libc.so.6`_dl_catch_exception + 136
frame #8: 0x00007f8886843b5c ld-linux-x86-64.so.2`_dl_open at dl-open.c:896:17
frame #9: 0x00007f888638294c libc.so.6`dlopen_doit + 92
frame #10: 0x00007f8886453468 libc.so.6`_dl_catch_exception + 136
frame #11: 0x00007f8886453533 libc.so.6`_dl_catch_error + 51
frame #12: 0x00007f888638244e libc.so.6`_dlerror_run + 142
frame #13: 0x00007f88863829d8 libc.so.6`dlopen@GLIBC_2.2.5 + 72
frame #14: 0x00007f8885fd6893 libcoreclr.so`PAL_InitializeTracing() at tracepointprovider.cpp:116:9
frame #15: 0x00007f888683fa2e ld-linux-x86-64.so.2`call_init(l=<unavailable>, argc=10, argv=0x00007ffcd00cfd88, env=0x00007ffcd00cfde0) at dl-init.c:70:3
frame #16: 0x00007f888683fb1c ld-linux-x86-64.so.2`_dl_init(main_map=0x0000556bd6060050, argc=10, argv=0x00007ffcd00cfd88, env=0x00007ffcd00cfde0) at dl-init.c:117:5
frame #17: 0x00007f88864534c5 libc.so.6`_dl_catch_exception + 229
frame #18: 0x00007f88868437de ld-linux-x86-64.so.2`dl_open_worker at dl-open.c:821:5
frame #19: 0x00007f8886453468 libc.so.6`_dl_catch_exception + 136
frame #20: 0x00007f8886843b5c ld-linux-x86-64.so.2`_dl_open at dl-open.c:896:17
frame #21: 0x00007f888638294c libc.so.6`dlopen_doit + 92
frame #22: 0x00007f8886453468 libc.so.6`_dl_catch_exception + 136
frame #23: 0x00007f8886453533 libc.so.6`_dl_catch_error + 51
frame #24: 0x00007f888638244e libc.so.6`_dlerror_run + 142
frame #25: 0x00007f88863829d8 libc.so.6`dlopen@GLIBC_2.2.5 + 72
frame #26: 0x00007f8886274ead libhostpolicy.so`pal::load_library(path="/home/tmds/rpmbuild/BUILD/dotnet-9e8b04bbff820c93c142f99a507a46b976f5c14c-x64-bootstrap/src/aspnetcore.ae1a6cbe225b99c0bf38b7e31bf60cb653b73a52/artifacts/source-build/self/package-cache/microsoft.netcore.app.crossgen2.linux-x64/6.0.0/tools/libcoreclr.so", dll=0x00007f888629e0a0) at pal.unix.cpp:230:12
...
The crash happens at this line: https://github.com/lttng/lttng-ust/blob/4c155a06d838e1ab5d385abd1d73ae56e71b7d5e/src/lib/lttng-ust/lttng-probes.c#L153.
The entries of the tracepoint class's `fields` array are null:
(gdb) p *tp_class
$3 = {struct_size = 48, fields = 0x7ffff73ab2e0 <lttng_ust__event_fields___DotNETRuntime___GCStart>, nr_fields = 2,
probe_callback = 0x7ffff7364820 <lttng_ust__event_probe__DotNETRuntime___GCStart(void*, unsigned int, unsigned int)>,
signature = 0x7ffff738e720 <__tp_event_signature___DotNETRuntime___GCStart> "const unsigned int, Count, const unsigned int, Reason", probe_desc = 0x7ffff73a1470 <lttng_ust__probe_desc___DotNETRuntime>}
(gdb) p tp_class->fields[0]
$4 = (const struct lttng_ust_event_field * const) 0x0
(gdb) p tp_class->fields[1]
$5 = (const struct lttng_ust_event_field * const) 0x0
These fields get initialized dynamically.
static const struct lttng_ust_event_field * const lttng_ust__event_fields___DotNETRuntime___GCStart[] = { new (const struct lttng_ust_event_field) { .struct_size = sizeof(struct lttng_ust_event_field), .name = "Count", .type = ((struct lttng_ust_type_common *) new (struct lttng_ust_type_integer) { .parent = { .type = lttng_ust_type_integer, }, .struct_size = sizeof(struct lttng_ust_type_integer), .size = sizeof(unsigned int) * 8, .alignment = 1 * 8, .signedness = (std::is_signed<unsigned int>::value), .reverse_byte_order = 1234 != 1234, .base = 10, }), .nowrite = 0, .nofilter = 0, }, new (const struct lttng_ust_event_field) { .struct_size = sizeof(struct lttng_ust_event_field), .name = "Reason", .type = ((struct lttng_ust_type_common *) new (struct lttng_ust_type_integer) { .parent = { .type = lttng_ust_type_integer, }, .struct_size = sizeof(struct lttng_ust_type_integer), .size = sizeof(unsigned int) * 8, .alignment = 1 * 8, .signedness = (std::is_signed<unsigned int>::value), .reverse_byte_order = 1234 != 1234, .base = 10, }), .nowrite = 0, .nofilter = 0, }, new (const struct lttng_ust_event_field) { .struct_size = sizeof(struct lttng_ust_event_field), .name = "dummy", .type = ((struct lttng_ust_type_common *) new (struct lttng_ust_type_integer) { .parent = { .type = lttng_ust_type_integer, }, .struct_size = sizeof(struct lttng_ust_type_integer), .size = sizeof(int) * 8, .alignment = 1 * 8, .signedness = (std::is_signed<int>::value), .reverse_byte_order = 1234 != 1234, .base = 10, }), .nowrite = 0, .nofilter = 0, }, }; static const struct lttng_ust_tracepoint_class lttng_ust__event_class___DotNETRuntime___GCStart = { .struct_size = sizeof(struct lttng_ust_tracepoint_class), .fields = lttng_ust__event_fields___DotNETRuntime___GCStart, .nr_fields = (sizeof(lttng_ust__event_fields___DotNETRuntime___GCStart) / sizeof((lttng_ust__event_fields___DotNETRuntime___GCStart)[0])) - 1, .probe_callback = (void (*)(void)) &lttng_ust__event_probe__DotNETRuntime___GCStart, 
.signature = __tp_event_signature___DotNETRuntime___GCStart, .probe_desc = &lttng_ust__probe_desc___DotNETRuntime, };
It seems they have not been initialized (yet):
(gdb) p lttng_ust__event_fields___DotNETRuntime___GCStart
$1 = {0x0, 0x0, 0x0}
cc @omajid @janvorli @hoyosjs @am11 @brianrob @dotnet/dotnet-diag
About this issue
- Original URL
- State: closed
- Created 3 years ago
- Comments: 16 (9 by maintainers)
Commits related to this issue
- Fix: generate probe registration constructor as a C++ constuctor Observed issue ============== Applications which transitively dlopen() a library which, in turn, dlopen() providers crash when they a... — committed to lttng/lttng-ust by jgalar 3 years ago
- Fix: generate probe registration constructor as a C++ constuctor Observed issue ============== Applications which transitively dlopen() a library which, in turn, dlopen() providers crash when they a... — committed to lttng/lttng-ust by jgalar 3 years ago
Indeed, the fixes are present in the lttng-ust 2.13.1 release. It is fixed in two ways:
First, we made sure that clang does not allocate compound literals on the heap in C++ in a typical build. This is fixed by commit a11ff47e2a6 (“fix: allocating C++ compound literal on heap with Clang”). However, if someone builds with LTTNG_UST_ALLOCATE_COMPOUND_LITERAL_ON_HEAP defined, the constructor ordering is still an issue. This is why we have also fixed the underlying constructor-ordering issue, in commit 90fe47efbc1 (“Fix: generate probe registration constructor as a C++ constuctor”).
As indicated in the LTTng bug tracker, please try https://review.lttng.org/c/lttng-ust/+/6870 and let us know if it improves the situation after rebuilding the tracepoint probe provider.
You need to upgrade to the new lttng-ust 2.13.1 (or any newer version) and rebuild the .NET runtime probe providers against that upgraded lttng-ust to correct the problem.
I confirm that the issue sat squarely within lttng-ust, so I don’t expect anything to be needed in the .NET runtime to fix this, except rebuilding the .NET runtime probe providers against a fixed lttng-ust.