statsd_exporter: panic: runtime error: makeslice: cap out of range

Running a few statsd_exporters behind a statsd relay, I noticed that a process would occasionally drop out and return a few seconds later, leading to a dip in the total metrics received further down the line.

# statsd_exporter --version

statsd_exporter, version 0.12.1 (branch: HEAD, revision: 6635ea71e80f92b20d9863e01a2c6bc9339dbd64)
  build user:       root@af5b518ef75a
  build date:       20190708-08:56:16
  go version:       go1.12.6 

This is running on an AWS t2.xlarge instance with Debian stretch 9.9. The binary comes from the GitHub release, not self-built. Let me know if there’s anything else I can provide or do.

Crash output with GOTRACEBACK=all:

panic: runtime error: makeslice: cap out of range
goroutine 50 [running]:
main.(*registry).hashLabels(0xc00010b6c0, 0xc00beec058, 0x0, 0x1, 0x0, 0x408bfc, 0xc000010000)
	/app/registry.go:384 +0xb5
main.(*registry).getGauge(0xc00010b6c0, 0xc00bf096a0, 0xf, 0xc00beec058, 0x9bf159, 0x28, 0xc009affc20, 0xa71d01, 0xc003775500, 0x0, ...)
	/app/registry.go:222 +0x50
main.(*Exporter).handleEvent(0xc00010cbe0, 0xa6ac60, 0xc00bf19200)
	/app/exporter.go:189 +0xe6d
main.(*Exporter).Listen(0xc00010cbe0, 0xc000120720)
	/app/exporter.go:125 +0x16e
created by main.main
	/app/main.go:264 +0x20bf
goroutine 1 [chan receive, 2 minutes]:
main.main()
	/app/main.go:266 +0x20d9
goroutine 39 [IO wait, 2 minutes]:
internal/poll.runtime_pollWait(0x7f0cad0e0e50, 0x72, 0x0)
	/usr/local/go/src/runtime/netpoll.go:182 +0x56
internal/poll.(*pollDesc).wait(0xc0000fe318, 0x72, 0x0, 0x0, 0x9ab406)
	/usr/local/go/src/internal/poll/fd_poll_runtime.go:87 +0x9b
internal/poll.(*pollDesc).waitRead(...)
	/usr/local/go/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Accept(0xc0000fe300, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)
	/usr/local/go/src/internal/poll/fd_unix.go:384 +0x1ba
net.(*netFD).accept(0xc0000fe300, 0xc00002ea70, 0xc00002ea00, 0x40bc19)
	/usr/local/go/src/net/fd_unix.go:238 +0x42
net.(*TCPListener).accept(0xc00009e1b0, 0xc00009add8, 0x950640, 0x20d566625111001)
	/usr/local/go/src/net/tcpsock_posix.go:139 +0x32
net.(*TCPListener).AcceptTCP(0xc00009e1b0, 0xc00009ae00, 0x478f96, 0x5d2faecb)
	/usr/local/go/src/net/tcpsock.go:247 +0x48
net/http.tcpKeepAliveListener.Accept(0xc00009e1b0, 0xc00009ae50, 0x18, 0xc000064480, 0x72f6e4)
	/usr/local/go/src/net/http/server.go:3264 +0x2f
net/http.(*Server).Serve(0xc000087450, 0xa69a60, 0xc00009e1b0, 0x0, 0x0)
	/usr/local/go/src/net/http/server.go:2859 +0x22d
net/http.(*Server).ListenAndServe(0xc000087450, 0xc000087450, 0x1)
	/usr/local/go/src/net/http/server.go:2797 +0xe4
net/http.ListenAndServe(...)
	/usr/local/go/src/net/http/server.go:3037
main.serveHTTP(0x9a94da, 0x5, 0x9ac0f2, 0x8)
	/app/main.go:50 +0x133
created by main.main
	/app/main.go:161 +0x1cdf
goroutine 20 [syscall, 2 minutes]:
os/signal.signal_recv(0x0)
	/usr/local/go/src/runtime/sigqueue.go:139 +0x9c
os/signal.loop()
	/usr/local/go/src/os/signal/signal_unix.go:23 +0x22
created by os/signal.init.0
	/usr/local/go/src/os/signal/signal_unix.go:29 +0x41
goroutine 41 [chan receive]:
main.newEventQueue.func1(0xc000122320, 0xc000156280)
	/app/event.go:88 +0x37
created by main.newEventQueue
	/app/event.go:86 +0xb9
goroutine 42 [runnable]:
strings.Index(0xc00beecae1, 0x15, 0x9a891a, 0x2, 0x0)
	/usr/local/go/src/strings/strings.go:947 +0x693
strings.Contains(...)
	/usr/local/go/src/strings/strings.go:99
main.lineToEvents(0xc00beecada, 0x1c, 0x1, 0x1, 0x1)
	/app/exporter.go:348 +0x125
main.(*StatsDUDPListener).handlePacket(0xc000106f80, 0xc000227fb1, 0x561, 0xffff)
	/app/exporter.go:465 +0x123
main.(*StatsDUDPListener).Listen(0xc000106f80)
	/app/exporter.go:456 +0x8a
created by main.main
	/app/main.go:182 +0x2bf4
goroutine 43 [IO wait, 2 minutes]:
internal/poll.runtime_pollWait(0x7f0cad0e0f20, 0x72, 0x0)
	/usr/local/go/src/runtime/netpoll.go:182 +0x56
internal/poll.(*pollDesc).wait(0xc000116518, 0x72, 0x0, 0x0, 0x9ab406)
	/usr/local/go/src/internal/poll/fd_poll_runtime.go:87 +0x9b
internal/poll.(*pollDesc).waitRead(...)
	/usr/local/go/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Accept(0xc000116500, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)
	/usr/local/go/src/internal/poll/fd_unix.go:384 +0x1ba
net.(*netFD).accept(0xc000116500, 0x0, 0x0, 0x0)
	/usr/local/go/src/net/fd_unix.go:238 +0x42
net.(*TCPListener).accept(0xc000112128, 0x0, 0x0, 0x0)
	/usr/local/go/src/net/tcpsock_posix.go:139 +0x32
net.(*TCPListener).AcceptTCP(0xc000112128, 0x0, 0x0, 0x0)
	/usr/local/go/src/net/tcpsock.go:247 +0x48
main.(*StatsDTCPListener).Listen(0xc000106fc0)
	/app/exporter.go:480 +0x5b
created by main.main
	/app/main.go:194 +0x29fd
goroutine 44 [chan receive, 2 minutes]:
main.configReloader(0x7ffcb2ba9eee, 0x20, 0xc00017a0c0, 0x186a0)
	/app/main.go:98 +0xb7
created by main.main
	/app/main.go:257 +0x1f12
goroutine 46 [IO wait]:
internal/poll.runtime_pollWait(0x7f0cad0e0d80, 0x72, 0xffffffffffffffff)
	/usr/local/go/src/runtime/netpoll.go:182 +0x56
internal/poll.(*pollDesc).wait(0xc000116598, 0x72, 0x1000, 0x1000, 0xffffffffffffffff)
	/usr/local/go/src/internal/poll/fd_poll_runtime.go:87 +0x9b
internal/poll.(*pollDesc).waitRead(...)
	/usr/local/go/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Read(0xc000116580, 0xc000270000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
	/usr/local/go/src/internal/poll/fd_unix.go:169 +0x19b
net.(*netFD).Read(0xc000116580, 0xc000270000, 0x1000, 0x1000, 0xc00028f970, 0xc00028f938, 0x724e4a)
	/usr/local/go/src/net/fd_unix.go:202 +0x4f
net.(*conn).Read(0xc000112160, 0xc000270000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
	/usr/local/go/src/net/net.go:177 +0x69
net/http.(*connReader).Read(0xc000268060, 0xc000270000, 0x1000, 0x1000, 0xc000030f00, 0xc00a3dc180, 0x6000000000)
	/usr/local/go/src/net/http/server.go:787 +0x107
bufio.(*Reader).fill(0xc00026e000)
	/usr/local/go/src/bufio/bufio.go:100 +0x10f
bufio.(*Reader).ReadSlice(0xc00026e000, 0xa, 0x7f0cad0dc1b8, 0xc00028fb00, 0x40bc19, 0xc00015a100, 0x100)
	/usr/local/go/src/bufio/bufio.go:356 +0x3d
bufio.(*Reader).ReadLine(0xc00026e000, 0xc00028fb08, 0xc000062000, 0x7f0caf38e460, 0x0, 0x40c4a8, 0x30)
	/usr/local/go/src/bufio/bufio.go:385 +0x34
net/textproto.(*Reader).readLineSlice(0xc00a3dc180, 0xc00015a100, 0xc000116580, 0x0, 0x0, 0x42cee1)
	/usr/local/go/src/net/textproto/reader.go:55 +0x6f
net/textproto.(*Reader).ReadLine(...)
	/usr/local/go/src/net/textproto/reader.go:36
net/http.readRequest(0xc00026e000, 0x0, 0xc00015a100, 0x0, 0x0)
	/usr/local/go/src/net/http/request.go:968 +0x8d
net/http.(*conn).readRequest(0xc00020e0a0, 0xa6a9e0, 0xc000212040, 0x0, 0x0, 0x0)
	/usr/local/go/src/net/http/server.go:967 +0x163
net/http.(*conn).serve(0xc00020e0a0, 0xa6a9e0, 0xc000212040)
	/usr/local/go/src/net/http/server.go:1819 +0x6ad
created by net/http.(*Server).Serve
	/usr/local/go/src/net/http/server.go:2884 +0x2f4
goroutine 100369 [runnable]:
github.com/prometheus/statsd_exporter/pkg/mapper.(*MetricMapperLRUCache).trackCacheLength(0xc0001070e0)
	/app/pkg/mapper/mapper_cache.go:79
created by github.com/prometheus/statsd_exporter/pkg/mapper.(*MetricMapperLRUCache).AddMatch
	/app/pkg/mapper/mapper_cache.go:70 +0x49

About this issue

  • State: closed
  • Created 5 years ago
  • Comments: 19 (12 by maintainers)

Most upvoted comments

I think the crash is because the event queue reuses the slice it sends to the worker.

 func (eq *eventQueue) flushUnlocked() {
        eq.c <- eq.q
-       eq.q = eq.q[:0]
+       eq.q = []Event{}
        eventsFlushed.Inc()
 }

gets rid of all the races for me. I tried eq.c <- eq.q[:], but that didn’t seem to work, which makes sense: a full re-slice still shares the same backing array. I’ve run the load test with that change and have not had a crash for 10 minutes, even when querying (I normally get a crash in under 20 seconds). Running with the race detector, the change also clears all the races (I got a bunch, not just the one on .Labels()).
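
To see the race in isolation, here is a minimal sketch of the pattern the diff above fixes. The eventQueue and flushUnlocked names come from the issue, but the types are simplified stand-ins, not the exporter’s actual code: the producer sends its slice to a consumer over a channel, then truncates and reuses the same backing array while the consumer may still be iterating over it. Running it with go run -race reports the conflicting accesses; a torn read of this kind is one plausible way a bogus length could reach the makeslice call in hashLabels.

package main

import "time"

// Event stands in for the exporter's event type; the real one is richer.
type Event struct{ name string }

type eventQueue struct {
	q []Event
	c chan []Event
}

func (eq *eventQueue) flushUnlocked() {
	eq.c <- eq.q
	// Racy: the receiver still holds a view of the same backing array,
	// so the appends below write into memory it is concurrently reading.
	eq.q = eq.q[:0]
}

func main() {
	eq := &eventQueue{c: make(chan []Event, 16)}
	go func() {
		for evs := range eq.c {
			for _, ev := range evs { // concurrent read
				_ = ev.name
			}
		}
	}()
	for i := 0; i < 10000; i++ {
		eq.q = append(eq.q, Event{name: "metric"}) // concurrent write
		eq.flushUnlocked()
	}
	time.Sleep(100 * time.Millisecond)
}

The fix trades one small allocation per flush for exclusive ownership: because each flushed slice is freshly allocated, the producer and consumer never share a backing array.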