libzmq: zmq_ctx_term throws Assertion failed: pfd.revents & POLLIN (src/signaler.cpp:226)

When I terminate my context, the program aborts with SIGABRT after reporting: Assertion failed: pfd.revents & POLLIN (src/signaler.cpp:226).

Thread 3 (Thread 0x7f6150b23700 (LWP 3391)):
#0  0x00007f61557f6ee9 in __libc_waitpid (pid=pid@entry=3394, stat_loc=stat_loc@entry=0x7f6150b21c0c, options=options@entry=0) at ../sysdeps/unix/sysv/linux/waitpid.c:40
#1  0x00000000004b4009 in mono_handle_native_sigsegv (signal=<optimized out>, ctx=<optimized out>) at mini-exceptions.c:2323
#2  <signal handler called>
#3  0x00007f6155457bb9 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
#4  0x00007f615545afc8 in __GI_abort () at abort.c:89
#5  0x00007f6152061259 in zmq::zmq_abort (errmsg_=errmsg_@entry=0x7f61520c8940 "pfd.revents & POLLIN") at src/err.cpp:74
#6  0x00007f615207f18e in zmq::signaler_t::wait (this=this@entry=0x286b398, timeout_=timeout_@entry=-1) at src/signaler.cpp:226
#7  0x00007f6152064bb0 in zmq::mailbox_t::recv (this=this@entry=0x286b338, cmd_=cmd_@entry=0x7f6150b22a50, timeout_=timeout_@entry=-1) at src/mailbox.cpp:70
#8  0x00007f6152055c3c in zmq::ctx_t::terminate (this=this@entry=0x286b2a0) at src/ctx.cpp:157
#9  0x00007f6152095b38 in zmq_ctx_term (ctx_=0x286b2a0) at src/zmq.cpp:155
#10 0x00000000410d8500 in ?? ()
#11 0x00007f6144002640 in ?? ()
#12 0x00007f6150b22e00 in ?? ()
...

Sometimes it fails instead with "pure virtual method called", followed by "terminate called without an active exception":

Thread 2 (Thread 0x7fdb11922700 (LWP 3647)):
#0  0x00007fdb1679dee9 in __libc_waitpid (pid=pid@entry=3648, stat_loc=stat_loc@entry=0x7fdb11920a0c, options=options@entry=0) at ../sysdeps/unix/sysv/linux/waitpid.c:40
#1  0x00000000004b4009 in mono_handle_native_sigsegv (signal=<optimized out>, ctx=<optimized out>) at mini-exceptions.c:2323
#2  <signal handler called>
#3  0x00007fdb163febb9 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
#4  0x00007fdb16401fc8 in __GI_abort () at abort.c:89
#5  0x00007fdb12d9c6b5 in __gnu_cxx::__verbose_terminate_handler() () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#6  0x00007fdb12d9a836 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#7  0x00007fdb12d9a863 in std::terminate() () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#8  0x00007fdb12d9b33f in __cxa_pure_virtual () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#9  0x00007fdb13064b92 in read (value_=0x7fdb11921860, this=0x2259f08) at src/ypipe.hpp:156
#10 zmq::mailbox_t::recv (this=this@entry=0x2259f08, cmd_=cmd_@entry=0x7fdb11921860, timeout_=timeout_@entry=-1) at src/mailbox.cpp:62
#11 0x00007fdb13055c3c in zmq::ctx_t::terminate (this=this@entry=0x2259e70) at src/ctx.cpp:157
#12 0x00007fdb13095b38 in zmq_ctx_term (ctx_=0x2259e70) at src/zmq.cpp:155
#13 0x0000000040356ca0 in ?? ()
#14 0x00007fdb000026a0 in ?? ()
#15 0x00007fdb11921c60 in ?? ()
#16 0x0000000000000000 in ?? ()

Any thoughts…?

About this issue

  • Original URL
  • State: closed
  • Created 9 years ago
  • Comments: 18 (13 by maintainers)

Most upvoted comments

Can this be reopened? It’s still occurring.

was autoclosed, but still occurs?

Hi, I got the same crash in signaler.cpp:226 with 4.1.0. This patch seems to fix it:

diff --git a/src/signaler.cpp b/src/signaler.cpp
index 25667bf..ba5b288 100644
--- a/src/signaler.cpp
+++ b/src/signaler.cpp
@@ -223,6 +223,11 @@ int zmq::signaler_t::wait (int timeout_)
     }
 #endif
     zmq_assert (rc == 1);
+       if (pfd.revents & POLLNVAL)
+       {
+               errno=EINTR;
+               return -1;
+       }
     zmq_assert (pfd.revents & POLLIN);
     return 0;
diff --git a/src/mailbox.cpp b/src/mailbox.cpp
index bd140a4..da50cf0 100644
--- a/src/mailbox.cpp
+++ b/src/mailbox.cpp
@@ -67,14 +67,18 @@ int zmq::mailbox_t::recv (command_t *cmd_, int timeout_)
     }

     //  Wait for signal from the command sender.
-    const int rc = signaler.wait (timeout_);
+    int rc = signaler.wait (timeout_);
     if (rc == -1) {
         errno_assert (errno == EAGAIN || errno == EBADF);
         return -1;
     }

     //  Receive the signal.
-    signaler.recv ();
+    rc=signaler.recv ();
+    if (rc == -1) {
+        errno_assert (errno == EINTR);
+        return -1;
+    }

     //  Switch into active state.
     active = true;
diff --git a/src/signaler.cpp b/src/signaler.cpp
index ba5b288..a9609df 100644
--- a/src/signaler.cpp
+++ b/src/signaler.cpp
@@ -265,12 +265,17 @@ int zmq::signaler_t::wait (int timeout_)
 #endif
 }

-void zmq::signaler_t::recv ()
+int zmq::signaler_t::recv ()
 {
     //  Attempt to read a signal.
 #if defined ZMQ_HAVE_EVENTFD
     uint64_t dummy;
     ssize_t sz = read (r, &dummy, sizeof (dummy));
+    if (sz==0 || (sz==-1 && errno==EINVAL))
+    {
+        errno=EINTR;
+        return -1;
+    }
     errno_assert (sz == sizeof (dummy));

     //  If we accidentally grabbed the next signal along with the current
@@ -279,7 +284,7 @@ void zmq::signaler_t::recv ()
         const uint64_t inc = 1;
         ssize_t sz2 = write (w, &inc, sizeof (inc));
         errno_assert (sz2 == sizeof (inc));
-        return;
+        return 0;
     }

     zmq_assert (dummy == 1);
@@ -295,6 +300,7 @@ void zmq::signaler_t::recv ()
     zmq_assert (nbytes == sizeof (dummy));
     zmq_assert (dummy == 0);
 #endif
+    return 0;
 }

 #ifdef HAVE_FORK
diff --git a/src/signaler.hpp b/src/signaler.hpp
index b66f0ae..54a271d 100644
--- a/src/signaler.hpp
+++ b/src/signaler.hpp
@@ -44,7 +44,7 @@ namespace zmq
         fd_t get_fd () const;
         void send ();
         int wait (int timeout_);
-        void recv ();
+        int recv ();

 #ifdef HAVE_FORK
         // close the file descriptors in a forked child process so that they
--
1.8.4.GIT

The race condition in ctx destroy (presumably also in *_term) that occurs when sending to a socket from this context can be provoked by driving the CPU to maximum load. I was able to reproduce it several times.