runc: runc init process stuck in DISK Sleep status when freezing cgroup

We upgraded docker-ce from 19.03 to 20.10, and the docker component versions are now:

Client: Docker Engine - Community
 Version:           20.10.1
 API version:       1.41
 Go version:        go1.13.15
 Git commit:        831ebea
 Built:             Tue Dec 15 04:35:01 2020
 OS/Arch:           linux/amd64
 Context:           default
 Experimental:      true

Server: Docker Engine - Community
 Engine:
  Version:          20.10.1
  API version:      1.41 (minimum version 1.12)
  Go version:       go1.13.15
  Git commit:       f001486
  Built:            Tue Dec 15 04:32:57 2020
  OS/Arch:          linux/amd64
  Experimental:     false
 containerd:
  Version:          1.4.3
  GitCommit:        269548fa27e0089a8b8278fc4fc781d7f65a939b
 runc:
  Version:          1.0.0-rc92
  GitCommit:        ff819c7e9184c13b7c2607fe6c30ae19403a7aff
 docker-init:
  Version:          0.19.0
  GitCommit:        de40ad0

When Kubernetes terminates a Pod, we find the container process stuck in DISK Sleep status, docker inspect on the container hangs forever, kubelet PLEG is not healthy, and kubelet becomes NotReady …

When we run ps on the container processes … we find the runc init process is also in DISK Sleep …

root     15758 57587  0 16:39 ?        00:00:00 runc --root /var/run/docker/runtime-runc/moby --log /run/containerd/io.containerd.runtime.v2.task/moby/452a153f1775d37f5813c1b71ff46df6e0da76ce5e8450d3df46d1de3cc97b2c/log.json --log-format json --systemd-cgroup exec --process /tmp/runc-process627966115 --detach --pid-file /run/containerd/io.containerd.runtime.v2.task/moby/452a153f1775d37f5813c1b71ff46df6e0da76ce5e8450d3df46d1de3cc97b2c/27e5850550ae06545bde2abb0470316e95938c54ad673bb9b1e0686129d10ae1.pid 452a153f1775d37f5813c1b71ff46df6e0da76ce5e8450d3df46d1de3cc97b2c
root     15795 15758  0 16:39 ?        00:00:00 runc init
root     15801 57587  4 16:39 ?        00:12:50 runc --root /var/run/docker/runtime-runc/moby --log /run/containerd/io.containerd.runtime.v2.task/moby/452a153f1775d37f5813c1b71ff46df6e0da76ce5e8450d3df46d1de3cc97b2c/log.json --log-format json --systemd-cgroup update --resources - 452a153f1775d37f5813c1b71ff46df6e0da76ce5e8450d3df46d1de3cc97b2c

# runc init process stuck in "D" status...
 
root@bigo:/home/xyz# cat /proc/15795/status
Name:   runc:[2:INIT]
Umask:  0022
State:  D (disk sleep)
Tgid:   15795
Ngid:   0
Pid:    15795
PPid:   15758
TracerPid:      0
Uid:    0       0       0       0
Gid:    0       0       0       0
FDSize: 64
Groups:  
NStgid: 15795   230
NSpid:  15795   230
NSpgid: 15795   230
NSsid:  15795   230
VmPeak:   279548 kB
VmSize:   230660 kB
VmLck:         0 kB
VmPin:         0 kB
VmHWM:     14876 kB
VmRSS:     14876 kB
RssAnon:            9064 kB
RssFile:            5812 kB
RssShmem:              0 kB
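
For anyone triaging a similar hang, here is a minimal Go sketch (assuming nothing beyond procfs; the file name and usage are ours, not runc's) that reads /proc/<pid>/status and reports whether the task is in uninterruptible (D) sleep, i.e. the same state the output above shows for PID 15795:

// checkstate.go: report whether a task is in uninterruptible (D) sleep.
// Usage: go run checkstate.go <pid>   (e.g. 15795 from the listing above)
package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: checkstate <pid>")
		os.Exit(1)
	}
	f, err := os.Open("/proc/" + os.Args[1] + "/status")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	defer f.Close()

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		line := sc.Text()
		if strings.HasPrefix(line, "State:") {
			state := strings.TrimSpace(strings.TrimPrefix(line, "State:"))
			fmt.Println(state)
			if strings.HasPrefix(state, "D") {
				fmt.Println("task is in uninterruptible (disk) sleep")
			}
			return
		}
	}
}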

About this issue

  • State: closed
  • Created 3 years ago
  • Comments: 24 (15 by maintainers)

Most upvoted comments

I think we should not return FREEZING as a state, since no one expects it. Here’s my attempt at solving this:

diff --git a/libcontainer/cgroups/fs/freezer.go b/libcontainer/cgroups/fs/freezer.go
index 11cb1646..dc265d7f 100644
--- a/libcontainer/cgroups/fs/freezer.go
+++ b/libcontainer/cgroups/fs/freezer.go
@@ -28,33 +28,40 @@ func (s *FreezerGroup) Apply(path string, d *cgroupData) error {
 
 func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
        switch cgroup.Resources.Freezer {
-       case configs.Frozen, configs.Thawed:
-               for {
-                       // In case this loop does not exit because it doesn't get the expected
-                       // state, let's write again this state, hoping it's going to be properly
-                       // set this time. Otherwise, this loop could run infinitely, waiting for
-                       // a state change that would never happen.
-                       if err := fscommon.WriteFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
+       case configs.Frozen:
+               // As per kernel doc (freezer-subsystem.txt), if FREEZING
+               // is seen, userspace should either retry or thaw.
+               for i := 0; i < 10; i++ {
+                       if err := fscommon.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
                                return err
                        }
 
-                       state, err := s.GetState(path)
+                       state, err := fscommon.ReadFile(path, "freezer.state")
                        if err != nil {
                                return err
                        }
-                       if state == cgroup.Resources.Freezer {
-                               break
+                       state = strings.TrimSpace(state)
+                       switch state {
+                       case "FREEZING":
+                               time.Sleep(time.Duration(i) * time.Millisecond)
+                               continue
+                       case string(configs.Frozen):
+                               return nil
+                       default:
+                               return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state))
                        }
-
-                       time.Sleep(1 * time.Millisecond)
                }
+               // It got stuck in FREEZING. Try to thaw it back
+               // (which will most probably succeed) and error out.
+               _ = fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
+               return errors.New("unable to freeze")
+       case configs.Thawed:
+               return fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
        case configs.Undefined:
                return nil
        default:
                return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer))
        }
-
-       return nil
 }
 
 func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {

This way, GetState() does not need to be changed as it will eventually succeed.
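
For clarity, here is the same retry-then-thaw pattern as a standalone sketch against a cgroup v1 freezer (the cgroup path and helper below are assumptions for illustration, not runc's code; per the kernel's freezer-subsystem.txt, a read of freezer.state may return FREEZING, in which case userspace should retry or thaw):

// freeze.go: retry-then-thaw sketch for a cgroup v1 freezer.
// The path passed to freeze() is a hypothetical freezer cgroup directory,
// e.g. /sys/fs/cgroup/freezer/docker/<container-id>.
package main

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"
)

func freeze(cgroupPath string) error {
	stateFile := filepath.Join(cgroupPath, "freezer.state")
	for i := 0; i < 10; i++ {
		if err := os.WriteFile(stateFile, []byte("FROZEN"), 0644); err != nil {
			return err
		}
		data, err := os.ReadFile(stateFile)
		if err != nil {
			return err
		}
		switch state := strings.TrimSpace(string(data)); state {
		case "FREEZING":
			// The kernel has not finished freezing every task yet; back off and retry.
			time.Sleep(time.Duration(i) * time.Millisecond)
		case "FROZEN":
			return nil
		default:
			return fmt.Errorf("unexpected freezer state %q while freezing", state)
		}
	}
	// Stuck in FREEZING: thaw back so tasks are not left half-frozen, then error out.
	_ = os.WriteFile(stateFile, []byte("THAWED"), 0644)
	return errors.New("unable to freeze")
}

func main() {
	if err := freeze("/sys/fs/cgroup/freezer/docker/example"); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

The backoff mirrors the patch above: each failed check waits slightly longer, and after ten attempts the cgroup is thawed instead of being left in FREEZING indefinitely.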