k8s-bigip-ctlr: [Enhancement] vxlan manager only uses the node internal IP as the FDB endpoint IP, causing wrong FDB entries on BIG-IP

Description

Here is my k8s cluster environment:

  1. The k8s cluster uses ens33 (172.16.10.0/24) for everything cluster-related (certificates, node InternalIP, /etc/hosts entries).
  2. Flannel is configured to use ens160 for vxlan, so the flannel public-ip is on the 172.16.40.0/24 network. This is different from the node InternalIP network.

Nodes interfaces:

//k8s master//
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 172.16.10.201  netmask 255.255.0.0  broadcast 172.16.255.255
        inet6 fe80::93eb:77c9:718:43d2  prefixlen 64  scopeid 0x20<link>
        ether 00:50:56:b3:46:85  txqueuelen 1000  (Ethernet)
        RX packets 352566  bytes 32062651 (30.5 MiB)
        RX errors 0  dropped 4  overruns 0  frame 0
        TX packets 457619  bytes 416423360 (397.1 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

ens160: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 172.16.40.199  netmask 255.255.255.0  broadcast 172.16.40.255
        inet6 fe80::a35:8f62:68df:ae99  prefixlen 64  scopeid 0x20<link>
        ether 00:50:56:b3:09:f2  txqueuelen 1000  (Ethernet)
        RX packets 278  bytes 26665 (26.0 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 159  bytes 19746 (19.2 KiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

flannel.1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1450
        inet 10.244.0.0  netmask 255.255.255.255  broadcast 0.0.0.0
        inet6 fe80::7046:9bff:fe13:8814  prefixlen 64  scopeid 0x20<link>
        ether 72:46:9b:13:88:14  txqueuelen 0  (Ethernet)
        RX packets 80  bytes 7731 (7.5 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 124  bytes 9276 (9.0 KiB)
        TX errors 0  dropped 8 overruns 0  carrier 0  collisions 0

[root@k8s-master f5-k8s]# bridge fdb
00:50:56:b3:2e:29 dev flannel.1 dst 172.16.40.202 self permanent
72:45:92:84:7e:ac dev flannel.1 dst 172.16.40.198 self permanent




//k8s-node1//
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 172.16.10.202  netmask 255.255.255.0  broadcast 172.16.10.255
        inet6 fe80::2cb4:4e38:b3cc:e972  prefixlen 64  scopeid 0x20<link>
        ether 00:50:56:b3:0c:71  txqueuelen 1000  (Ethernet)
        RX packets 457633  bytes 311060432 (296.6 MiB)
        RX errors 0  dropped 5  overruns 0  frame 0
        TX packets 352387  bytes 32126104 (30.6 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

ens160: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 172.16.40.198  netmask 255.255.255.0  broadcast 172.16.40.255
        inet6 fe80::53f1:b4f9:30d7:7da2  prefixlen 64  scopeid 0x20<link>
        ether 00:50:56:b3:1b:9b  txqueuelen 1000  (Ethernet)
        RX packets 145668  bytes 141254015 (134.7 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 139284  bytes 51315882 (48.9 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0

flannel.1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1450
        inet 10.244.1.0  netmask 255.255.255.255  broadcast 0.0.0.0
        inet6 fe80::7045:92ff:fe84:7eac  prefixlen 64  scopeid 0x20<link>
        ether 72:45:92:84:7e:ac  txqueuelen 0  (Ethernet)
        RX packets 96  bytes 6924 (6.7 KiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 80  bytes 7731 (7.5 KiB)
        TX errors 0  dropped 8 overruns 0  carrier 0  collisions 0


[root@k8s-node1 ~]# bridge fdb
00:50:56:b3:2e:29 dev flannel.1 dst 172.16.40.202 self permanent
72:46:9b:13:88:14 dev flannel.1 dst 172.16.40.199 self permanent
da:05:79:a8:36:93 dev flannel.1 dst 172.16.40.199 self permanent

Everything works inside the cluster, for example running curl against the k8s service cluster IP from a node itself:

[root@k8s-node1 ~]# curl 10.250.0.75
This is k8s-node1 /root/cka/data/index.html
[root@k8s-node1 ~]# curl 10.250.0.75
This is k8s-master /root/cka/data/index.html
  3. Set up the F5 BIG-IP as a fake node for k8s and configure vxlan on it. Here is the BIG-IP configuration:
myf5@(v13-common)(cfg-sync Not All Devices Synced)(Active)(/Common)(tmos)# list net self
net self self_flannel_vxlan { <<<<<<<<<<<<<<<<<<<<<
    address 10.244.244.2/16
    allow-service {
        default
    }
    traffic-group traffic-group-local-only
    vlan flannel_tunel_vxlan
}
net self ext_self_v6 {
    address fdf1:f2f3:f4f5:f6f7::ac10:1eca/64
    allow-service {
        default
    }
    traffic-group traffic-group-local-only
    vlan ext_vlan
}
net self ext_self {
    address 172.16.30.202/24
    allow-service {
        default
    }
    traffic-group traffic-group-local-only
    vlan ext_vlan
}
net self float_self_flannel_vxlan { <<<<<<<<<<<<<<<<<
    address 10.244.244.3/16
    allow-service {
        default
    }
    floating enabled
    traffic-group traffic-group-1
    unit 1
    vlan flannel_tunel_vxlan
}
net self vtep {   <<<<<<<<<<<<<<<<<<<
    address 172.16.40.202/24
    allow-service {
        default
    }
    traffic-group traffic-group-local-only
    vlan int_vlan
}


myf5@(v13-common)(cfg-sync Not All Devices Synced)(Active)(/Common)(tmos)# list net tunnels vxlan 
net tunnels vxlan fl-vxlan {
    app-service none
    flooding-type none
    port otv
}


myf5@(v13-common)(cfg-sync Not All Devices Synced)(Active)(/Common)(tmos)# list net tunnels tunnel flannel_tunel_vxlan 
net tunnels tunnel flannel_tunel_vxlan {
    if-index 208
    key 1
    local-address 172.16.40.202
    profile fl-vxlan
}

The BIG-IP is using the node internal IP for its FDB entries:

myf5@(v13-common)(cfg-sync Not All Devices Synced)(Active)(/Common)(tmos)# show net fdb

-------------------------------------------------------------------------
Net::FDB           
Tunnel               Mac Address        Member                    Dynamic
-------------------------------------------------------------------------
flannel_tunel_vxlan  72:46:9b:13:88:14  endpoint:172.16.10.201%0  no
flannel_tunel_vxlan  72:45:92:84:7e:ac  endpoint:172.16.10.202%0  no

And the ARP entry for the pod cannot be resolved:

myf5@(v13-common)(cfg-sync Not All Devices Synced)(Active)(/Common)(tmos)# show net arp

-----------------------------------------------------------------------------------------------------
Net::Arp     
Name           Address        HWaddress          Vlan                         Expire-in-sec  Status
-----------------------------------------------------------------------------------------------------
10.244.1.4     10.244.1.4     incomplete         /Common/flannel_tunel_vxlan  0              unknown <<<<<!!!
172.16.30.203  172.16.30.203  00:50:56:b3:03:ff  /Common/ext_vlan             294            resolved
172.16.40.198  172.16.40.198  00:50:56:b3:1b:9b  /Common/int_vlan             254            resolved

The k8s-bigip-ctlr log shows: 2018/12/15 01:19:27 [ERROR] Vxlan manager could not get VtepMac for 10.244.0.59’s node.

After checking the vxlan manager source code, I found that the above log message is caused by the node internal IP being different from the flannel public-ip.

The source code also shows that k8s-bigip-ctlr uses the node internal IP for the FDB records.

Should k8s-bigip-ctlr use the flannel public-ip as the node VTEP address here?

Kubernetes Version

v1.10.6 (kubeletVersion / kubeProxyVersion reported in the node status under Diagnostic Information)

Controller Version

Version: v1.7.1, BuildInfo: n1279-465125010

BIG-IP Version

Product BIG-IP Version 13.1.1 Build 0.0.4 Edition Final Date Fri Jul 20 17:55:49 PDT 2018

Diagnostic Information

[root@k8s-node1 ~]# kubectl get nodes -o yaml
apiVersion: v1
items:
- apiVersion: v1
  kind: Node
  metadata:
    annotations:
      flannel.alpha.coreos.com/backend-data: '{"VtepMAC":"00:50:56:b3:2e:29"}'
      flannel.alpha.coreos.com/backend-type: vxlan
      flannel.alpha.coreos.com/kube-subnet-manager: "true"
      flannel.alpha.coreos.com/public-ip: 172.16.40.202
      node.alpha.kubernetes.io/ttl: "0"
    creationTimestamp: 2018-12-14T15:17:46Z
    name: bigip
    namespace: ""
    resourceVersion: "7342120"
    selfLink: /api/v1/nodes/bigip
    uid: 6933c860-ffb3-11e8-9344-005056b34685
  spec:
    externalID: bigip
    podCIDR: 10.244.244.0/24
  status:
    conditions:
    - lastHeartbeatTime: 2018-12-14T15:17:46Z
      lastTransitionTime: 2018-12-14T15:18:47Z
      message: Kubelet never posted node status.
      reason: NodeStatusNeverUpdated
      status: Unknown
      type: Ready
    - lastHeartbeatTime: 2018-12-14T15:17:46Z
      lastTransitionTime: 2018-12-14T15:18:47Z
      message: Kubelet never posted node status.
      reason: NodeStatusNeverUpdated
      status: Unknown
      type: OutOfDisk
    - lastHeartbeatTime: 2018-12-14T15:17:46Z
      lastTransitionTime: 2018-12-14T15:18:47Z
      message: Kubelet never posted node status.
      reason: NodeStatusNeverUpdated
      status: Unknown
      type: MemoryPressure
    - lastHeartbeatTime: 2018-12-14T15:17:46Z
      lastTransitionTime: 2018-12-14T15:18:47Z
      message: Kubelet never posted node status.
      reason: NodeStatusNeverUpdated
      status: Unknown
      type: DiskPressure
    daemonEndpoints:
      kubeletEndpoint:
        Port: 0
    nodeInfo:
      architecture: ""
      bootID: ""
      containerRuntimeVersion: ""
      kernelVersion: ""
      kubeProxyVersion: ""
      kubeletVersion: ""
      machineID: ""
      operatingSystem: ""
      osImage: ""
      systemUUID: ""
- apiVersion: v1
  kind: Node
  metadata:
    annotations:
      flannel.alpha.coreos.com/backend-data: '{"VtepMAC":"72:46:9b:13:88:14"}'
      flannel.alpha.coreos.com/backend-type: vxlan
      flannel.alpha.coreos.com/kube-subnet-manager: "true"
      flannel.alpha.coreos.com/public-ip: 172.16.40.199
      node.alpha.kubernetes.io/ttl: "0"
      volumes.kubernetes.io/controller-managed-attach-detach: "true"
    creationTimestamp: 2018-08-05T14:42:30Z
    labels:
      beta.kubernetes.io/arch: amd64
      beta.kubernetes.io/os: linux
      kubernetes.io/hostname: k8s-master
      owner: lj
      role: master
    name: k8s-master
    namespace: ""
    resourceVersion: "7439863"
    selfLink: /api/v1/nodes/k8s-master
    uid: c7b4efba-98bd-11e8-aeed-000c29850765
  spec:
    externalID: k8s-master
    podCIDR: 10.244.0.0/24
  status:
    addresses:
    - address: 172.16.10.201
      type: InternalIP
    - address: k8s-master
      type: Hostname
    allocatable:
      cpu: "1"
      ephemeral-storage: 17868Mi
      hugepages-1Gi: "0"
      hugepages-2Mi: "0"
      memory: 8152044Ki
      pods: "110"
    capacity:
      cpu: "1"
      ephemeral-storage: 17878Mi
      hugepages-1Gi: "0"
      hugepages-2Mi: "0"
      memory: 8157164Ki
      pods: "110"
    conditions:
    - lastHeartbeatTime: 2018-12-15T13:44:48Z
      lastTransitionTime: 2018-10-17T00:00:34Z
      message: kubelet has sufficient disk space available
      reason: KubeletHasSufficientDisk
      status: "False"
      type: OutOfDisk
    - lastHeartbeatTime: 2018-12-15T13:44:48Z
      lastTransitionTime: 2018-10-17T00:00:35Z
      message: kubelet has sufficient memory available
      reason: KubeletHasSufficientMemory
      status: "False"
      type: MemoryPressure
    - lastHeartbeatTime: 2018-12-15T13:44:48Z
      lastTransitionTime: 2018-11-27T06:21:00Z
      message: kubelet has no disk pressure
      reason: KubeletHasNoDiskPressure
      status: "False"
      type: DiskPressure
    - lastHeartbeatTime: 2018-12-15T13:44:48Z
      lastTransitionTime: 2018-08-05T14:42:30Z
      message: kubelet has sufficient PID available
      reason: KubeletHasSufficientPID
      status: "False"
      type: PIDPressure
    - lastHeartbeatTime: 2018-12-15T13:44:48Z
      lastTransitionTime: 2018-12-14T07:41:00Z
      message: kubelet is posting ready status
      reason: KubeletReady
      status: "True"
      type: Ready
    daemonEndpoints:
      kubeletEndpoint:
        Port: 10250
    images:
    - names:
      - f5devcentral/f5-as3-container@sha256:bb5aa050ba3839249038b579552b931f1e7ecea4ef177cf4c804779879625508
      - f5devcentral/f5-as3-container:latest
      sizeBytes: 487911870
    - names:
      - cargo.caicloud.io/caicloud/training-calico-node@sha256:2d5255fab62c29226a9a4121e1251439c2861641532edd653a009c11f4ec1b4f
      - cargo.caicloud.io/caicloud/training-calico-node:v2.6.2
      sizeBytes: 281619208
    - names:
      - f5networks/k8s-bigip-ctlr@sha256:120f45d5c8f57397d9ea95fa060361c46c1cb1a18d80f3c5d110d1d0b85652d5
      - f5networks/k8s-bigip-ctlr:latest
      sizeBytes: 266809874
    - names:
      - nginx@sha256:59f8d756e723a610d60bdcb37a4d68185a7b67bfc7ef4c616dd022458ec3db3a
      - nginx@sha256:5d32f60db294b5deb55d078cd4feb410ad88e6fe77500c87d3970eca97f54dba
      - nginx:latest
      sizeBytes: 109096776
    - names:
      - myf5/kubernetes-dashboard-amd64@sha256:2d41043bb04f32d2349ace6851fb80c50c689e38a94df1bfcb80802ccbbe09fd
      - myf5/kubernetes-dashboard-amd64:v1.8.3
      sizeBytes: 102319441
    - names:
      - cargo.caicloud.io/caicloud/training-calico-cni@sha256:063dafaee87aa7882fda7844de9258cd17666b3d61ce4547c910fca0326f0708
      - cargo.caicloud.io/caicloud/training-calico-cni:v1.11.0
      sizeBytes: 70883432
    - names:
      - cargo.caicloud.io/caicloud/training-flannel@sha256:93952a105b4576e8f09ab8c4e00483131b862c24180b0b7d342fb360bbe44f3d
      - cargo.caicloud.io/caicloud/training-flannel:v0.9.1
      sizeBytes: 51314612
    - names:
      - cargo.caicloud.io/caicloud/training-k8s-dns-kube-dns-amd64@sha256:5e69a3242bfb784dd3d678aa55675144f6780ebfc9c12a04f2fcaefe1f57eb7d
      - cargo.caicloud.io/caicloud/training-k8s-dns-kube-dns-amd64:1.14.4
      sizeBytes: 49383112
    - names:
      - cargo.caicloud.io/caicloud/training-k8s-dns-sidecar-amd64@sha256:dfaa92808121abce0a45da739179db653b230171cad4d582cf152967c07a68d3
      - cargo.caicloud.io/caicloud/training-k8s-dns-sidecar-amd64:1.14.4
      sizeBytes: 41814878
    - names:
      - cargo.caicloud.io/caicloud/training-k8s-dns-dnsmasq-nanny-amd64@sha256:f403d0b737d7229349ccf638dbc3c538b326048970f679738c27ca0bbcf859df
      - cargo.caicloud.io/caicloud/training-k8s-dns-dnsmasq-nanny-amd64:1.14.4
      sizeBytes: 41410758
    - names:
      - nginx@sha256:1134289c4e177da4547cfa9a97e41943cf03c29520f084d1c4e4622474c8e0e9
      - nginx:1.15-alpine
      sizeBytes: 17750118
    - names:
      - nginx@sha256:94e890939a012d8494ac62a0cf6177c57f97522b35e231dad676e089e309d49d
      sizeBytes: 17745892
    - names:
      - cargo.caicloud.io/caicloud/busybox@sha256:956f6d761c249b42459915ea9a094b84b82f3cdf815f11dd9c91fc5c23f0647a
      - cargo.caicloud.io/caicloud/busybox:1.24
      sizeBytes: 1113554
    - names:
      - cargo.caicloud.io/caicloud/pause-amd64@sha256:eb7d5e4b850dae2cd24897b8a74336459f6eb321e2538d33902f9919eb4f4c98
      - cargo.caicloud.io/caicloud/pause-amd64:3.0
      sizeBytes: 746888
    nodeInfo:
      architecture: amd64
      bootID: 8a175893-cc0b-4204-b156-b8cd4f7ea20d
      containerRuntimeVersion: docker://18.6.0
      kernelVersion: 3.10.0-862.9.1.el7.x86_64
      kubeProxyVersion: v1.10.6
      kubeletVersion: v1.10.6
      machineID: 0edb716e1cf347faa490b2a81c5e6dcd
      operatingSystem: linux
      osImage: CentOS Linux 7 (Core)
      systemUUID: 86FF3342-51DF-DCD5-A390-33A8B53D04A9
- apiVersion: v1
  kind: Node
  metadata:
    annotations:
      flannel.alpha.coreos.com/backend-data: '{"VtepMAC":"72:45:92:84:7e:ac"}'
      flannel.alpha.coreos.com/backend-type: vxlan
      flannel.alpha.coreos.com/kube-subnet-manager: "true"
      flannel.alpha.coreos.com/public-ip: 172.16.40.198
      node.alpha.kubernetes.io/ttl: "0"
      volumes.kubernetes.io/controller-managed-attach-detach: "true"
    creationTimestamp: 2018-08-05T17:10:45Z
    labels:
      beta.kubernetes.io/arch: amd64
      beta.kubernetes.io/os: linux
      kubernetes.io/hostname: k8s-node1
      owner: lj
      role: node
    name: k8s-node1
    namespace: ""
    resourceVersion: "7439862"
    selfLink: /api/v1/nodes/k8s-node1
    uid: 7dff767b-98d2-11e8-aeed-000c29850765
  spec:
    externalID: k8s-node1
    podCIDR: 10.244.1.0/24
  status:
    addresses:
    - address: 172.16.10.202
      type: InternalIP
    - address: k8s-node1
      type: Hostname
    allocatable:
      cpu: "1"
      ephemeral-storage: 17868Mi
      hugepages-1Gi: "0"
      hugepages-2Mi: "0"
      memory: 8152052Ki
      pods: "110"
    capacity:
      cpu: "1"
      ephemeral-storage: 17878Mi
      hugepages-1Gi: "0"
      hugepages-2Mi: "0"
      memory: 8157172Ki
      pods: "110"
    conditions:
    - lastHeartbeatTime: 2018-12-15T13:44:48Z
      lastTransitionTime: 2018-11-25T07:48:12Z
      message: kubelet has sufficient disk space available
      reason: KubeletHasSufficientDisk
      status: "False"
      type: OutOfDisk
    - lastHeartbeatTime: 2018-12-15T13:44:48Z
      lastTransitionTime: 2018-11-25T07:48:12Z
      message: kubelet has sufficient memory available
      reason: KubeletHasSufficientMemory
      status: "False"
      type: MemoryPressure
    - lastHeartbeatTime: 2018-12-15T13:44:48Z
      lastTransitionTime: 2018-11-27T07:03:34Z
      message: kubelet has no disk pressure
      reason: KubeletHasNoDiskPressure
      status: "False"
      type: DiskPressure
    - lastHeartbeatTime: 2018-12-15T13:44:48Z
      lastTransitionTime: 2018-08-06T01:41:16Z
      message: kubelet has sufficient PID available
      reason: KubeletHasSufficientPID
      status: "False"
      type: PIDPressure
    - lastHeartbeatTime: 2018-12-15T13:44:48Z
      lastTransitionTime: 2018-11-28T00:34:13Z
      message: kubelet is posting ready status
      reason: KubeletReady
      status: "True"
      type: Ready
    daemonEndpoints:
      kubeletEndpoint:
        Port: 10250
    images:
    - names:
      - f5devcentral/f5-as3-container@sha256:9ccf5a8e18699742440ea50b9023a9728987d04da4710869e8964d36bcf1d552
      - f5devcentral/f5-as3-container:3.5.0
      sizeBytes: 485211181
    - names:
      - cargo.caicloud.io/caicloud/training-calico-node@sha256:2d5255fab62c29226a9a4121e1251439c2861641532edd653a009c11f4ec1b4f
      - cargo.caicloud.io/caicloud/training-calico-node:v2.6.2
      sizeBytes: 281619208
    - names:
      - f5networks/k8s-bigip-ctlr@sha256:32e391b85b1cd100fd4d0e471e8fda0c02397089710db1bba8140337ed3ddbda
      - f5networks/k8s-bigip-ctlr:latest
      sizeBytes: 272118575
    - names:
      - nginx@sha256:5d32f60db294b5deb55d078cd4feb410ad88e6fe77500c87d3970eca97f54dba
      - nginx:latest
      sizeBytes: 109096776
    - names:
      - cargo.caicloud.io/caicloud/training-calico-cni@sha256:063dafaee87aa7882fda7844de9258cd17666b3d61ce4547c910fca0326f0708
      - cargo.caicloud.io/caicloud/training-calico-cni:v1.11.0
      sizeBytes: 70883432
    - names:
      - cargo.caicloud.io/caicloud/training-flannel@sha256:93952a105b4576e8f09ab8c4e00483131b862c24180b0b7d342fb360bbe44f3d
      - cargo.caicloud.io/caicloud/training-flannel:v0.9.1
      sizeBytes: 51314612
    - names:
      - busybox@sha256:2a03a6059f21e150ae84b0973863609494aad70f0a80eaeb64bddd8d92465812
      - busybox:latest
      sizeBytes: 1154353
    - names:
      - cargo.caicloud.io/caicloud/pause-amd64@sha256:eb7d5e4b850dae2cd24897b8a74336459f6eb321e2538d33902f9919eb4f4c98
      - cargo.caicloud.io/caicloud/pause-amd64:3.0
      sizeBytes: 746888
    nodeInfo:
      architecture: amd64
      bootID: 6695510b-6607-45d9-b300-1e17c14e4f19
      containerRuntimeVersion: docker://18.6.0
      kernelVersion: 3.10.0-862.9.1.el7.x86_64
      kubeProxyVersion: v1.10.6
      kubeletVersion: v1.10.6
      machineID: 0edb716e1cf347faa490b2a81c5e6dcd
      operatingSystem: linux
      osImage: CentOS Linux 7 (Core)
      systemUUID: 6D0B3342-1F42-BC9A-0FFE-BC296CF29F6F
kind: List
metadata:
  resourceVersion: ""
  selfLink: ""

About this issue

  • Original URL
  • State: closed
  • Created 6 years ago
  • Comments: 23 (15 by maintainers)

Commits related to this issue

Most upvoted comments

Hi Vincent, thanks. I will try it out soon.