karpenter-provider-aws: Lease: Failed to get lease: leases.coordination.k8s.io

Version

Karpenter: v0.8.0

Kubernetes: v1.21.5-eks-bc4871b

Expected Behavior

Karpenter-launched nodes should join the cluster and register as Ready.

Actual Behavior

Getting this error in the Karpenter logs:

Lease: Failed to get lease: leases.coordination.k8s.io "ip-xx-xx-xx-xx.ap-south-1.compute.internal" not found

Steps to Reproduce the Problem

I took these Terraform steps from the docs and fixed a few deprecated module inputs.

locals {
  cluster_name = "eks-test"
}

## PHASE 1
## EKS
module "vpc" {
  source = "terraform-aws-modules/vpc/aws"

  name = local.cluster_name
  cidr = "10.0.0.0/16"

  azs             = ["ap-south-1a", "ap-south-1b", "ap-south-1c"]
  private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
  public_subnets  = ["10.0.101.0/24", "10.0.102.0/24", "10.0.103.0/24"]

  enable_nat_gateway     = true
  single_nat_gateway     = true
  one_nat_gateway_per_az = false

  private_subnet_tags = {
    "kubernetes.io/cluster/${local.cluster_name}" = "owned"
    "karpenter.sh/discovery"                      = local.cluster_name
  }
}

module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "18.18.0"

  cluster_version = "1.21"
  cluster_name    = local.cluster_name
  vpc_id          = module.vpc.vpc_id
  subnet_ids      = module.vpc.private_subnets
  enable_irsa     = true

  eks_managed_node_groups = {
    sentries = {
      min_size     = 1
      max_size     = 1
      desired_size = 1

      instance_types = ["c5.xlarge"]
      capacity_type  = "ON_DEMAND"
    }
  }

  tags = {
    "karpenter.sh/discovery" = local.cluster_name
  }
}

## PHASE 2
## Karpenter
resource "aws_iam_service_linked_role" "spot" {
  aws_service_name = "spot.amazonaws.com"
  depends_on       = [module.eks]
}

data "aws_iam_policy" "ssm_managed_instance" {
  arn        = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
  depends_on = [aws_iam_service_linked_role.spot]
}

resource "aws_iam_role_policy_attachment" "karpenter_ssm_policy" {
  role       = module.eks.cluster_iam_role_name
  policy_arn = data.aws_iam_policy.ssm_managed_instance.arn
  depends_on = [aws_iam_service_linked_role.spot]
}

resource "aws_iam_instance_profile" "karpenter" {
  name       = "KarpenterNodeInstanceProfile-${local.cluster_name}"
  role       = module.eks.cluster_iam_role_name
  depends_on = [aws_iam_service_linked_role.spot]
}
module "iam_assumable_role_karpenter" {
  source                        = "terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc"
  version                       = "4.7.0"
  create_role                   = true
  role_name                     = "karpenter-controller-${local.cluster_name}"
  provider_url                  = module.eks.cluster_oidc_issuer_url
  oidc_fully_qualified_subjects = ["system:serviceaccount:karpenter:karpenter"]

  depends_on = [
    aws_iam_role_policy_attachment.karpenter_ssm_policy,
    aws_iam_instance_profile.karpenter
  ]
}

resource "aws_iam_role_policy" "karpenter_controller" {
  name = "karpenter-policy-${local.cluster_name}"
  role = module.iam_assumable_role_karpenter.iam_role_name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Action = [
          "ec2:CreateLaunchTemplate",
          "ec2:CreateFleet",
          "ec2:RunInstances",
          "ec2:CreateTags",
          "iam:PassRole",
          "ec2:TerminateInstances",
          "ec2:DescribeLaunchTemplates",
          "ec2:DeleteLaunchTemplate",
          "ec2:DescribeInstances",
          "ec2:DescribeSecurityGroups",
          "ec2:DescribeSubnets",
          "ec2:DescribeInstanceTypes",
          "ec2:DescribeInstanceTypeOfferings",
          "ec2:DescribeAvailabilityZones",
          "ssm:GetParameter"
        ]
        Effect   = "Allow"
        Resource = "*"
      },
    ]
  })

  depends_on = [
    aws_iam_role_policy_attachment.karpenter_ssm_policy,
    aws_iam_instance_profile.karpenter
  ]
}

resource "local_file" "basic" {
  filename = "basic"
  content  = <<EOF
helm repo add karpenter https://charts.karpenter.sh
helm upgrade --install karpenter karpenter/karpenter \
  --version 0.8.0 \
  --create-namespace \
  --namespace karpenter \
  --set serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn='${module.iam_assumable_role_karpenter.iam_role_arn}' \
  --set clusterName='${local.cluster_name}' \
  --set clusterEndpoint='${module.eks.cluster_endpoint}' \
  --set aws.defaultInstanceProfile='${aws_iam_instance_profile.karpenter.name}'

## karpenter provisioner
cat <<EOT | kubectl apply -f -
apiVersion: karpenter.sh/v1alpha5
kind: Provisioner
metadata:
  name: mumbai
spec:
  requirements:
    - key: "topology.kubernetes.io/zone"
      operator: In
      values: ["ap-south-1a", "ap-south-1b", "ap-south-1c"]
    - key: "karpenter.sh/capacity-type"
      operator: In
      values: ["on-demand"]
    - key: node.kubernetes.io/instance-type
      operator: In
      values: ["t3.2xlarge", "t2.2xlarge", "t3a.2xlarge", "c5.4xlarge"]
  provider:
    subnetSelector:
      karpenter.sh/discovery: eks-test
    securityGroupSelector:
      karpenter.sh/discovery: eks-test
  ttlSecondsAfterEmpty: 10
EOT

## Test it with demo Deployment and scale it
cat <<EOT | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: inflate
spec:
  replicas: 0
  selector:
    matchLabels:
      app: inflate
  template:
    metadata:
      labels:
        app: inflate
    spec:
      terminationGracePeriodSeconds: 0
      containers:
        - name: inflate
          image: public.ecr.aws/eks-distro/kubernetes/pause:3.2
          resources:
            requests:
              cpu: 1
EOT

kubectl scale deployment inflate --replicas 20
EOF
}

This will create a file named basic with the Helm install command, the Provisioner manifest, and the test Deployment, with the cluster-specific values filled in.
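For completeness, a minimal sketch of how the two phases get applied and the generated script gets run; the update-kubeconfig step is an assumption about how kubectl is pointed at the new cluster:

terraform init
terraform apply

# Point kubectl at the freshly created cluster before running the script
aws eks update-kubeconfig --name eks-test --region ap-south-1

# "basic" contains the helm install, the Provisioner, and the test Deployment
bash basic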

Resource Specs and Logs

webhook logs https://gist.github.com/pratikbin/5e2f1c54032c6a8c43d4e60e1648c481

controller logs https://gist.github.com/pratikbin/3db319cd9195818f6c814ce8c55644fe
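For anyone trying to capture the same information on their own cluster, a sketch of the commands involved (namespace, deployment, and container names follow the Helm install above and may differ for other chart versions):

# Controller and webhook logs from the Karpenter deployment
kubectl get pods -n karpenter
kubectl logs -n karpenter deployment/karpenter -c controller
kubectl logs -n karpenter deployment/karpenter -c webhook

# The missing lease would normally live here once the kubelet registers
kubectl get leases -n kube-node-lease

# On the launched EC2 instance, the kubelet log shows why registration failed
journalctl -u kubelet --no-pager | tail -n 50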

About this issue

  • Original URL
  • State: closed
  • Created 2 years ago
  • Comments: 20 (8 by maintainers)

Most upvoted comments

Hey all, it looks like this issue is now resolved. I’m going to close it out, but feel free to reopen if you’re still having issues.

Thanks!

I was running a custom AMI and had this error; my fix was changing

amiFamily: Custom

to

amiFamily: AL2
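For context, a minimal sketch of where that field sits, written against the v1alpha5 Provisioner used earlier in this issue (in newer Karpenter versions the same field lives on an AWSNodeTemplate instead):

cat <<EOT | kubectl apply -f -
apiVersion: karpenter.sh/v1alpha5
kind: Provisioner
metadata:
  name: mumbai
spec:
  provider:
    # AL2 lets Karpenter render bootstrap user data that joins the node;
    # amiFamily: Custom expects you to supply that user data yourself
    amiFamily: AL2
    subnetSelector:
      karpenter.sh/discovery: eks-test
    securityGroupSelector:
      karpenter.sh/discovery: eks-test
  ttlSecondsAfterEmpty: 10
EOT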

== heads-up ==

Another “operator error” type issue can occur when a Karpenter Provisioner is configured with the containerRuntime: dockerd runtime while the cluster has been upgraded to K8s v1.24+, which removed support for dockershim (unless custom AMIs are used). In this scenario the kubelet fails to start and nodes fail to register (which becomes evident after checking the kubelet logs on the EKS node). Updating the Provisioner config to use the containerd runtime resolves the issue; see the sketch below.
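A minimal sketch of that change, assuming the v1alpha5 kubeletConfiguration field (verify the field name against your Karpenter release):

cat <<EOT | kubectl apply -f -
apiVersion: karpenter.sh/v1alpha5
kind: Provisioner
metadata:
  name: mumbai
spec:
  kubeletConfiguration:
    # dockershim was removed in Kubernetes 1.24; stock EKS AMIs ship containerd only
    containerRuntime: containerd
  provider:
    subnetSelector:
      karpenter.sh/discovery: eks-test
    securityGroupSelector:
      karpenter.sh/discovery: eks-test
EOT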

Do you have an entry like this in your aws-auth config map?

kubectl get configmap aws-auth -n kube-system -o yaml
apiVersion: v1
data:
  mapRoles: |
    - groups:
      - system:bootstrappers
      - system:nodes
      rolearn: arn:aws:iam::1234567890:role/eksctl-karpenter-demo-nodegroup-k-NodeInstanceRole-YBGH50RFGIEL
      username: system:node:{{EC2PrivateDNSName}}
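A quick way to cross-check this against the Terraform above (the instance profile name comes from the aws_iam_instance_profile resource in this issue; adjust to your own setup):

# Which IAM role is attached to the instance profile Karpenter hands to new nodes?
aws iam get-instance-profile \
  --instance-profile-name KarpenterNodeInstanceProfile-eks-test \
  --query 'InstanceProfile.Roles[].Arn'

# That role ARN must appear under mapRoles with the system:bootstrappers and
# system:nodes groups, otherwise the kubelet cannot register and no lease is created
kubectl get configmap aws-auth -n kube-system -o yaml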