karpenter-provider-aws: Lease: Failed to get lease: leases.coordination.k8s.io
Version
Karpenter: v0.8.0
Kubernetes: v1.21.5-eks-bc4871b
Expected Behavior
Nodes launched by Karpenter register with the cluster and no lease errors appear.
Actual Behavior
Getting Lease: Failed to get lease: leases.coordination.k8s.io "ip-xx-xx-xx-xx.ap-south-1.compute.internal" not found
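For context, the kubelet on every node that registers successfully creates and renews a Lease object named after the node in the kube-node-lease namespace, so "not found" suggests the node never finished registering. A sketch of the Lease a healthy node would have (leaseDurationSeconds is the kubelet default):

apiVersion: coordination.k8s.io/v1
kind: Lease
metadata:
  name: ip-xx-xx-xx-xx.ap-south-1.compute.internal   # same name as the node
  namespace: kube-node-lease
spec:
  holderIdentity: ip-xx-xx-xx-xx.ap-south-1.compute.internal
  leaseDurationSeconds: 40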
Steps to Reproduce the Problem
I took these Terraform steps from the docs and fixed a few deprecated module inputs.
locals {
  cluster_name = "eks-test"
}

## PHASE 1
## EKS
module "vpc" {
  source = "terraform-aws-modules/vpc/aws"

  name = local.cluster_name
  cidr = "10.0.0.0/16"

  azs             = ["ap-south-1a", "ap-south-1b", "ap-south-1c"]
  private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
  public_subnets  = ["10.0.101.0/24", "10.0.102.0/24", "10.0.103.0/24"]

  enable_nat_gateway     = true
  single_nat_gateway     = true
  one_nat_gateway_per_az = false

  private_subnet_tags = {
    "kubernetes.io/cluster/${local.cluster_name}" = "owned"
    "karpenter.sh/discovery"                      = local.cluster_name
  }
}

module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "18.18.0"

  cluster_version = "1.21"
  cluster_name    = local.cluster_name
  vpc_id          = module.vpc.vpc_id
  subnet_ids      = module.vpc.private_subnets
  enable_irsa     = true

  eks_managed_node_groups = {
    sentries = {
      min_size       = 1
      max_size       = 1
      desired_size   = 1
      instance_types = ["c5.xlarge"]
      capacity_type  = "ON_DEMAND"
    }
  }

  tags = {
    "karpenter.sh/discovery" = local.cluster_name
  }
}
## PHASE 2
## Karpenter
resource "aws_iam_service_linked_role" "spot" {
aws_service_name = "spot.amazonaws.com"
depends_on = [module.eks]
}
data "aws_iam_policy" "ssm_managed_instance" {
arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
depends_on = [aws_iam_service_linked_role.spot]
}
resource "aws_iam_role_policy_attachment" "karpenter_ssm_policy" {
role = module.eks.cluster_iam_role_name
policy_arn = data.aws_iam_policy.ssm_managed_instance.arn
depends_on = [aws_iam_service_linked_role.spot]
}
resource "aws_iam_instance_profile" "karpenter" {
name = "KarpenterNodeInstanceProfile-${local.cluster_name}"
role = module.eks.cluster_iam_role_name
depends_on = [aws_iam_service_linked_role.spot]
}
module "iam_assumable_role_karpenter" {
source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc"
version = "4.7.0"
create_role = true
role_name = "karpenter-controller-${local.cluster_name}"
provider_url = module.eks.cluster_oidc_issuer_url
oidc_fully_qualified_subjects = ["system:serviceaccount:karpenter:karpenter"]
depends_on = [
aws_iam_role_policy_attachment.karpenter_ssm_policy,
aws_iam_instance_profile.karpenter
]
}
resource "aws_iam_role_policy" "karpenter_controller" {
name = "karpenter-policy-${local.cluster_name}"
role = module.iam_assumable_role_karpenter.iam_role_name
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Action = [
"ec2:CreateLaunchTemplate",
"ec2:CreateFleet",
"ec2:RunInstances",
"ec2:CreateTags",
"iam:PassRole",
"ec2:TerminateInstances",
"ec2:DescribeLaunchTemplates",
"ec2:DeleteLaunchTemplate",
"ec2:DescribeInstances",
"ec2:DescribeSecurityGroups",
"ec2:DescribeSubnets",
"ec2:DescribeInstanceTypes",
"ec2:DescribeInstanceTypeOfferings",
"ec2:DescribeAvailabilityZones",
"ssm:GetParameter"
]
Effect = "Allow"
Resource = "*"
},
]
})
depends_on = [
aws_iam_role_policy_attachment.karpenter_ssm_policy,
aws_iam_instance_profile.karpenter
]
}
resource "local_file" "basic" {
filename = "basic"
content = <<EOF
helm repo add karpenter https://charts.karpenter.sh
helm upgrade --install karpenter karpenter/karpenter \
--version 0.8.0 \
--create-namespace \
--namespace karpenter \
--set serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn='${module.iam_assumable_role_karpenter.iam_role_arn}' \
--set clusterName='${local.cluster_name}' \
--set clusterEndpoint='${module.eks.cluster_endpoint}' \
--set aws.defaultInstanceProfile='${aws_iam_instance_profile.karpenter.name}'
## karpenter provisioner
cat <<EOT | kubectl apply -f -
apiVersion: karpenter.sh/v1alpha5
kind: Provisioner
metadata:
name: mumbai
spec:
requirements:
- key: "topology.kubernetes.io/zone"
operator: In
values: ["ap-south-1a", "ap-south-1b", "ap-south-1c"]
- key: "karpenter.sh/capacity-type"
operator: In
values: ["on-demand"]
- key: node.kubernetes.io/instance-type
operator: In
values: ["t3.2xlarge", "t2.2xlarge", "t3a.2xlarge", "c5.4xlarge"]
provider:
subnetSelector:
karpenter.sh/discovery: eks-test
securityGroupSelector:
karpenter.sh/discovery: eks-test
ttlSecondsAfterEmpty: 10
EOT
## Test it with demo Deployment and scale it
cat <<EOT | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
name: inflate
spec:
replicas: 0
selector:
matchLabels:
app: inflate
template:
metadata:
labels:
app: inflate
spec:
terminationGracePeriodSeconds: 0
containers:
- name: inflate
image: public.ecr.aws/eks-distro/kubernetes/pause:3.2
resources:
requests:
cpu: 1
EOT
kubectl scale deployment inflate --replicas 20
EOF
}
This will create a file named basic with the necessary Helm command, Provisioner manifest, etc., with the fields filled in.
Resource Specs and Logs
webhook logs https://gist.github.com/pratikbin/5e2f1c54032c6a8c43d4e60e1648c481
controller logs https://gist.github.com/pratikbin/3db319cd9195818f6c814ce8c55644fe
About this issue
- Original URL
- State: closed
- Created 2 years ago
- Comments: 20 (8 by maintainers)
Hey all, it looks like this issue is now resolved. I’m going to close it out, but feel free to reopen if you’re still having issues.
Thanks!
I was running a custom AMI and had this error; my fix was changing
amiFamily: Custom
to
amiFamily: AL2
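For anyone else hitting this: in the v1alpha5 API that field lives in the Provisioner's provider block. A minimal sketch, reusing the selectors from the reproduction above (the provisioner name and selector values here are only illustrative):

apiVersion: karpenter.sh/v1alpha5
kind: Provisioner
metadata:
  name: mumbai
spec:
  provider:
    amiFamily: AL2   # was "Custom"; AL2 uses the EKS-optimized AMI and its bootstrap user data
    subnetSelector:
      karpenter.sh/discovery: eks-test
    securityGroupSelector:
      karpenter.sh/discovery: eks-test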
== heads-up ==
Another “operator error” type issue can occur when one has a Karpenter provisioner with the
containerRuntime: dockerd
runtime while the cluster has been upgraded to K8s v1.24+, which removed support for dockershim (unless custom AMIs are used). In this scenario the kubelet fails to start and nodes fail to register (which becomes evident after checking the kubelet logs on the EKS node). Updating the provisioner config to use the containerd runtime resolves the issue.

Do you have an entry like the one sketched below in your aws-auth config map?
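A sketch of the standard mapRoles entry from the Karpenter getting-started docs; the account ID and role name are placeholders for whatever role is actually attached to the node instance profile:

apiVersion: v1
kind: ConfigMap
metadata:
  name: aws-auth
  namespace: kube-system
data:
  mapRoles: |
    - rolearn: arn:aws:iam::<ACCOUNT_ID>:role/KarpenterNodeRole-eks-test   # role on the node instance profile
      username: system:node:{{EC2PrivateDNSName}}
      groups:
        - system:bootstrappers
        - system:nodes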