Building a Highly Available Kubernetes Cluster on Alibaba Cloud ECS with Terway, CCM, and SLB

Introduction

When running a self-managed Kubernetes cluster on Alibaba Cloud ECS, besides configuring Nginx yourself to expose services, you can also use Alibaba Cloud's official CNI plugin Terway together with the Cloud Controller Manager (CCM) to expose services through an SLB. This article focuses on that approach.

Conventions

Minimum server requirements

Node name  Internal IP   Spec   OS          Role
k8s-01     192.168.1.27  4C8G   CentOS 7.x  control plane, etcd
k8s-02     192.168.1.28  4C8G   CentOS 7.x  control plane, etcd
k8s-03     192.168.1.29  4C8G   CentOS 7.x  control plane, etcd
k8s-04     192.168.1.30  4C16G  CentOS 7.x  Node
k8s-05     192.168.1.31  4C16G  CentOS 7.x  Node
  • For high availability, run an odd number (>= 3) of control plane and etcd nodes.

  • The Alibaba Cloud CCM never adds master nodes as SLB backends.

Version conventions

At the time of writing, the Terway deployment manifests on GitHub only support Kubernetes versions below 1.22, so we use the last 1.21 release: 1.21.13.

Installation

IPVS

cat > /etc/sysctl.d/k8s.conf << EOF
net.ipv4.ip_forward                 = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF

# Apply the settings
sysctl --system
# Create a modules file so the required kernel modules are loaded automatically after a reboot
cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/bash
modprobe br_netfilter
modprobe -- ip_vs
modprobe -- ip_vs_lc
modprobe -- ip_vs_wlc
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_lblc
modprobe -- ip_vs_lblcr
modprobe -- ip_vs_dh
modprobe -- ip_vs_sh
# Not supported on 3.10 kernels
#modprobe -- ip_vs_fo
modprobe -- ip_vs_nq
modprobe -- ip_vs_sed
modprobe -- ip_vs_ftp
EOF

chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack_ipv4
yum install ipset ipvsadm -y
# Verify the modules are loaded
lsmod | grep -e ip_vs -e nf_conntrack_ipv4

If the ip_vs modules and nf_conntrack_ipv4 appear in the output, the setup succeeded.

HAProxy

Alibaba Cloud SLB does not allow a backend server to access its own SLB address (a server cannot be both client and backend of the same SLB), so we load-balance the master nodes with a local HAProxy or Nginx on every node instead. Since Nginx is often also used as web middleware, and restarting it would disrupt load balancing, HAProxy is used as the example here.

# HAProxy is simple to install and configure
yum install -y haproxy
# cat /etc/haproxy/haproxy.cfg
global
    log 127.0.0.1 local0
    log 127.0.0.1 local1 notice
 
    maxconn 4096
    chroot /var/lib/haproxy
    user haproxy
    group haproxy
    daemon
defaults
    log global
    mode tcp
    option tcplog
    option dontlognull
    retries 3
    option redispatch
    maxconn 2000
#    contimeout 5000
#    clitimeout 50000
#    srvtimeout 50000
    timeout connect 5000
    timeout client 50000
    timeout server 50000
listen kube-apiserver
    bind 127.0.0.1:6443
    server k8s-01 192.168.1.27:5443 check inter 3000 fall 3 rise 3
    server k8s-02 192.168.1.28:5443 check inter 3000 fall 3 rise 3
    server k8s-03 192.168.1.29:5443 check inter 3000 fall 3 rise 3
  
# Enable on boot
systemctl enable haproxy
# Start haproxy
systemctl start haproxy

# Verify
ss -anltp | grep haproxy

ETCD

Installation

Use the following shell script to install the latest etcd and cfssl releases:

#!/bin/bash

. /etc/profile

function before_install() {
    getent group etcd >/dev/null || groupadd -r etcd
    getent passwd etcd >/dev/null || useradd -r -g etcd -d /var/lib/etcd -s /sbin/nologin -c "etcd user" etcd

    if [ ! -d "/var/lib/etcd" ]; then
        mkdir /var/lib/etcd
        chown -R etcd:etcd /var/lib/etcd
    fi
}

function get_cfssl_version(){
    local cfssl_version=$(curl -s https://api.github.com/repos/cloudflare/cfssl/releases/latest | grep tag_name | cut -d '"' -f 4| cut -b 2-)
    echo $cfssl_version
}

function install_cfssl(){
    local cfssl_version=$(get_cfssl_version)
    local cfssl_url="https://github.com/cloudflare/cfssl/releases/download/v${cfssl_version}/cfssl_${cfssl_version}_linux_amd64"
    local cfssl_bin="cfssl"
    local cfssl_bin_path="/usr/local/bin/${cfssl_bin}"

    local cfssljson_url="https://github.com/cloudflare/cfssl/releases/download/v${cfssl_version}/cfssljson_${cfssl_version}_linux_amd64"
    local cfssljson_bin="cfssljson"
    local cfssljson_bin_path="/usr/local/bin/${cfssljson_bin}"

    echo "cfssl_version: ${cfssl_version}"

    echo "Downloading cfssl..."
    rm -f ${cfssl_bin_path}
    wget -q -O ${cfssl_bin_path} ${cfssl_url}
    if [ $? -ne 0 ]; then
        echo "Failed to download cfssl."
        exit 1
    fi
    chmod +x ${cfssl_bin_path}
    echo "cfssl installed."
    
    echo "Downloading cfssljson..."
    rm -f ${cfssljson_bin_path}
    wget -q -O ${cfssljson_bin_path} ${cfssljson_url}
    if [ $? -ne 0 ]; then
        echo "Failed to download cfssljson."
        exit 1
    fi
    chmod +x ${cfssljson_bin_path}
    echo "cfssljson installed."
}

function get_etcd_version() {
    local etcd_version=$(curl -s https://api.github.com/repos/etcd-io/etcd/releases/latest | grep tag_name | cut -d '"' -f 4)
    echo $etcd_version
}

function install_etcd() {
    local etcd_version=$(get_etcd_version)
    local etcd_url="https://github.com/etcd-io/etcd/releases/download/${etcd_version}/etcd-${etcd_version}-linux-amd64.tar.gz"
    local etcd_file="etcd-${etcd_version}-linux-amd64.tar.gz"
    local etcd_dir="etcd-${etcd_version}-linux-amd64"

    echo "etcd_version: ${etcd_version}"

    cd /tmp
    echo "Downloading etcd..."
    wget -q -O ${etcd_file} ${etcd_url}
    if [ $? -ne 0 ]; then
        echo "download etcd failed"
        exit 1
    fi

    tar -xzf ${etcd_file}
    rm -f /usr/local/bin/etcd /usr/local/bin/etcdctl /usr/local/bin/etcdutl
    mv ${etcd_dir}/etcd /usr/local/bin/
    mv ${etcd_dir}/etcdctl /usr/local/bin/
    mv ${etcd_dir}/etcdutl /usr/local/bin/
    chmod +x /usr/local/bin/etcd /usr/local/bin/etcdctl /usr/local/bin/etcdutl
    echo "etcd installed successfully"

    echo "clean up..."
    rm -f ${etcd_file}
    rm -rf ${etcd_dir}

    cd -
}

function etcd_systemd() {
    cat > /etc/systemd/system/etcd.service << EOF
[Unit]
Description=etcd - highly-available key value store
Documentation=https://github.com/coreos/etcd
After=network.target
Wants=network-online.target

[Service]
#Type=notify
Type=simple
User=etcd
ExecStart=/usr/local/bin/etcd  --config-file /etc/etcd/etcd.yaml
Restart=on-failure
RestartSec=10s

LimitNOFILE=65536
#MemoryLow=200M
Nice=-10
IOSchedulingClass=best-effort
IOSchedulingPriority=2

[Install]
WantedBy=multi-user.target
EOF
}

function after_install() {
    mkdir -p /data/etcd/{data,wal}
    chown -R etcd:etcd /data/etcd

    mkdir -p /etc/etcd/{cfssl,ssl}
    chown -R etcd:etcd /etc/etcd

    echo "Installing systemd service..."
    etcd_systemd
    systemctl enable etcd
    
    grep 'ETCDCTL_API=3' /etc/profile > /dev/null 2>&1
    if [ $? -ne 0 ]; then
        echo "export ETCDCTL_API=3" >> /etc/profile
        . /etc/profile
    fi
    
    grep '$PATH:/usr/local/bin' /etc/profile > /dev/null 2>&1
    if [ $? -ne 0 ]; then
        echo 'export PATH=$PATH:/usr/local/bin' >> /etc/profile
    fi
    
    . /etc/profile
}

function main(){
    before_install

    install_etcd
    install_cfssl

    after_install
}

main

Generating certificates

Three certificate configuration files are needed: etcd-root-ca-csr.json, etcd-csr.json, and etcd-gencert.json:

# Adjust the hosts list below to match your environment
# cat etcd-csr.json
{
    "key": {
        "algo": "rsa",
        "size": 2048 
    },
    "names": [
        {
            "O": "etcd",
            "OU": "etcd Security",
            "L": "Chengdu",
            "ST": "Sichuan",
            "C": "CN"
        }
    ],
    "CN": "etcd",
    "hosts": [
        "127.0.0.1",
        "localhost",
        "*.etcd.node",
        "*.kubernetes.node",
        "192.168.1.27",
        "192.168.1.28",
        "192.168.100.29"
    ]
}
# cat etcd-gencert.json
{
  "signing": {
    "default": {
        "usages": [
          "signing",
          "key encipherment",
          "server auth",
          "client auth"
        ],
        "expiry": "87600h"
    }
  }
}
# cat etcd-root-ca-csr.json
{
    "CN": "etcd-root-ca",
    "key": {
        "algo": "rsa",
        "size": 4096
    },
    "names": [
        {
            "O": "etcd",
            "OU": "etcd Security",
            "L": "Chengdu",
            "ST": "Sichuan",
            "C": "CN"
        }
    ],
    "ca": {
        "expiry": "87600h"
    }
}

Generate the certificates:

#!/usr/bin/env bash

set -e

cfssl gencert --initca=true etcd-root-ca-csr.json | cfssljson --bare etcd-root-ca
cfssl gencert --ca etcd-root-ca.pem --ca-key etcd-root-ca-key.pem --config etcd-gencert.json etcd-csr.json | cfssljson --bare etcd
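
Optionally, before distributing the certificates, confirm that the hosts from etcd-csr.json actually made it into the signed server certificate; a quick sanity check with openssl:

# List the Subject Alternative Names baked into the etcd server certificate
openssl x509 -in etcd.pem -noout -text | grep -A 1 'Subject Alternative Name'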

Copy the certificates to the ssl directory

# Copy the certificates into the etcd ssl directory
cp -rf /etc/etcd/cfssl/*.pem /etc/etcd/ssl
# Fix ownership
chown -R etcd: /etc/etcd/ssl

Configuration file

All three etcd nodes need this configuration, saved as /etc/etcd/etcd.yaml (the path referenced by the systemd unit); adjust the member name and addresses for each node:

# This is the configuration file for the etcd server.

# Human-readable name for this member.
# Must be unique for each member
name: 'etcd-01'

# Path to the data directory.
data-dir: '/data/etcd/data'

# Path to the dedicated wal directory.
wal-dir: '/data/etcd/wal'

# Number of committed transactions to trigger a snapshot to disk.
#snapshot-count: 10000
snapshot-count: 1000

# Time (in milliseconds) of a heartbeat interval.
heartbeat-interval: 100

# Time (in milliseconds) for an election to timeout.
election-timeout: 1000

# Raise alarms when backend size exceeds the given quota. 0 means use the
# default quota.
#quota-backend-bytes: 0
# 5G
quota-backend-bytes: 5368709120

# List of comma separated URLs to listen on for peer traffic.
listen-peer-urls: https://192.168.1.27:2380

# List of comma separated URLs to listen on for client traffic.
listen-client-urls: https://192.168.1.27:2379,https://127.0.0.1:2379

# Maximum number of snapshot files to retain (0 is unlimited).
max-snapshots: 10

# Maximum number of wal files to retain (0 is unlimited).
max-wals: 10

# Comma-separated white list of origins for CORS (cross-origin resource sharing).
cors:

# List of this member's peer URLs to advertise to the rest of the cluster.
# The URLs needed to be a comma-separated list.
initial-advertise-peer-urls: https://192.168.1.27:2380

# List of this member's client URLs to advertise to the public.
# The URLs needed to be a comma-separated list.
advertise-client-urls: https://192.168.1.27:2379

# Discovery URL used to bootstrap the cluster.
discovery:

# Valid values include 'exit', 'proxy'
discovery-fallback: 'proxy'

# HTTP proxy to use for traffic to discovery service.
discovery-proxy:

# DNS domain used to bootstrap initial cluster.
discovery-srv:

# Initial cluster configuration for bootstrapping.
initial-cluster: etcd-01=https://192.168.1.27:2380,etcd-02=https://192.168.1.28:2380,etcd-03=https://192.168.1.29:2380

# Initial cluster token for the etcd cluster during bootstrap.
# Random string; it must be identical across all members of the cluster.
# It can be generated with: openssl rand -base64 15
initial-cluster-token: 'krMWJC2ha5'

# Initial cluster state ('new' or 'existing').
initial-cluster-state: 'new'

# Reject reconfiguration requests that would cause quorum loss.
strict-reconfig-check: true

# Enable runtime profiling data via HTTP server
enable-pprof: true

# Valid values include 'on', 'readonly', 'off'
proxy: 'off'

# Time (in milliseconds) an endpoint will be held in a failed state.
proxy-failure-wait: 5000

# Time (in milliseconds) of the endpoints refresh interval.
proxy-refresh-interval: 30000

# Time (in milliseconds) for a dial to timeout.
proxy-dial-timeout: 1000

# Time (in milliseconds) for a write to timeout.
proxy-write-timeout: 5000

# Time (in milliseconds) for a read to timeout.
proxy-read-timeout: 0

client-transport-security:
  # Path to the client server TLS cert file.
  cert-file: /etc/etcd/ssl/etcd.pem

  # Path to the client server TLS key file.
  key-file: /etc/etcd/ssl/etcd-key.pem

  # Enable client cert authentication.
  client-cert-auth: true

  # Path to the client server TLS trusted CA cert file.
  trusted-ca-file: /etc/etcd/ssl/etcd-root-ca.pem

  # Client TLS using generated certificates
  auto-tls: true

peer-transport-security:
  # Path to the peer server TLS cert file.
  cert-file: /etc/etcd/ssl/etcd.pem

  # Path to the peer server TLS key file.
  key-file: /etc/etcd/ssl/etcd-key.pem

  # Enable peer client cert authentication.
  client-cert-auth: true

  # Path to the peer server TLS trusted CA cert file.
  trusted-ca-file: /etc/etcd/ssl/etcd-root-ca.pem

  # Peer TLS using generated certificates.
  auto-tls: true

# The validity period of the self-signed certificate, the unit is year.
self-signed-cert-validity: 1

# Enable debug-level logging for etcd.
log-level: debug

logger: zap

# Specify 'stdout' or 'stderr' to skip journald logging even when running under systemd.
log-outputs: [systemd/journal]

# Force to create a new one member cluster.
force-new-cluster: false

auto-compaction-mode: revision
auto-compaction-retention: "300"
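
Once /etc/etcd/etcd.yaml is in place on all three nodes, start etcd on each of them and make sure the service stays up; a minimal check:

systemctl daemon-reload
systemctl start etcd
systemctl status etcd --no-pager
# Follow the logs while the three members discover each other
journalctl -u etcd -f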

Common commands

# Health check
etcdctl endpoint health --cacert /etc/etcd/ssl/etcd-root-ca.pem --cert /etc/etcd/ssl/etcd.pem --key /etc/etcd/ssl/etcd-key.pem --endpoints https://192.168.1.27:2379,https://192.168.1.28:2379,https://192.168.1.29:2379
# Wipe all etcd data. Use with extreme caution
etcdctl --cacert /etc/etcd/ssl/etcd-root-ca.pem --cert /etc/etcd/ssl/etcd.pem --key /etc/etcd/ssl/etcd-key.pem --endpoints https://192.168.1.27:2379,https://192.168.1.28:2379,https://192.168.1.29:2379 del "" --prefix
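
Two more commands that come in handy during bring-up, using the same TLS flags (a sketch; adjust the endpoint list as needed):

# List cluster members
etcdctl member list -w table --cacert /etc/etcd/ssl/etcd-root-ca.pem --cert /etc/etcd/ssl/etcd.pem --key /etc/etcd/ssl/etcd-key.pem --endpoints https://192.168.1.27:2379
# Per-endpoint status, including which member is the leader
etcdctl endpoint status -w table --cacert /etc/etcd/ssl/etcd-root-ca.pem --cert /etc/etcd/ssl/etcd.pem --key /etc/etcd/ssl/etcd-key.pem --endpoints https://192.168.1.27:2379,https://192.168.1.28:2379,https://192.168.1.29:2379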

Docker

Yum repository

Outside mainland China, the official Docker repository is recommended.

For servers in mainland China, the Alibaba Cloud mirror is recommended: https://developer.aliyun.com/mirror/docker-ce

# Alibaba Cloud ECS instances can use the internal mirror directly
# cat /etc/yum.repos.d/docker-ce.repo
[docker-ce-stable]
name=Docker CE Stable - $basearch
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/$basearch/stable
enabled=1
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-stable-debuginfo]
name=Docker CE Stable - Debuginfo $basearch
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/debug-$basearch/stable
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-stable-source]
name=Docker CE Stable - Sources
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/source/stable
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-test]
name=Docker CE Test - $basearch
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/$basearch/test
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-test-debuginfo]
name=Docker CE Test - Debuginfo $basearch
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/debug-$basearch/test
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-test-source]
name=Docker CE Test - Sources
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/source/test
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-nightly]
name=Docker CE Nightly - $basearch
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/$basearch/nightly
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-nightly-debuginfo]
name=Docker CE Nightly - Debuginfo $basearch
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/debug-$basearch/nightly
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

[docker-ce-nightly-source]
name=Docker CE Nightly - Sources
baseurl=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/$releasever/source/nightly
enabled=0
gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/docker-ce/linux/centos/gpg

Installation and configuration

To match Kubernetes 1.21.13, install Docker 20.10.14:

# Refresh the yum cache
yum makecache fast
# Install
yum -y install docker-ce-20.10.14-3.el7
# Configure
# The registry mirror below is for internal use only; replace it with your own
# cat /etc/docker/daemon.json 
{
  "exec-opts": ["native.cgroupdriver=systemd"],
  "registry-mirror": [
      "https://e88hruwq.mirror.aliyuncs.com" 
  ],
  "data-root": "/data/docker_data",
  "log-driver": "json-file",
  "log-level": "",
  "log-opts": {
      "max-size": "10m",
      "max-file": "3"
  }
}
# Enable on boot
systemctl enable docker
systemctl restart docker
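
Two optional sanity checks after Docker starts; both values should match what daemon.json sets (cgroup driver systemd, data root /data/docker_data):

# Should print "systemd"
docker info --format '{{.CgroupDriver}}'
# Should point at /data/docker_data
docker info --format '{{.DockerRootDir}}'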

Kubernetes

Pod vSwitch

Terway relies on Alibaba Cloud vSwitches: create a vSwitch in the ECS availability zone from the Alibaba Cloud console to serve as the Pod subnet.

This article uses 192.168.32.0/19 as the example.

If you use kube-flannel as the cluster CNI plugin, this step is not needed.

Yum repository

Outside mainland China, the official repository is recommended.

Within mainland China, the Alibaba Cloud mirror is again recommended: https://developer.aliyun.com/mirror/kubernetes

ECS instances can use the internal Alibaba Cloud mirror directly:

# cat /etc/yum.repos.d/kubernetes.repo 
[kubernetes]
name=Kubernetes
baseurl=http://mirrors.cloud.aliyuncs.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=http://mirrors.cloud.aliyuncs.com/kubernetes/yum/doc/yum-key.gpg http://mirrors.cloud.aliyuncs.com/kubernetes/yum/doc/rpm-package-key.gpg

Installing the components

yum install -y kubelet-1.21.13-0 kubeadm-1.21.13-0 kubectl-1.21.13-0 ebtables ethtool
# Exclude these packages from yum upgrades to avoid accidental version changes
echo 'exclude=kubelet kubeadm kubectl docker-ce docker-ce-cli' >> /etc/yum.conf

/usr/lib/systemd/system/kubelet.service

Modify the kubelet service startup parameters as required by the CCM:

# cat /usr/lib/systemd/system/kubelet.service
[Unit]
Description=kubelet: The Kubernetes Node Agent
Documentation=https://kubernetes.io/docs/
Wants=network-online.target
After=network-online.target

[Service]
#ExecStart=/usr/bin/kubelet
ExecStart=/usr/bin/kubelet --cloud-provider=external --hostname-override=xxx.xxx --provider-id=xxx.xxx
Restart=always
StartLimitInterval=0
RestartSec=10

[Install]
WantedBy=multi-user.target

The values for hostname-override and provider-id are obtained by running the following on the corresponding ECS instance:

echo `curl -s http://100.100.100.200/latest/meta-data/region-id`.`curl -s http://100.100.100.200/latest/meta-data/instance-id`
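
One way to wire the two values into the unit file automatically; this is only a sketch and assumes the xxx.xxx placeholders from the unit above are still present:

# Build the "<region-id>.<instance-id>" string from the ECS metadata service
NODE_ID="$(curl -s http://100.100.100.200/latest/meta-data/region-id).$(curl -s http://100.100.100.200/latest/meta-data/instance-id)"
# Substitute it for the placeholders in the kubelet unit
sed -i "s|--hostname-override=xxx.xxx|--hostname-override=${NODE_ID}|" /usr/lib/systemd/system/kubelet.service
sed -i "s|--provider-id=xxx.xxx|--provider-id=${NODE_ID}|" /usr/lib/systemd/system/kubelet.service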

After editing, reload systemd and enable kubelet on boot:

systemctl daemon-reload
systemctl enable kubelet

Installing Kubernetes

kubeadm configuration

Below is an example kubeadm.yaml:

apiVersion: kubeadm.k8s.io/v1beta2
kind: InitConfiguration
#nodeRegistration:
#    criSocket: /run/containerd/containerd.sock
#    name: containerd
localAPIEndpoint:
  # IP of the first master node
  advertiseAddress: "192.168.1.27"
  # Port 6443 is reserved for the local haproxy, so the apiserver listens on 5443
  bindPort: 5443
# Generate this key with the command below
# kubeadm certs certificate-key
certificateKey: e8734dd17eaab2d55cfb363037f0d27cdd0f0cf1016d163d07428ac3aa9e19f1
---
apiVersion: kubeadm.k8s.io/v1beta2
kind: ClusterConfiguration
# External etcd configuration
etcd:
  external:
    endpoints:
    - "https://192.168.1.27:2379"
    - "https://192.168.1.28:2379"
    - "https://192.168.1.29:2379"
    caFile: "/etc/etcd/ssl/etcd-root-ca.pem"
    certFile: "/etc/etcd/ssl/etcd.pem"
    keyFile: "/etc/etcd/ssl/etcd-key.pem"
# Network configuration
networking:
  serviceSubnet: "10.25.0.0/16"
  # flannel
  #podSubnet: "10.30.0.1/16"
  # terway
  # Requires the dedicated Pod vSwitch created earlier in the Alibaba Cloud console
  podSubnet: "192.168.32.0/19"
  dnsDomain: "cluster.local"
kubernetesVersion: "v1.21.13"
# Cluster-wide apiserver LB address; haproxy runs locally on every node, so point it at localhost
controlPlaneEndpoint: "127.0.0.1:6443"
apiServer:
  # Extra apiserver arguments
  extraArgs:
    v: "4"
    alsologtostderr: "true"
    # Audit log settings
    audit-log-maxage: "20"
    audit-log-maxbackup: "10"
    audit-log-maxsize: "100"
    audit-log-path: "/var/log/kube-audit/audit.log"
    audit-policy-file: "/etc/kubernetes/audit-policy.yaml"
    authorization-mode: "Node,RBAC"
    event-ttl: "720h"
    runtime-config: "api/all=true"
    service-node-port-range: "30000-50000"
    service-cluster-ip-range: "10.25.0.0/16"
  # Because a custom audit policy is defined, the policy file on the host
  # must be mounted into the kube-apiserver pod
  extraVolumes:
  - name: "audit-config"
    hostPath: "/etc/kubernetes/audit-policy.yaml"
    mountPath: "/etc/kubernetes/audit-policy.yaml"
    readOnly: true
    pathType: "File"
  - name: "audit-log"
    hostPath: "/var/log/kube-audit"
    mountPath: "/var/log/kube-audit"
    pathType: "DirectoryOrCreate"
  # Extra SANs for the apiserver certificate
  # A wildcard domain is included as a safeguard for future needs
  certSANs:
  - "*.k8s.node"
  - "192.168.1.27"
  - "192.168.1.28"
  - "192.168.1.29"
  - "127.0.0.1"
  timeoutForControlPlane: 5m
controllerManager:
  extraArgs:
    v: "4"
    # Per-node Pod CIDR mask size
    node-cidr-mask-size: "19"
    deployment-controller-sync-period: "10s"
    experimental-cluster-signing-duration: "87600h"
    node-monitor-grace-period: "20s"
    pod-eviction-timeout: "2m"
    terminated-pod-gc-threshold: "30"
scheduler:
  extraArgs:
    v: "4"
certificatesDir: "/etc/kubernetes/pki"
# gcr.io is blocked in mainland China, so use the Aliyun mirror
imageRepository: "registry.aliyuncs.com/google_containers"
#imageRepository: "registry.cn-chengdu.aliyuncs.com/archky-k8s"
clusterName: "kubernetes"
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
# kubelet specific options here
# Allows kubelet to start even when swap is enabled
failSwapOn: false
nodeStatusUpdateFrequency: 5s
# Eviction thresholds; consult the documentation and tune as needed
evictionSoft:
  "imagefs.available": "15%"
  "memory.available": "512Mi"
  "nodefs.available": "15%"
  "nodefs.inodesFree": "10%"
evictionSoftGracePeriod:
  "imagefs.available": "3m"
  "memory.available": "1m"
  "nodefs.available": "3m"
  "nodefs.inodesFree": "1m"
evictionHard:
  "imagefs.available": "10%"
  "memory.available": "256Mi"
  "nodefs.available": "10%"
  "nodefs.inodesFree": "5%"
evictionMaxPodGracePeriod: 30
imageGCLowThresholdPercent: 70
imageGCHighThresholdPercent: 80
kubeReserved:
  "cpu": "500m"
  "memory": "512Mi"
  "ephemeral-storage": "1Gi"
rotateCertificates: true
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
# kube-proxy specific options here
# Should match podSubnet above
clusterCIDR: "192.168.32.0/19"
# Enable IPVS mode
mode: "ipvs"
ipvs:
  minSyncPeriod: 5s
  syncPeriod: 5s
  # IPVS scheduling algorithm
  scheduler: "wrr"

Read the kubeadm.yaml configuration and its comments carefully.

Security groups

To keep internal traffic flowing, the subnets referenced in kubeadm.yaml must be allowed to reach each other in the Alibaba Cloud security group.
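
The rules can also be added with the aliyun CLI instead of the console. The sketch below is only illustrative: it assumes the CLI is installed and configured, sg-xxx and cn-hangzhou are placeholders, and 192.168.1.0/24 stands in for the node subnet used in this article:

# Allow the node, Pod and Service subnets to reach each other within the security group
for CIDR in 192.168.1.0/24 192.168.32.0/19 10.25.0.0/16; do
    aliyun ecs AuthorizeSecurityGroup \
        --RegionId cn-hangzhou \
        --SecurityGroupId sg-xxx \
        --IpProtocol all \
        --PortRange=-1/-1 \
        --SourceCidrIp "${CIDR}"
done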

Installing the control plane

# Check the configuration before installing
kubeadm init phase preflight --config=kubeadm.yaml
# --v sets the log verbosity
kubeadm init --config kubeadm.yaml --upload-certs --v=6

If everything goes well, the command finishes with output similar to the following:

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

You should now deploy a Pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  /docs/concepts/cluster-administration/addons/

You can now join any number of machines by running the following on each node
as root:

  kubeadm join 127.0.0.1:6443 --token <token> --discovery-token-ca-cert-hash sha256:<hash>
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 127.0.0.1:6443 --token <token> \
    --discovery-token-ca-cert-hash sha256:<hash>

Configuring kubectl

The following script sets up kubectl:

#!/bin/bash

rm -rf $HOME/.kube
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

Checking cluster status

# Component status
kubectl get cs
# Node status
kubectl get node
# All Pod status
kubectl get pod -A

Tearing down the cluster

If the cluster or a node fails to install, it can be torn down and reinstalled:

# Drain and remove the node
kubectl drain <node_name> --delete-emptydir-data --force --ignore-daemonsets
# Reset the node with kubeadm
kubeadm reset
# Remove leftover virtual interfaces
ip link del flannel.1
ip link del cni0
# Wipe all etcd data. Use with extreme caution
etcdctl --cacert /etc/etcd/ssl/etcd-root-ca.pem --cert /etc/etcd/ssl/etcd.pem --key /etc/etcd/ssl/etcd-key.pem --endpoints https://192.168.1.27:2379,https://192.168.1.28:2379,https://192.168.1.29:2379 del "" --prefix

Installing a CNI plugin

For Alibaba Cloud's comparison and selection guidance between Terway and Flannel, see the official documentation: Kubernetes cluster network planning.

Terway

It is recommended to read the following first:

  1. Official documentation: Use the Terway network plugin

  2. GitHub documentation: Terway network plugin

Terway has two installation modes:

  • VPC mode

    • VPC mode uses Aliyun VPC route entries to connect the Pod network and can attach a dedicated ENI to a Pod. Installation: edit the credentials and subnet settings in the eni.conf section of terway.yml, along with the Network subnet, then install the plugin with kubectl apply -f terway.yml.
  • ENI multi-IP mode

    • ENI multi-IP mode uses the secondary IPs of Aliyun ENIs to connect the Pod network and is not limited by the number of VPC route entries. Installation: edit the credentials and resource settings in the eni.conf section of terway-multiip.yml, then install the plugin with kubectl apply -f terway-multiip.yml.

Because Terway creates elastic network interfaces (ENIs) automatically, it needs the corresponding permissions.

Create a RAM sub-account in the Alibaba Cloud console and grant it a policy like the following:

{
  "Version": "1",
  "Statement": [
    {
      "Action": [
        "ecs:CreateNetworkInterface",
        "ecs:DescribeNetworkInterfaces",
        "ecs:AttachNetworkInterface",
        "ecs:DetachNetworkInterface",
        "ecs:DeleteNetworkInterface",
        "ecs:DescribeInstanceAttribute",
        "ecs:DescribeInstanceTypes",
        "ecs:AssignPrivateIpAddresses",
        "ecs:UnassignPrivateIpAddresses",
        "ecs:DescribeInstances",
        "ecs:ModifyNetworkInterfaceAttribute"
      ],
      "Resource": [
        "*"
      ],
      "Effect": "Allow"
    },
    {
      "Action": [
        "vpc:DescribeVSwitches"
      ],
      "Resource": [
        "*"
      ],
      "Effect": "Allow"
    }
  ]
}
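
Creating the user can also be scripted with the aliyun CLI; a sketch that assumes the policy above is saved as terway-policy.json and that the user and policy names are yours to choose:

# Create the RAM user, attach a custom policy built from the JSON above, and issue an AccessKey
aliyun ram CreateUser --UserName terway
aliyun ram CreatePolicy --PolicyName k8s-terway --PolicyDocument "$(cat terway-policy.json)"
aliyun ram AttachPolicyToUser --PolicyType Custom --PolicyName k8s-terway --UserName terway
# The AccessKeyId / AccessKeySecret from this call go into the eni-config ConfigMap below
aliyun ram CreateAccessKey --UserName terway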

We use Terway's ENI multi-IP mode:

apiVersion: v1
kind: ServiceAccount
metadata:
  name: terway
  namespace: kube-system

---

kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: terway-pod-reader
  namespace: kube-system
rules:
  - apiGroups: [ "" ]
    resources: [ "pods", "nodes", "namespaces", "configmaps", "serviceaccounts" ]
    verbs: [ "get", "watch", "list", "update" ]
  - apiGroups: [ "" ]
    resources:
      - events
    verbs:
      - create
  - apiGroups: [ "networking.k8s.io" ]
    resources:
      - networkpolicies
    verbs:
      - get
      - list
      - watch
  - apiGroups: [ "extensions" ]
    resources:
      - networkpolicies
    verbs:
      - get
      - list
      - watch
  - apiGroups: [ "" ]
    resources:
      - pods/status
    verbs:
      - update
  - apiGroups: [ "crd.projectcalico.org" ]
    resources: [ "*" ]
    verbs: [ "*" ]

---

apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: terway-binding
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: terway-pod-reader
subjects:
  - kind: ServiceAccount
    name: terway
    namespace: kube-system

---

kind: ConfigMap
apiVersion: v1
metadata:
  name: eni-config
  namespace: kube-system
data:
  eni_conf: |
    {
      "version": "1",
      "access_key": "ak",
      "access_secret": "ak",
      "security_group": "sg-xxx",
      "service_cidr": "10.25.0.0/16",
      "vswitches": {
        "cn-hongkong-b": ["vsw-xxx"]
      },
      "max_pool_size": 5,
      "min_pool_size": 0
    }
  10-terway.conf: |
    {
      "cniVersion": "0.3.1",
      "name": "terway",
      "type": "terway",
      "eniip_virtual_type": "Veth",
      "ip_stack": "ipv4"
    }
  # eniip_virtual_type: virtual type for eni multi ip "Veth" || "IPVlan"
  disable_network_policy: "false"

---

apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: terway
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: terway
  template:
    metadata:
      labels:
        app: terway
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      hostPID: true
      nodeSelector:
        beta.kubernetes.io/arch: amd64
      tolerations:
        - operator: "Exists"
      terminationGracePeriodSeconds: 0
      serviceAccountName: terway
      hostNetwork: true
      initContainers:
        - name: terway-init
          image: registry.aliyuncs.com/acs/terway:v1.2.3
          imagePullPolicy: IfNotPresent
          securityContext:
            privileged: true
          command:
            - 'sh'
            - '-c'
            - 'cp /usr/bin/terway /opt/cni/bin/;
                  chmod +x /opt/cni/bin/terway;
                  cp /etc/eni/10-terway.conf /etc/cni/net.d/;
                  sysctl -w net.ipv4.conf.eth0.rp_filter=0;
                  modprobe sch_htb || true;
                  chroot /host sh -c "systemctl disable eni.service; rm -f /etc/udev/rules.d/75-persistent-net-generator.rules /lib/udev/rules.d/60-net.rules /lib/udev/rules.d/61-eni.rules /lib/udev/write_net_rules && udevadm control --reload-rules && udevadm trigger; true"'
          volumeMounts:
            - name: configvolume
              mountPath: /etc/eni
            - name: cni-bin
              mountPath: /opt/cni/bin/
            - name: cni
              mountPath: /etc/cni/net.d/
            - mountPath: /lib/modules
              name: lib-modules
            - mountPath: /host
              name: host-root
      containers:
        - name: terway
          image: registry.aliyuncs.com/acs/terway:v1.2.3
          imagePullPolicy: IfNotPresent
          command: [ '/usr/bin/terwayd', '-log-level', 'debug', '-daemon-mode', 'ENIMultiIP' ]
          securityContext:
            privileged: true
          env:
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          volumeMounts:
            - name: configvolume
              mountPath: /etc/eni
            - mountPath: /var/run/
              name: eni-run
            - mountPath: /opt/cni/bin/
              name: cni-bin
            - mountPath: /lib/modules
              name: lib-modules
            - mountPath: /var/lib/cni/networks
              name: cni-networks
            - mountPath: /var/lib/cni/terway
              name: cni-terway
            - mountPath: /var/lib/kubelet/device-plugins
              name: device-plugin-path
        - name: policy
          image: registry.aliyuncs.com/acs/terway:v1.2.3
          imagePullPolicy: IfNotPresent
          command: [ "/bin/policyinit.sh" ]
          env:
            - name: NODENAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: DISABLE_POLICY
              valueFrom:
                configMapKeyRef:
                  name: eni-config
                  key: disable_network_policy
                  optional: true
          securityContext:
            privileged: true
          resources:
            requests:
              cpu: 250m
          livenessProbe:
            tcpSocket:
              port: 9099
              host: localhost
            periodSeconds: 10
            initialDelaySeconds: 10
            failureThreshold: 6
          readinessProbe:
            tcpSocket:
              port: 9099
              host: localhost
            periodSeconds: 10
          volumeMounts:
            - mountPath: /lib/modules
              name: lib-modules
      volumes:
        - name: configvolume
          configMap:
            name: eni-config
            items:
              - key: eni_conf
                path: eni.json
              - key: 10-terway.conf
                path: 10-terway.conf
        - name: cni-bin
          hostPath:
            path: /opt/cni/bin
            type: "Directory"
        - name: cni
          hostPath:
            path: /etc/cni/net.d
        - name: eni-run
          hostPath:
            path: /var/run/
            type: "Directory"
        - name: lib-modules
          hostPath:
            path: /lib/modules
        - name: cni-networks
          hostPath:
            path: /var/lib/cni/networks
        - name: cni-terway
          hostPath:
            path: /var/lib/cni/terway
        - name: device-plugin-path
          hostPath:
            path: /var/lib/kubelet/device-plugins
            type: "Directory"
        - name: host-root
          hostPath:
            path: /
            type: "Directory"

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: felixconfigurations.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: FelixConfiguration
    plural: felixconfigurations
    singular: felixconfiguration

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: bgpconfigurations.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: BGPConfiguration
    plural: bgpconfigurations
    singular: bgpconfiguration

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: ippools.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: IPPool
    plural: ippools
    singular: ippool

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: hostendpoints.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: HostEndpoint
    plural: hostendpoints
    singular: hostendpoint

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: clusterinformations.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: ClusterInformation
    plural: clusterinformations
    singular: clusterinformation

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: globalnetworkpolicies.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: GlobalNetworkPolicy
    plural: globalnetworkpolicies
    singular: globalnetworkpolicy

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: globalnetworksets.crd.projectcalico.org
spec:
  scope: Cluster
  group: crd.projectcalico.org
  version: v1
  names:
    kind: GlobalNetworkSet
    plural: globalnetworksets
    singular: globalnetworkset

---

apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: networkpolicies.crd.projectcalico.org
spec:
  scope: Namespaced
  group: crd.projectcalico.org
  version: v1
  names:
    kind: NetworkPolicy
    plural: networkpolicies
    singular: networkpolicy

Note: update the image version in the Terway manifest; the latest release is recommended.

# Configuration notes
data:
  eni_conf: |
    {
      "version": "1",
      "access_key": "ak",
      "access_secret": "sk",
      "security_group": "security group ID used by the ECS instances",
      # Must match serviceSubnet in kubeadm.yaml
      "service_cidr": "10.25.0.0/16",
      # vSwitch settings: availability zone and vSwitch ID
      "vswitches": {
        "cn-hangzhou-k": ["vSwitch ID"]
      }

Flannel

When Flannel uses the ali-vpc backend, you likewise need to create a RAM sub-account and grant it the corresponding permission (AliyunVPCFullAccess).

The manifest is as follows:

---
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
  name: psp.flannel.unprivileged
  annotations:
    seccomp.security.alpha.kubernetes.io/allowedProfileNames: docker/default
    seccomp.security.alpha.kubernetes.io/defaultProfileName: docker/default
    apparmor.security.beta.kubernetes.io/allowedProfileNames: runtime/default
    apparmor.security.beta.kubernetes.io/defaultProfileName: runtime/default
spec:
  privileged: false
  volumes:
  - configMap
  - secret
  - emptyDir
  - hostPath
  allowedHostPaths:
  - pathPrefix: "/etc/cni/net.d"
  - pathPrefix: "/etc/kube-flannel"
  - pathPrefix: "/run/flannel"
  readOnlyRootFilesystem: false
  # Users and groups
  runAsUser:
    rule: RunAsAny
  supplementalGroups:
    rule: RunAsAny
  fsGroup:
    rule: RunAsAny
  # Privilege Escalation
  allowPrivilegeEscalation: false
  defaultAllowPrivilegeEscalation: false
  # Capabilities
  allowedCapabilities: ['NET_ADMIN', 'NET_RAW']
  defaultAddCapabilities: []
  requiredDropCapabilities: []
  # Host namespaces
  hostPID: false
  hostIPC: false
  hostNetwork: true
  hostPorts:
  - min: 0
    max: 65535
  # SELinux
  seLinux:
    # SELinux is unused in CaaSP
    rule: 'RunAsAny'
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
rules:
- apiGroups: ['extensions']
  resources: ['podsecuritypolicies']
  verbs: ['use']
  resourceNames: ['psp.flannel.unprivileged']
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: flannel
  namespace: kube-system
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-system
  labels:
    tier: node
    app: flannel
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "10.30.0.0/16",
      "Backend": {
        "Type": "ali-vpc"
      }
    }
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-system
  labels:
    tier: node
    app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      # Add DNS server 223.6.6.6 (any other public resolver works too)
      dnsPolicy: "None"
      dnsConfig:
        nameservers:
        - 223.6.6.6
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      hostNetwork: true
      priorityClassName: system-node-critical
      tolerations:
      - operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
      - name: install-cni
        image: quay.io/coreos/flannel:v0.14.0
        command:
        - cp
        args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        volumeMounts:
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      containers:
      - name: kube-flannel
        image: quay.io/coreos/flannel:v0.14.0
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr
        resources:
          requests:
            cpu: "100m"
            memory: "50Mi"
          limits:
            cpu: "100m"
            memory: "50Mi"
        securityContext:
          privileged: false
          capabilities:
            add: ["NET_ADMIN", "NET_RAW"]
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        # ali-vpc
        - name: ACCESS_KEY_ID
          value: Ak
        - name: ACCESS_KEY_SECRET
          value: Sk
        volumeMounts:
        - name: run
          mountPath: /run/flannel
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      volumes:
      - name: run
        hostPath:
          path: /run/flannel
      - name: cni
        hostPath:
          path: /etc/cni/net.d
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg

Required changes

  • The Network field must match podSubnet in kubeadm.yaml.

  • Replace ACCESS_KEY_ID and ACCESS_KEY_SECRET with their actual values.

Adding more control plane nodes

After kubeadm init completes, it prints a command for joining additional control plane nodes; running that command as-is will fail, so the control plane and apiserver parameters below must be added (run it on the node being added, using that node's own IP):

kubeadm join 127.0.0.1:6443 --token <token> --discovery-token-ca-cert-hash sha256:<hash> \
    --control-plane --certificate-key <key> \
    --apiserver-advertise-address 192.168.1.28 \
    --apiserver-bind-port 5443 \
    --ignore-preflight-errors=Swap

Adding worker nodes

Joining a worker node is simpler; just add the flag that keeps preflight from refusing to run when swap is enabled:

kubeadm join 127.0.0.1:6443 --token <token> \
    --discovery-token-ca-cert-hash sha256:<hash> \
    --ignore-preflight-errors=Swap

Allowing masters to run Pods

By default, control plane nodes do not schedule Pods. Run the following on any control plane node to let them participate in scheduling:

kubectl taint nodes --all node-role.kubernetes.io/master-

Installing the CCM plugin

Overview

The Cloud Controller Manager (CCM) integrates Kubernetes with Alibaba Cloud infrastructure products such as CLB (formerly SLB) and VPC. It mainly provides:

  • Load balancer management

When a Service is of type LoadBalancer, the CCM creates and configures an Alibaba Cloud CLB for it, including the CLB instance, listeners, and backend server groups. When the Service's endpoints or the cluster nodes change, the CCM automatically updates the CLB's backend vserver groups (see the example after this overview).

  • Cross-node networking

When the cluster uses Flannel, the CCM connects the container and node networks so containers can communicate across nodes: it writes each node's Pod CIDR into the VPC route table. This works out of the box with no extra configuration.

Note: the CCM never adds master nodes as SLB backends under any circumstances.
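
To see the load balancer management in action once the CCM is running, expose a workload with a Service of type LoadBalancer. The sketch below assumes a Deployment labeled app: nginx already exists; the intranet annotation comes from the CCM documentation and is optional (without it, an internet-facing CLB is created):

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Service
metadata:
  name: nginx-slb
  annotations:
    # Optional: provision a private (intranet) CLB instead of a public one
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-address-type: "intranet"
spec:
  type: LoadBalancer
  selector:
    app: nginx
  ports:
  - port: 80
    targetPort: 80
EOF
# The CLB address shows up under EXTERNAL-IP once the CCM has provisioned it
kubectl get svc nginx-slb -w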

Creating a sub-account

Because the CCM manages Alibaba Cloud resources, it likewise needs a RAM sub-account with the appropriate permissions:

{
    "Version": "1",
    "Statement": [
        {
            "Action": [
                "ecs:Describe*",
                "ecs:CreateRouteEntry",
                "ecs:DeleteRouteEntry",
                "ecs:CreateNetworkInterface",
                "ecs:DeleteNetworkInterface",
                "ecs:CreateNetworkInterfacePermission",
                "ecs:DeleteNetworkInterfacePermission",
                "ecs:ModifyInstanceAttribute",
                "ecs:AttachKeyPair",
                "ecs:StopInstance",
                "ecs:StartInstance",
                "ecs:ReplaceSystemDisk"
            ],
            "Resource": [
                "*"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "slb:Describe*",
                "slb:CreateLoadBalancer",
                "slb:DeleteLoadBalancer",
                "slb:ModifyLoadBalancerInternetSpec",
                "slb:RemoveBackendServers",
                "slb:AddBackendServers",
                "slb:RemoveTags",
                "slb:AddTags",
                "slb:StopLoadBalancerListener",
                "slb:StartLoadBalancerListener",
                "slb:SetLoadBalancerHTTPListenerAttribute",
                "slb:SetLoadBalancerHTTPSListenerAttribute",
                "slb:SetLoadBalancerTCPListenerAttribute",
                "slb:SetLoadBalancerUDPListenerAttribute",
                "slb:CreateLoadBalancerHTTPSListener",
                "slb:CreateLoadBalancerHTTPListener",
                "slb:CreateLoadBalancerTCPListener",
                "slb:CreateLoadBalancerUDPListener",
                "slb:DeleteLoadBalancerListener",
                "slb:CreateVServerGroup",
                "slb:DescribeVServerGroups",
                "slb:DeleteVServerGroup",
                "slb:SetVServerGroupAttribute",
                "slb:DescribeVServerGroupAttribute",
                "slb:ModifyVServerGroupBackendServers",
                "slb:AddVServerGroupBackendServers",
                "slb:ModifyLoadBalancerInstanceSpec",
                "slb:ModifyLoadBalancerInternetSpec",
                "slb:SetLoadBalancerModificationProtection",
                "slb:SetLoadBalancerDeleteProtection",
                "slb:SetLoadBalancerName",
                "slb:RemoveVServerGroupBackendServers"
            ],
            "Resource": [
                "*"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "vpc:Describe*",
                "vpc:DeleteRouteEntry",
                "vpc:CreateRouteEntry"
            ],
            "Resource": [
                "*"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "log:AnalyzeProductLog",
                "log:CreateIndex",
                "log:UpdateIndex",
                "log:DeleteIndex",
                "log:CreateLogStore",
                "log:UpdateLogStore",
                "log:DeleteLogStore",
                "log:CreateDashboard",
                "log:UpdateDashboard",
                "log:DeleteDashboard"
            ],
            "Resource": [
                "acs:log:*:*:project/*/logstore/alb_*",
                "acs:log:*:*:project/*/dashboard/*"
            ],
            "Effect": "Allow"
        },
        {
            "Action": [
                "alb:TagResources",
                "alb:UnTagResources",
                "alb:ListServerGroups",
                "alb:ListServerGroupServers",
                "alb:AddServersToServerGroup",
                "alb:RemoveServersFromServerGroup",
                "alb:ReplaceServersInServerGroup",
                "alb:CreateLoadBalancer",
                "alb:DeleteLoadBalancer",
                "alb:UpdateLoadBalancerAttribute",
                "alb:UpdateLoadBalancerEdition",
                "alb:EnableLoadBalancerAccessLog",
                "alb:DisableLoadBalancerAccessLog",
                "alb:EnableDeletionProtection",
                "alb:DisableDeletionProtection",
                "alb:ListLoadBalancers",
                "alb:GetLoadBalancerAttribute",
                "alb:ListListeners",
                "alb:CreateListener",
                "alb:GetListenerAttribute",
                "alb:UpdateListenerAttribute",
                "alb:ListListenerCertificates",
                "alb:AssociateAdditionalCertificatesWithListener",
                "alb:DissociateAdditionalCertificatesFromListener",
                "alb:DeleteListener",
                "alb:CreateRule",
                "alb:DeleteRule",
                "alb:UpdateRuleAttribute",
                "alb:CreateRules",
                "alb:UpdateRulesAttribute",
                "alb:DeleteRules",
                "alb:ListRules",
                "alb:CreateServerGroup",
                "alb:DeleteServerGroup",
                "alb:UpdateServerGroupAttribute",
                "alb:DescribeZones"
            ],
            "Resource": "*",
            "Effect": "Allow"
        },
        {
            "Action": "ram:CreateServiceLinkedRole",
            "Resource": "*",
            "Effect": "Allow",
            "Condition": {
                "StringEquals": {
                    "ram:ServiceName": [
                        "alb.aliyuncs.com",
                        "logdelivery.alb.aliyuncs.com"
                    ]
                }
            }
        },
        {
            "Action": [
                "yundun-cert:DescribeSSLCertificateList",
                "yundun-cert:DescribeSSLCertificatePublicKeyDetail"
            ],
            "Resource": "*",
            "Effect": "Allow"
        }
    ]
}

Configuring kubelet and nodes

In the component installation step we already set the kubelet startup parameters on every node. In addition, each Node object must be patched to set its providerID:

NODE_NAME=$(echo `curl -s http://100.100.100.200/latest/meta-data/region-id`.`curl -s http://100.100.100.200/latest/meta-data/instance-id`)
kubectl patch node ${NODE_NAME} -p "{\"spec\":{\"providerID\": \"${NODE_NAME}\"}}"

Creating the ConfigMap

Use the following script to create the CCM's cloud-config ConfigMap:

#!/bin/bash
# create ConfigMap kube-system/cloud-config for CCM.

ACCESS_KEY_ID="xxxx"
ACCESS_KEY_SECRET="xxxx"

# base64 AccessKey & AccessKeySecret
accessKeyIDBase64=`echo -n "$ACCESS_KEY_ID" |base64 -w 0`
accessKeySecretBase64=`echo -n "$ACCESS_KEY_SECRET"|base64 -w 0`

cat <<EOF >cloud-config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: cloud-config
  namespace: kube-system
data:
  cloud-config.conf: |-
    {
        "Global": {
            "accessKeyID": "$accessKeyIDBase64",
            "accessKeySecret": "$accessKeySecretBase64"
        }
    }
EOF

kubectl create -f cloud-config.yaml
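
A quick check that the ConfigMap landed where the CCM expects it:

kubectl -n kube-system get configmap cloud-config -o yaml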

Installing the CCM

Installation using the cloud-controller-manager.yml from GitHub is recommended:

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: system:cloud-controller-manager
rules:
  - apiGroups:
      - coordination.k8s.io
    resources:
      - leases
    verbs:
      - get
      - list
      - update
      - create
  - apiGroups:
      - ""
    resources:
      - persistentvolumes
      - services
      - secrets
      - endpoints
      - serviceaccounts
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
      - list
      - watch
      - delete
      - patch
      - update
  - apiGroups:
      - ""
    resources:
      - services/status
    verbs:
      - update
      - patch
  - apiGroups:
      - ""
    resources:
      - nodes/status
    verbs:
      - patch
      - update
  - apiGroups:
      - ""
    resources:
      - events
      - endpoints
    verbs:
      - create
      - patch
      - update
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cloud-controller-manager
  namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: system:cloud-controller-manager
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:cloud-controller-manager
subjects:
  - kind: ServiceAccount
    name: cloud-controller-manager
    namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: system:shared-informers
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:cloud-controller-manager
subjects:
  - kind: ServiceAccount
    name: shared-informers
    namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: system:cloud-node-controller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:cloud-controller-manager
subjects:
  - kind: ServiceAccount
    name: cloud-node-controller
    namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: system:pvl-controller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:cloud-controller-manager
subjects:
  - kind: ServiceAccount
    name: pvl-controller
    namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: system:route-controller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:cloud-controller-manager
subjects:
  - kind: ServiceAccount
    name: route-controller
    namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app: cloud-controller-manager
    tier: control-plane
  name: cloud-controller-manager
  namespace: kube-system
spec:
  selector:
    matchLabels:
      app: cloud-controller-manager
      tier: control-plane
  template:
    metadata:
      labels:
        app: cloud-controller-manager
        tier: control-plane
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
      serviceAccountName: cloud-controller-manager
      tolerations:
        - effect: NoSchedule
          operator: Exists
          key: node-role.kubernetes.io/master
        - effect: NoSchedule
          operator: Exists
          key: node.cloudprovider.kubernetes.io/uninitialized
      nodeSelector:
        node-role.kubernetes.io/master: ""
      containers:
        - command:
          -  /cloud-controller-manager
          - --leader-elect=true
          - --cloud-provider=alicloud
          - --use-service-account-credentials=true
          - --cloud-config=/etc/kubernetes/config/cloud-config.conf
          - --route-reconciliation-period=3m
          - --leader-elect-resource-lock=endpoints
          # replace ${cluster-cidr} with your own cluster cidr
          # example: 172.16.0.0/16
          - --configure-cloud-routes=false
          - --allocate-node-cidrs=false
          - --cluster-cidr=192.168.32.0/19
          # replace ${ImageVersion} with the latest release version
          # example: v2.1.0
          image: registry.cn-hangzhou.aliyuncs.com/acs/cloud-controller-manager-amd64:v2.3.0
          livenessProbe:
            failureThreshold: 8
            httpGet:
              host: 127.0.0.1
              path: /healthz
              port: 10258
              scheme: HTTP
            initialDelaySeconds: 15
            timeoutSeconds: 15
          name: cloud-controller-manager
          resources:
            requests:
              cpu: 200m
          volumeMounts:
            - mountPath: /etc/kubernetes/
              name: k8s
            - mountPath: /etc/ssl/certs
              name: certs
            - mountPath: /etc/pki
              name: pki
            - mountPath: /etc/kubernetes/config
              name: cloud-config
      hostNetwork: true
      volumes:
        - hostPath:
            path: /etc/kubernetes
          name: k8s
        - hostPath:
            path: /etc/ssl/certs
          name: certs
        - hostPath:
            path: /etc/pki
          name: pki
        - configMap:
            defaultMode: 420
            items:
              - key: cloud-config.conf
                path: cloud-config.conf
            name: cloud-config
          name: cloud-config

Checking the CCM

Check the Pod status to verify that the CCM was installed successfully:

kubectl get pod -n kube-system
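
The DaemonSet above labels its pods app=cloud-controller-manager, so the pods and their logs can be inspected directly; credential or metadata-service errors show up here first:

# One cloud-controller-manager pod should be Running on every master
kubectl -n kube-system get pod -l app=cloud-controller-manager -o wide
# Tail the logs to confirm the CCM can reach the Alibaba Cloud APIs
kubectl -n kube-system logs -l app=cloud-controller-manager --tail=50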

Further reading

Using masters as SLB backends

The notes in the Alibaba Cloud documentation state that the CCM will never add master nodes as SLB backends under any circumstances.

This is likely a performance consideration; if you really need masters behind an SLB, the source code can be modified directly.

A GitHub issue explains how the source code filters out master nodes. Keeping that logic intact, I achieved the goal by adding an extra label to the master nodes (see the referenced code).

Installing ingress

Ingress-controller
apiVersion: v1
kind: Namespace
metadata:
  labels:
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
  name: ingress-nginx
---
apiVersion: v1
automountServiceAccountToken: true
kind: ServiceAccount
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx
  namespace: ingress-nginx
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission
  namespace: ingress-nginx
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx
  namespace: ingress-nginx
rules:
- apiGroups:
  - ""
  resources:
  - namespaces
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - configmaps
  - pods
  - secrets
  - endpoints
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - services
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - networking.k8s.io
  resources:
  - ingresses
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - networking.k8s.io
  resources:
  - ingresses/status
  verbs:
  - update
- apiGroups:
  - networking.k8s.io
  resources:
  - ingressclasses
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resourceNames:
  - ingress-controller-leader
  resources:
  - configmaps
  verbs:
  - get
  - update
- apiGroups:
  - ""
  resources:
  - configmaps
  verbs:
  - create
- apiGroups:
  - ""
  resources:
  - events
  verbs:
  - create
  - patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission
  namespace: ingress-nginx
rules:
- apiGroups:
  - ""
  resources:
  - secrets
  verbs:
  - get
  - create
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx
rules:
- apiGroups:
  - ""
  resources:
  - configmaps
  - endpoints
  - nodes
  - pods
  - secrets
  - namespaces
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - services
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - networking.k8s.io
  resources:
  - ingresses
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - events
  verbs:
  - create
  - patch
- apiGroups:
  - networking.k8s.io
  resources:
  - ingresses/status
  verbs:
  - update
- apiGroups:
  - networking.k8s.io
  resources:
  - ingressclasses
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission
rules:
- apiGroups:
  - admissionregistration.k8s.io
  resources:
  - validatingwebhookconfigurations
  verbs:
  - get
  - update
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx
  namespace: ingress-nginx
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: ingress-nginx
subjects:
- kind: ServiceAccount
  name: ingress-nginx
  namespace: ingress-nginx
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission
  namespace: ingress-nginx
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: ingress-nginx-admission
subjects:
- kind: ServiceAccount
  name: ingress-nginx-admission
  namespace: ingress-nginx
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: ingress-nginx
subjects:
- kind: ServiceAccount
  name: ingress-nginx
  namespace: ingress-nginx
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: ingress-nginx-admission
subjects:
- kind: ServiceAccount
  name: ingress-nginx-admission
  namespace: ingress-nginx
---
apiVersion: v1
data:
  allow-snippet-annotations: "true"
kind: ConfigMap
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-controller
  namespace: ingress-nginx
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-controller-admission
  namespace: ingress-nginx
spec:
  ports:
  - appProtocol: https
    name: https-webhook
    port: 443
    targetPort: webhook
  selector:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
  type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-controller
  namespace: ingress-nginx
spec:
  minReadySeconds: 0
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app.kubernetes.io/component: controller
      app.kubernetes.io/instance: ingress-nginx
      app.kubernetes.io/name: ingress-nginx
  template:
    metadata:
      labels:
        app.kubernetes.io/component: controller
        app.kubernetes.io/instance: ingress-nginx
        app.kubernetes.io/name: ingress-nginx
    spec:
      containers:
      - args:
        - /nginx-ingress-controller
        - --publish-service=$(POD_NAMESPACE)/ingress-nginx-controller
        - --election-id=ingress-controller-leader
        - --controller-class=k8s.io/ingress-nginx
        - --ingress-class=nginx
        - --configmap=$(POD_NAMESPACE)/ingress-nginx-controller
        - --validating-webhook=:8443
        - --validating-webhook-certificate=/usr/local/certificates/cert
        - --validating-webhook-key=/usr/local/certificates/key
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: LD_PRELOAD
          value: /usr/local/lib/libmimalloc.so
        image: registry.cn-hangzhou.aliyuncs.com/google_containers/nginx-ingress-controller:v1.2.0
        imagePullPolicy: IfNotPresent
        lifecycle:
          preStop:
            exec:
              command:
              - /wait-shutdown
        livenessProbe:
          failureThreshold: 5
          httpGet:
            path: /healthz
            port: 10254
            scheme: HTTP
          initialDelaySeconds: 10
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 1
        name: controller
        ports:
        - containerPort: 80
          name: http
          protocol: TCP
        - containerPort: 443
          name: https
          protocol: TCP
        - containerPort: 8443
          name: webhook
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /healthz
            port: 10254
            scheme: HTTP
          initialDelaySeconds: 10
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 1
        resources:
          requests:
            cpu: 100m
            memory: 90Mi
        securityContext:
          allowPrivilegeEscalation: true
          capabilities:
            add:
            - NET_BIND_SERVICE
            drop:
            - ALL
          runAsUser: 101
        volumeMounts:
        - mountPath: /usr/local/certificates/
          name: webhook-cert
          readOnly: true
      dnsPolicy: ClusterFirst
      nodeSelector:
        kubernetes.io/os: linux
      serviceAccountName: ingress-nginx
      terminationGracePeriodSeconds: 300
      volumes:
      - name: webhook-cert
        secret:
          secretName: ingress-nginx-admission
---
apiVersion: batch/v1
kind: Job
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission-create
  namespace: ingress-nginx
spec:
  template:
    metadata:
      labels:
        app.kubernetes.io/component: admission-webhook
        app.kubernetes.io/instance: ingress-nginx
        app.kubernetes.io/name: ingress-nginx
        app.kubernetes.io/part-of: ingress-nginx
        app.kubernetes.io/version: 1.2.0
      name: ingress-nginx-admission-create
    spec:
      containers:
      - args:
        - create
        - --host=ingress-nginx-controller-admission,ingress-nginx-controller-admission.$(POD_NAMESPACE).svc
        - --namespace=$(POD_NAMESPACE)
        - --secret-name=ingress-nginx-admission
        env:
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        image: registry.cn-chengdu.aliyuncs.com/archly-k8s/kube-webhook-certgen:v1.1.1
        imagePullPolicy: IfNotPresent
        name: create
        securityContext:
          allowPrivilegeEscalation: false
      nodeSelector:
        kubernetes.io/os: linux
      restartPolicy: OnFailure
      securityContext:
        fsGroup: 2000
        runAsNonRoot: true
        runAsUser: 2000
      serviceAccountName: ingress-nginx-admission
---
apiVersion: batch/v1
kind: Job
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission-patch
  namespace: ingress-nginx
spec:
  template:
    metadata:
      labels:
        app.kubernetes.io/component: admission-webhook
        app.kubernetes.io/instance: ingress-nginx
        app.kubernetes.io/name: ingress-nginx
        app.kubernetes.io/part-of: ingress-nginx
        app.kubernetes.io/version: 1.2.0
      name: ingress-nginx-admission-patch
    spec:
      containers:
      - args:
        - patch
        - --webhook-name=ingress-nginx-admission
        - --namespace=$(POD_NAMESPACE)
        - --patch-mutating=false
        - --secret-name=ingress-nginx-admission
        - --patch-failure-policy=Fail
        env:
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        image: registry.cn-chengdu.aliyuncs.com/archly-k8s/kube-webhook-certgen:v1.1.1
        imagePullPolicy: IfNotPresent
        name: patch
        securityContext:
          allowPrivilegeEscalation: false
      nodeSelector:
        kubernetes.io/os: linux
      restartPolicy: OnFailure
      securityContext:
        fsGroup: 2000
        runAsNonRoot: true
        runAsUser: 2000
      serviceAccountName: ingress-nginx-admission
---
apiVersion: networking.k8s.io/v1
kind: IngressClass
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: nginx
spec:
  controller: k8s.io/ingress-nginx
---
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
metadata:
  labels:
    app.kubernetes.io/component: admission-webhook
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  name: ingress-nginx-admission
webhooks:
- admissionReviewVersions:
  - v1
  clientConfig:
    service:
      name: ingress-nginx-controller-admission
      namespace: ingress-nginx
      path: /networking/v1/ingresses
  failurePolicy: Fail
  matchPolicy: Equivalent
  name: validate.nginx.ingress.kubernetes.io
  rules:
  - apiGroups:
    - networking.k8s.io
    apiVersions:
    - v1
    operations:
    - CREATE
    - UPDATE
    resources:
    - ingresses
  sideEffects: None
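
Before wiring the controller to an SLB, it is worth a quick sanity check that the manifest applied cleanly. The following is a minimal sketch, assuming the manifest above has been saved as ingress-nginx.yaml (the filename is only an example):

# Assumed filename for the manifest above
kubectl apply -f ingress-nginx.yaml

# The controller Pod should reach Running; the two admission Jobs should show COMPLETIONS 1/1
kubectl -n ingress-nginx get pods
kubectl -n ingress-nginx get jobs

# Confirm the admission webhook Service exists and targets the controller
kubectl -n ingress-nginx get svc ingress-nginx-controller-admission
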
SLB With Ingress

When installing the ingress-controller above, we did not attach it to an SLB; that is configured separately here:

apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
    app.kubernetes.io/part-of: ingress-nginx
    app.kubernetes.io/version: 1.2.0
  annotations:
    # 公网模式
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-address-type: "internet"
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-force-override-listeners: "true"
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-id: "lb-xxxx"
    # 指定虚拟交换机
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-vswitch-id: "vsw-xxxx"
    # 健康检查
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-health-check-type: "tcp"
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-health-check-connect-timeout: "8"
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-healthy-threshold: "4"
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-unhealthy-threshold: "4"
    service.beta.kubernetes.io/alibaba-cloud-loadbalancer-health-check-interval: "3"
  name: ingress-nginx-controller
  namespace: ingress-nginx
spec:
  externalTrafficPolicy: Local
  ports:
  - appProtocol: http
    name: http
    port: 80
    protocol: TCP
    targetPort: http
  - appProtocol: https
    name: https
    port: 443
    protocol: TCP
    targetPort: https
  selector:
    app.kubernetes.io/component: controller
    app.kubernetes.io/instance: ingress-nginx
    app.kubernetes.io/name: ingress-nginx
  type: LoadBalancer
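
After this Service is applied, the CCM should configure the listeners on the SLB referenced by lb-xxxx and fill in EXTERNAL-IP. The snippet below is a minimal sketch for verifying that; the Ingress host test.example.com and the backend Service named demo are hypothetical placeholders:

# EXTERNAL-IP should show the SLB address once the CCM has reconciled the Service
kubectl -n ingress-nginx get svc ingress-nginx-controller

# A hypothetical test Ingress routed through the nginx IngressClass defined above
kubectl apply -f - <<EOF
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: demo
  namespace: default
spec:
  ingressClassName: nginx
  rules:
  - host: test.example.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: demo        # replace with a real backend Service
            port:
              number: 80
EOF

# Request through the SLB, overriding the Host header (replace <SLB-public-IP>)
curl -H "Host: test.example.com" http://<SLB-public-IP>/

Because the Service uses externalTrafficPolicy: Local, only nodes that actually run a controller Pod will pass the SLB health check, and client source IPs are preserved all the way to the controller.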

For more SLB annotation parameters, see the official Alibaba Cloud documentation.

Acknowledgements

Thanks to the following projects and documents for their inspiration: