Resource management

[root@master yamls]# cat tomcat-test-deploy.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: tomcat-test
spec:
  replicas: 3
  selector:
    matchLabels:
      app: tomcat-t
  template:
    metadata:
      labels:
        app: tomcat-t
    spec:
      containers:
        - name: tomcat
          image: tomcat
          ports:
            - containerPort: 8080
          resources:
            requests:
              memory: 100Mi
              cpu: 100m
            limits:
              memory: 100Mi
              cpu: 200m
---
apiVersion: v1
kind: Service
metadata:
  name: tomcat-t
spec:
  ports:
  - port: 8080
    targetPort: 8080
  selector:
    app: tomcat-t

# After applying, the resource requests/limits show up on the node.
# CPU: 1000m = 1 CPU core
# Memory: units such as 1Mi, 1Gi
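For reference, the same quantities can be written in equivalent notations; a minimal sketch of the unit conventions:

# equivalent ways of writing resource quantities
resources:
  requests:
    cpu: "0.5"      # same as 500m (millicores); 1000m = 1 core
    memory: 128Mi   # Mi/Gi are binary units (1Mi = 1024*1024 bytes); 128M would be decimal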

[root@master yamls]# kubectl describe node node2
Name:               node2
Roles:              <none>
Labels:             beta.kubernetes.io/arch=amd64
                    beta.kubernetes.io/os=linux
                    kubernetes.io/hostname=node2
Annotations:        flannel.alpha.coreos.com/backend-data={"VtepMAC":"f2:7b:89:b4:22:b2"}
                    flannel.alpha.coreos.com/backend-type=vxlan
                    flannel.alpha.coreos.com/kube-subnet-manager=true
                    flannel.alpha.coreos.com/public-ip=192.168.31.29
                    kubeadm.alpha.kubernetes.io/cri-socket=/var/run/dockershim.sock
                    node.alpha.kubernetes.io/ttl=0
                    volumes.kubernetes.io/controller-managed-attach-detach=true
CreationTimestamp:  Sat, 20 Jul 2019 22:49:13 +0800
Taints:             <none>
Unschedulable:      false
Conditions:
  Type             Status  LastHeartbeatTime                 LastTransitionTime                Reason                       Message
  ----             ------  -----------------                 ------------------                ------                       -------
  OutOfDisk        False   Sun, 21 Jul 2019 14:55:12 +0800   Sat, 20 Jul 2019 22:49:13 +0800   KubeletHasSufficientDisk     kubelet has sufficient disk space available
  MemoryPressure   False   Sun, 21 Jul 2019 14:55:12 +0800   Sat, 20 Jul 2019 22:49:13 +0800   KubeletHasSufficientMemory   kubelet has sufficient memory available
  DiskPressure     False   Sun, 21 Jul 2019 14:55:12 +0800   Sat, 20 Jul 2019 22:49:13 +0800   KubeletHasNoDiskPressure     kubelet has no disk pressure
  PIDPressure      False   Sun, 21 Jul 2019 14:55:12 +0800   Sat, 20 Jul 2019 22:49:13 +0800   KubeletHasSufficientPID      kubelet has sufficient PID available
  Ready            True    Sun, 21 Jul 2019 14:55:12 +0800   Sat, 20 Jul 2019 22:50:03 +0800   KubeletReady                 kubelet is posting ready status
Addresses:
  InternalIP:  192.168.31.29
  Hostname:    node2
Capacity:
 cpu:                2
 ephemeral-storage:  37729284Ki
 hugepages-1Gi:      0
 hugepages-2Mi:      0
 memory:             3027032Ki
 pods:               110
Allocatable:
 cpu:                2
 ephemeral-storage:  34771308077
 hugepages-1Gi:      0
 hugepages-2Mi:      0
 memory:             2924632Ki
 pods:               110
System Info:
 Machine ID:                 d2c787aa4b574668942a67f0c6708743
 System UUID:                61204D56-CF3A-9F5B-0583-68A5EF1B5468
 Boot ID:                    22870460-352b-4fa6-88ca-4b7208762639
 Kernel Version:             3.10.0-862.el7.x86_64
 OS Image:                   CentOS Linux 7 (Core)
 Operating System:           linux
 Architecture:               amd64
 Container Runtime Version:  docker://17.3.3
 Kubelet Version:            v1.11.3
 Kube-Proxy Version:         v1.11.3
PodCIDR:                     172.16.2.0/24
Non-terminated Pods:         (7 in total)
  Namespace                  Name                                         CPU Requests  CPU Limits  Memory Requests  Memory Limits
  ---------                  ----                                         ------------  ----------  ---------------  -------------
  default                    tomcat-test-dc745b54b-bkql9                  100m (5%)     200m (10%)  100Mi (3%)       100Mi (3%)
  default                    tomcat-test-dc745b54b-hkd25                  100m (5%)     200m (10%)  100Mi (3%)       100Mi (3%)
  default                    tomcat-test-hostnetwork-788b6b6988-sl75z     0 (0%)        0 (0%)      0 (0%)           0 (0%)
  ingress-nginx              nginx-ingress-controller-569c67fcdc-srxjw    0 (0%)        0 (0%)      0 (0%)           0 (0%)
  kube-system                coredns-78fcdf6894-wzr2z                     100m (5%)     0 (0%)      70Mi (2%)        170Mi (5%)
  kube-system                kube-flannel-ds-ftvsr                        100m (5%)     100m (5%)   50Mi (1%)        50Mi (1%)
  kube-system                kube-proxy-nbqvz                             0 (0%)        0 (0%)      0 (0%)           0 (0%)
Allocated resources:
  (Total limits may be over 100 percent, i.e., overcommitted.)
  Resource  Requests     Limits
  --------  --------     ------
  cpu       400m (20%)   500m (25%)
  memory    320Mi (11%)  420Mi (14%)
Events:
  Type    Reason    Age   From               Message
  ----    ------    ----  ----               -------
  Normal  Starting  36m   kube-proxy, node2  Starting kube-proxy.
  Normal  Starting  36m   kube-proxy, node2  Starting kube-proxy.

label

# Label selector (set-based); in a Deployment this lives under spec.selector
spec:
  selector:
    matchExpressions:
    - {key: group, operator: In, values: [dev, test]}
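The same set-based selector can be exercised from the command line; a small sketch (the pod name is hypothetical, the group label matches the expression above):

kubectl label pod mypod group=dev              # attach the label
kubectl get pods -l 'group in (dev,test)'      # set-based selector, same as the matchExpressions above
kubectl get pods -l group=dev                  # equality-based form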

Health checks

# Liveness probe
livenessProbe:
  exec:
    command:
    - /bin/sh
    - -c
    - ps -ef|grep java |grep -v grep
  initialDelaySeconds: 10   # wait before the first check, giving the container time to start
  periodSeconds: 10         # interval between checks
  failureThreshold: 2       # consecutive failures before the container is restarted (2 here)
  successThreshold: 1       # consecutive successes needed to be considered healthy again
  timeoutSeconds: 5         # probe timeout
  
  httpGet:  # alternative handler (only one of exec/httpGet/tcpSocket per probe); an HTTP status of 200-399 counts as success
    path: /examples/index.html
    port: 8080
    scheme: HTTP
  initialDelaySeconds: 10
  periodSeconds: 5
  failureThreshold: 1 
  successThreshold: 1
  timeoutSeconds: 5  
  
  tcpSocket:  # alternative handler: succeeds if a TCP connection to the port can be opened
    port: 8080
  initialDelaySeconds: 10
  periodSeconds: 5
  failureThreshold: 1 
  successThreshold: 1
  timeoutSeconds: 5  

# Readiness probe: accepts the same handler types and timing fields as livenessProbe;
# a failing readiness check removes the pod from Service endpoints instead of restarting the container.
readinessProbe:
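  # A minimal sketch (assumed values, not from the original notes): reuse the Tomcat
  # httpGet check so the pod only receives traffic once the app responds.
  httpGet:
    path: /examples/index.html
    port: 8080
    scheme: HTTP
  initialDelaySeconds: 10
  periodSeconds: 5
  failureThreshold: 3
  successThreshold: 1
  timeoutSeconds: 5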
  

Affinity (node/pod affinity and taints)

# Node affinity
apiVersion: apps/v1
kind: Deployment
metadata:
  name: tomcat-test-affinity
spec:
  replicas: 3
  selector:
    matchLabels:
      app: tomcat-t-a
  template:
    metadata:
      labels:
        app: tomcat-t-a
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: beta.kubernetes.io/arch
                operator: In
                values:
                - amd64
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 1
            preference:
              matchExpressions:
              - key: disktype
                operator: NotIn
                values:
                - ssd

# Pod affinity
affinity:
  podAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
    - labelSelector:
        matchExpressions:
        - key: app
          operator: In
          values:
          - web-demo
      topologyKey: kubernetes.io/hostname
    preferredDuringSchedulingIgnoredDuringExecution:
    - weight: 100
      podAffinityTerm:
        labelSelector:
          matchExpressions:
          - key: app
            operator: In
            values:
            - web-demo-node
        topologyKey: kubernetes.io/hostname

# podAntiAffinity can be used to keep the pods from all running on the same node; see the sketch below.
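A minimal sketch of that anti-affinity idea, reusing the web-demo label from the example above: pods carrying the same app label are kept off the same node.

affinity:
  podAntiAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
    - labelSelector:
        matchExpressions:
        - key: app
          operator: In
          values:
          - web-demo
      topologyKey: kubernetes.io/hostname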

# Taint a node
[root@master yamls]# kubectl taint nodes node2 gpu=true:NoSchedule
# With the following toleration, pods can still be scheduled onto the tainted node.
tolerations:
  - key: "gpu"
    operator: "Equal"
    value: "true"
    effect: "NoSchedule"

# Remove the taint
[root@master yamls]# kubectl taint nodes node2 gpu:NoSchedule-
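
# To confirm whether the taint is present after adding or removing it:
kubectl describe node node2 | grep Taints
kubectl get node node2 -o jsonpath='{.spec.taints}'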

Deployment strategies

# Recreate: stop and delete all old pods, then create the new ones; the service is interrupted during the switch.
spec:
  strategy:
    type: Recreate

# Rolling update
spec:
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 25%            # a percentage or an absolute number (e.g. 1)
      maxUnavailable: 25%

[root@master yamls]# kubectl explain deploy.spec.strategy
KIND:     Deployment
VERSION:  extensions/v1beta1

RESOURCE: strategy <Object>

DESCRIPTION:
     The deployment strategy to use to replace existing pods with new ones.

     DeploymentStrategy describes how to replace existing pods with new ones.

FIELDS:
   rollingUpdate	<Object>
     Rolling update config params. Present only if DeploymentStrategyType =
     RollingUpdate.

   type	<string>
     Type of deployment. Can be "Recreate" or "RollingUpdate". Default is
     RollingUpdate.

[root@master yamls]# kubectl explain deploy.spec.strategy.rollingUpdate
KIND:     Deployment
VERSION:  extensions/v1beta1

RESOURCE: rollingUpdate <Object>

DESCRIPTION:
     Rolling update config params. Present only if DeploymentStrategyType =
     RollingUpdate.

     Spec to control the desired behavior of rolling update.

FIELDS:
   maxSurge	<string>
     The maximum number of pods that can be scheduled above the desired number
     of pods. Value can be an absolute number (ex: 5) or a percentage of desired
     pods (ex: 10%). This can not be 0 if MaxUnavailable is 0. Absolute number
     is calculated from percentage by rounding up. By default, a value of 1 is
     used. Example: when this is set to 30%, the new RC can be scaled up
     immediately when the rolling update starts, such that the total number of
     old and new pods do not exceed 130% of desired pods. Once old pods have
     been killed, new RC can be scaled up further, ensuring that total number of
     pods running at any time during the update is atmost 130% of desired pods.

   maxUnavailable	<string>
     The maximum number of pods that can be unavailable during the update. Value
     can be an absolute number (ex: 5) or a percentage of desired pods (ex:
     10%). Absolute number is calculated from percentage by rounding down. This
     can not be 0 if MaxSurge is 0. By default, a fixed value of 1 is used.
     Example: when this is set to 30%, the old RC can be scaled down to 70% of
     desired pods immediately when the rolling update starts. Once new pods are
     ready, old RC can be scaled down further, followed by scaling up the new
     RC, ensuring that the total number of pods available at all times during
     the update is at least 70% of desired pods.
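
# In practice a rolling update is usually triggered and watched with the rollout subcommands
# (the image tag below is only illustrative):
kubectl set image deploy/tomcat-test tomcat=tomcat:9   # update the container image
kubectl rollout status deploy/tomcat-test              # watch the rolling update finish
kubectl rollout history deploy/tomcat-test             # list revisions
kubectl rollout undo deploy/tomcat-test                # roll back to the previous revision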
     

# Blue-green deployment
Two Deployments run side by side; the update is completed by switching the Service's label selector, so there is no gradual mix of old and new pods.
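A minimal sketch of the switch, assuming both Deployments label their pods with app plus a version label (blue/green and the Service name are assumed values):

# Service selector initially points at the blue Deployment
selector:
  app: web
  version: blue

# switch all traffic to green in one step by repointing the selector
kubectl patch service web -p '{"spec":{"selector":{"app":"web","version":"green"}}}'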

# Canary deployment
Two Deployments, with the Service selecting pods of both versions at the same time.
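A sketch of the canary split (names and replica counts are assumptions): the Service selects only the shared app label, so traffic divides roughly by replica count.

# Service selector matches both versions (no version key)
selector:
  app: web

# v1: 9 replicas, v2 canary: 1 replica  =>  roughly 10% of traffic hits the canary
kubectl scale deploy/web-v1 --replicas=9
kubectl scale deploy/web-v2 --replicas=1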

Pod-related

[root@master yamls]# kubectl explain pod.spec.containers.lifecycle.preStop # 
KIND:     Pod
VERSION:  v1

RESOURCE: preStop <Object>

DESCRIPTION:
     PreStop is called immediately before a container is terminated. The
     container is terminated after the handler completes. The reason for
     termination is passed to the handler. Regardless of the outcome of the
     handler, the container is eventually terminated. Other management of the
     container blocks until the hook completes. More info:
     https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks

     Handler defines a specific action that should be taken

FIELDS:
   exec	<Object>
     One and only one of the following should be specified. Exec specifies the
     action to take.

   httpGet	<Object>
     HTTPGet specifies the http request to perform.

   tcpSocket	<Object>
     TCPSocket specifies an action involving a TCP port. TCP hooks not yet
     supported
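
A minimal preStop sketch for the Tomcat example above (the sleep and shutdown command are assumptions, not from the original notes): let in-flight requests drain before the container receives SIGTERM.

containers:
- name: tomcat
  image: tomcat
  lifecycle:
    preStop:
      exec:
        command: ["/bin/sh", "-c", "sleep 5 && /usr/local/tomcat/bin/shutdown.sh"]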

Storage

# PV
# Add a new disk: pvcreate /dev/sdb  (install LVM tools first: yum install lvm2 -y)

# gluster
https://github.com/gluster/gluster-kubernetes

# Installation steps
yum install glusterfs glusterfs-fuse -y

# Check (on the master) that the apiserver allows privileged containers
master:  ps -ef|grep apiserver |grep allow-pri

# Label the storage nodes
[root@master gluster-fs]# kubectl label node/master storagenode=glusterfs
node/master labeled
[root@master gluster-fs]# kubectl label node/node1 storagenode=glusterfs
node/node1 labeled
[root@master gluster-fs]# kubectl label node/node2 storagenode=glusterfs
node/node2 labeled

# Install heketi
# Inside the heketi container: export HEKETI_CLI_SERVER=http://localhost:8080
# Prepare the topology file topology.json, then load it:

heketi-cli topology load --json topology.json
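
For reference, topology.json follows heketi's clusters/nodes/devices layout; a minimal single-node sketch (hostname, IP and device are placeholders):

{
  "clusters": [
    {
      "nodes": [
        {
          "node": {
            "hostnames": {
              "manage":  ["node1"],
              "storage": ["192.168.31.28"]
            },
            "zone": 1
          },
          "devices": ["/dev/sdb"]
        }
      ]
    }
  ]
}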

# StorageClass with dynamic volume expansion enabled
[root@master glusterfs]# cat glusterfs-stroage-class-exp.yaml 
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: glusterfs-storage-class-exp
provisioner: kubernetes.io/glusterfs
parameters:
  resturl: "http://10.103.240.183"
  restauthenabled: "false"
allowVolumeExpansion: true

# Create a PVC
[root@master glusterfs]# cat g-pvc.yaml 
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: g-pvc
spec:
  storageClassName: glusterfs-storage-class
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 2Gi
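
# After applying, the claim should be bound by the provisioner; a quick way to check:
kubectl apply -f g-pvc.yaml
kubectl get pvc g-pvc     # STATUS should turn Bound once the volume is provisioned
kubectl get pv            # the dynamically created PV shows up here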
# Use the PVC in a Deployment
[root@master glusterfs]# cat use-pod.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: web-deploy
spec:
  selector:
    matchLabels:
      app: web-deploy
  replicas: 2
  template:
    metadata:
      labels:
        app: web-deploy
    spec:
      containers:
      - name: web-deploy
        image: nginx
        ports:
        - containerPort: 80
        volumeMounts:
          - name: gluster-volume
            mountPath: "/usr/share/nginx/html"
            readOnly: false
      volumes:
      - name: gluster-volume
        persistentVolumeClaim:
          claimName: g-pvc          # the claim created above

# PVC template in a StatefulSet (volumeClaimTemplates); see the full sketch after the template
  volumeClaimTemplates:
  - metadata:
      name: datadir
    spec:
      accessModes:
      - ReadWriteOnce
      storageClassName: glusterfs-storage-class
      resources:
        requests:
          storage: 3Gi
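
For context, volumeClaimTemplates sits at the StatefulSet spec level and each replica gets its own PVC named <template>-<pod>. A minimal wrapper sketch (the StatefulSet name, image and headless Service are assumptions):

apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: web-sts
spec:
  serviceName: web-sts          # assumes a matching headless Service exists
  replicas: 2
  selector:
    matchLabels:
      app: web-sts
  template:
    metadata:
      labels:
        app: web-sts
    spec:
      containers:
      - name: web
        image: nginx
        volumeMounts:
        - name: datadir         # matches the volumeClaimTemplate name
          mountPath: /usr/share/nginx/html
  volumeClaimTemplates:
  - metadata:
      name: datadir
    spec:
      accessModes:
      - ReadWriteOnce
      storageClassName: glusterfs-storage-class
      resources:
        requests:
          storage: 3Gi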