Skip to main content

Common Misconfiguration

Running pods with privileged: true or elevated capabilities breaks container isolation, allowing pods to access host resources, mount host filesystems, and potentially compromise the entire node.

Vulnerable Example

# Vulnerable Pod specification: an anti-pattern shown for illustration only.
# Every commented setting below weakens container isolation.
apiVersion: v1
kind: Pod
metadata:
  namespace: production
  name: privileged-pod
spec:
  # The pod shares the host's network, PID, and IPC namespaces.
  hostNetwork: true
  hostPID: true
  hostIPC: true
  volumes:
    # Mounts the entire host filesystem.
    - name: host-root
      hostPath:
        path: /
    # Gives the pod access to the Docker socket.
    - name: docker-socket
      hostPath:
        path: /var/run/docker.sock
  containers:
    - name: app
      image: myapp:latest
      volumeMounts:
        - name: host-root
          mountPath: /host
        - name: docker-socket
          mountPath: /var/run/docker.sock
      securityContext:
        # DANGEROUS - full host access.
        privileged: true
        # UID 0: the container runs as root.
        runAsUser: 0
        capabilities:
          # Adds all Linux capabilities.
          add: [ALL]

Secure Example

# Secure Pod with Pod Security Standards: a namespace labelled with the
# restricted profile, followed by a Pod deployed into that namespace.
apiVersion: v1
kind: Namespace
metadata:
  name: secure-apps
  labels:
    # The restricted profile is applied in all three modes.
    pod-security.kubernetes.io/warn: restricted
    pod-security.kubernetes.io/audit: restricted
    pod-security.kubernetes.io/enforce: restricted
---
apiVersion: v1
kind: Pod
metadata:
  namespace: secure-apps
  name: secure-pod
spec:
  volumes:
    - name: app-data
      persistentVolumeClaim:
        claimName: app-data-pvc
        readOnly: false
    # Size-capped emptyDir volumes backing the writable mount points.
    - name: tmp
      emptyDir:
        sizeLimit: 100Mi
    - name: cache
      emptyDir:
        sizeLimit: 500Mi
  # Pod-level security settings.
  securityContext:
    seccompProfile:
      type: RuntimeDefault
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 3000
    fsGroup: 2000
  containers:
    - name: app
      # Use specific tags rather than a floating one.
      image: myapp:1.2.3
      imagePullPolicy: IfNotPresent
      resources:
        requests:
          cpu: 250m
          memory: 128Mi
        limits:
          cpu: 500m
          memory: 256Mi
      volumeMounts:
        - mountPath: /app/data
          name: app-data
        - mountPath: /tmp
          name: tmp
        - mountPath: /app/cache
          name: cache
      # Container-level security settings.
      securityContext:
        seccompProfile:
          type: RuntimeDefault
        runAsNonRoot: true
        runAsUser: 1000
        readOnlyRootFilesystem: true
        allowPrivilegeEscalation: false
        capabilities:
          # Drop all capabilities, then add back only the one required.
          drop: [ALL]
          add: [NET_BIND_SERVICE]

Pod Security Policy (deprecated in Kubernetes v1.21 and removed in v1.25 - use Pod Security Standards, optionally with OPA Gatekeeper)

# Pod Security Standards implementation for the production namespace.
apiVersion: v1
kind: Namespace
metadata:
  name: production
  labels:
    # Restricted security standard for the enforce, audit, and warn modes.
    pod-security.kubernetes.io/enforce: restricted
    pod-security.kubernetes.io/audit: restricted
    pod-security.kubernetes.io/warn: restricted
    # Each mode is pinned to the same policy version.
    pod-security.kubernetes.io/enforce-version: v1.28
    pod-security.kubernetes.io/audit-version: v1.28
    pod-security.kubernetes.io/warn-version: v1.28
---
# SecurityContext with OPA Gatekeeper
# ConstraintTemplate that reports a violation for every container which is
# not forced to run as non-root, runs privileged, may escalate privileges,
# or has a writable root filesystem. It evaluates bare Pods as well as the
# pod template of workload controllers.
apiVersion: templates.gatekeeper.sh/v1beta1
kind: ConstraintTemplate
metadata:
  name: k8srequiredsecuritycontext
spec:
  crd:
    spec:
      names:
        kind: K8sRequiredSecurityContext
      validation:
        # Constraints of this kind take no parameters.
        openAPIV3Schema:
          type: object
  targets:
    - target: admission.k8s.gatekeeper.sh
      rego: |
        package k8srequiredsecuritycontext

        # Pod spec under review. A bare Pod keeps it at .spec; workload
        # controllers (Deployment, StatefulSet, DaemonSet) nest it under
        # .spec.template.spec. The two bodies are mutually exclusive.
        pod_spec = spec {
          input.review.object.kind == "Pod"
          spec := input.review.object.spec
        }

        pod_spec = spec {
          input.review.object.kind != "Pod"
          spec := input.review.object.spec.template.spec
        }

        # Every container that can run in the pod, so that init and
        # ephemeral containers cannot be used to sidestep the policy.
        input_containers[c] {
          c := pod_spec.containers[_]
        }

        input_containers[c] {
          c := pod_spec.initContainers[_]
        }

        input_containers[c] {
          c := pod_spec.ephemeralContainers[_]
        }

        # True when the key is present, even if its value is false.
        has_key(obj, key) {
          _ = obj[key]
        }

        # A container runs as non-root when it sets runAsNonRoot: true
        # itself, or when it leaves the field unset and the pod-level
        # securityContext sets it to true.
        runs_as_non_root(container) {
          container.securityContext.runAsNonRoot == true
        }

        runs_as_non_root(container) {
          not has_key(object.get(container, "securityContext", {}), "runAsNonRoot")
          pod_spec.securityContext.runAsNonRoot == true
        }

        violation[{"msg": msg}] {
          container := input_containers[_]
          not runs_as_non_root(container)
          msg := sprintf("Container %v is not running as non-root", [container.name])
        }

        violation[{"msg": msg}] {
          container := input_containers[_]
          container.securityContext.privileged
          msg := sprintf("Container %v is running in privileged mode", [container.name])
        }

        # An omitted allowPrivilegeEscalation still permits escalation,
        # so only an explicit false is accepted.
        violation[{"msg": msg}] {
          container := input_containers[_]
          not container.securityContext.allowPrivilegeEscalation == false
          msg := sprintf("Container %v allows privilege escalation", [container.name])
        }

        # Fires when readOnlyRootFilesystem is unset or false.
        violation[{"msg": msg}] {
          container := input_containers[_]
          not container.securityContext.readOnlyRootFilesystem
          msg := sprintf("Container %v does not have a read-only root filesystem", [container.name])
        }
---
# Constraint that applies the K8sRequiredSecurityContext template to Pods and
# apps-group workloads in the production and staging namespaces.
# Every Gatekeeper constraint uses the constraints.gatekeeper.sh API group;
# the kind, not the group, selects the ConstraintTemplate.
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: K8sRequiredSecurityContext
metadata:
  name: must-have-security-context
spec:
  match:
    kinds:
    # "" is the core API group (Pod); "apps" holds the workload controllers.
    - apiGroups: ["apps", ""]
      kinds: ["Deployment", "Pod", "StatefulSet", "DaemonSet"]
    namespaces: ["production", "staging"]

Deployment with Security Best Practices

# Deployment of secure-app: three replicas running as an unprivileged user
# with a read-only root filesystem, no capabilities, and bounded resources.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: secure-app
  namespace: production
  labels:
    app: secure-app
    version: v1.2.3
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Add one new pod at a time and never drop below the desired count.
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: secure-app
  template:
    metadata:
      labels:
        app: secure-app
        version: v1.2.3
      annotations:
        # AppArmor profile for the container named "app".
        # NOTE(review): newer Kubernetes releases expose this as the
        # securityContext.appArmorProfile field - confirm which form the
        # target cluster version expects.
        container.apparmor.security.beta.kubernetes.io/app: runtime/default
        # The seccomp.security.alpha.kubernetes.io/pod annotation is
        # intentionally absent: it is obsolete and ignored by current
        # Kubernetes releases. Seccomp is configured through
        # securityContext.seccompProfile below.
    spec:
      serviceAccountName: secure-app-sa
      # No service account token is mounted into the pod.
      automountServiceAccountToken: false
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534  # nobody user
        runAsGroup: 65534
        fsGroup: 65534
        seccompProfile:
          type: RuntimeDefault
        supplementalGroups: [65534]
      containers:
      - name: app
        image: myregistry/secure-app:1.2.3
        imagePullPolicy: Always
        securityContext:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 65534
          capabilities:
            drop:
              - ALL
        ports:
        - containerPort: 8080
          protocol: TCP
        env:
        # Quoted so the value stays a string.
        - name: PORT
          value: "8080"
        # Writable emptyDir mounts; the root filesystem is read-only.
        volumeMounts:
        - name: tmp-volume
          mountPath: /tmp
        - name: cache-volume
          mountPath: /cache
        livenessProbe:
          httpGet:
            path: /healthz
            port: 8080
          initialDelaySeconds: 30
          periodSeconds: 10
        readinessProbe:
          httpGet:
            path: /ready
            port: 8080
          initialDelaySeconds: 5
          periodSeconds: 5
        resources:
          limits:
            memory: "512Mi"
            cpu: "1000m"
            ephemeral-storage: "1Gi"
          requests:
            memory: "256Mi"
            cpu: "500m"
            ephemeral-storage: "500Mi"
      volumes:
      - name: tmp-volume
        emptyDir:
          sizeLimit: 100Mi
      - name: cache-volume
        emptyDir:
          sizeLimit: 500Mi
      nodeSelector:
        kubernetes.io/os: linux
      affinity:
        # Soft preference to spread replicas across different nodes.
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            podAffinityTerm:
              labelSelector:
                matchExpressions:
                - key: app
                  operator: In
                  values:
                  - secure-app
              topologyKey: kubernetes.io/hostname