Common Misconfiguration
Running pods with privileged: true or elevated capabilities breaks container isolation, allowing pods to access host resources, mount host filesystems, and potentially compromise the entire node.
Vulnerable Example
Copy
# INSECURE example Pod -- shows the settings to avoid; do not deploy.
apiVersion: v1
kind: Pod
metadata:
  name: privileged-pod
  namespace: production
spec:
  # The pod shares the node's network, PID and IPC namespaces.
  hostNetwork: true
  hostPID: true
  hostIPC: true
  containers:
    - name: app
      image: myapp:latest
      securityContext:
        # DANGEROUS: privileged mode gives the container full host access.
        privileged: true
        # UID 0 means the process runs as root.
        runAsUser: 0
        capabilities:
          add:
            - ALL  # every Linux capability is granted
      volumeMounts:
        - name: host-root
          mountPath: /host
        - name: docker-socket
          mountPath: /var/run/docker.sock
  volumes:
    # Entire host filesystem exposed to the container.
    - name: host-root
      hostPath:
        path: /
    # Docker socket of the node exposed to the container.
    - name: docker-socket
      hostPath:
        path: /var/run/docker.sock
Secure Example
Copy
# Namespace whose Pod Security Admission labels select the "restricted"
# Pod Security Standard for all three modes: enforce, audit and warn.
apiVersion: v1
kind: Namespace
metadata:
  name: secure-apps
  labels:
    pod-security.kubernetes.io/enforce: restricted
    pod-security.kubernetes.io/audit: restricted
    pod-security.kubernetes.io/warn: restricted
---
# Hardened Pod placed in the secure-apps namespace.
apiVersion: v1
kind: Pod
metadata:
  name: secure-pod
  namespace: secure-apps
spec:
  # Pod-wide defaults: non-root identity plus the runtime's default seccomp
  # profile.
  securityContext:
    runAsNonRoot: true
    runAsUser: 1000
    runAsGroup: 3000
    fsGroup: 2000
    seccompProfile:
      type: RuntimeDefault
  containers:
    - name: app
      # Pinned to an explicit version tag.
      image: myapp:1.2.3
      imagePullPolicy: IfNotPresent
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        allowPrivilegeEscalation: false
        # Root filesystem is read-only; the writable paths are the volumes
        # mounted below.
        readOnlyRootFilesystem: true
        seccompProfile:
          type: RuntimeDefault
        capabilities:
          # Start from zero capabilities ...
          drop:
            - ALL
          # ... and add back only the one that is required.
          add:
            - NET_BIND_SERVICE
      resources:
        requests:
          cpu: 250m
          memory: 128Mi
        limits:
          cpu: 500m
          memory: 256Mi
      volumeMounts:
        - name: app-data
          mountPath: /app/data
        - name: tmp
          mountPath: /tmp
        - name: cache
          mountPath: /app/cache
  volumes:
    - name: app-data
      persistentVolumeClaim:
        claimName: app-data-pvc
        readOnly: false
    # Size-capped scratch volumes.
    - name: tmp
      emptyDir:
        sizeLimit: 100Mi
    - name: cache
      emptyDir:
        sizeLimit: 500Mi
Pod Security Standards and OPA Gatekeeper (replacement for the deprecated Pod Security Policy)
Copy
# Pod Security Standards applied through namespace labels.
apiVersion: v1
kind: Namespace
metadata:
  name: production
  labels:
    # Each mode (enforce / audit / warn) uses the "restricted" standard and is
    # pinned to the v1.28 version of that standard.
    pod-security.kubernetes.io/enforce: restricted
    pod-security.kubernetes.io/enforce-version: v1.28
    pod-security.kubernetes.io/audit: restricted
    pod-security.kubernetes.io/audit-version: v1.28
    pod-security.kubernetes.io/warn: restricted
    pod-security.kubernetes.io/warn-version: v1.28
---
# OPA Gatekeeper ConstraintTemplate: declares the K8sRequiredSecurityContext
# constraint kind and the Rego that evaluates each reviewed object.
apiVersion: templates.gatekeeper.sh/v1beta1
kind: ConstraintTemplate
metadata:
  name: k8srequiredsecuritycontext
spec:
  crd:
    spec:
      names:
        kind: K8sRequiredSecurityContext
      validation:
        # The constraint takes no parameters.
        openAPIV3Schema:
          type: object
  targets:
    - target: admission.k8s.gatekeeper.sh
      rego: |
        package k8srequiredsecuritycontext

        # input_containers collects every container and init container of the
        # reviewed object. Bare Pods keep them under .spec; workload
        # controllers (Deployment, StatefulSet, DaemonSet) nest the pod spec
        # under .spec.template.spec, so both locations are read.
        input_containers[c] {
          c := input.review.object.spec.containers[_]
        }

        input_containers[c] {
          c := input.review.object.spec.initContainers[_]
        }

        input_containers[c] {
          c := input.review.object.spec.template.spec.containers[_]
        }

        input_containers[c] {
          c := input.review.object.spec.template.spec.initContainers[_]
        }

        # True only when the container explicitly opts out of privilege
        # escalation; an unset field does not count.
        escalation_disabled(c) {
          c.securityContext.allowPrivilegeEscalation == false
        }

        # runAsNonRoot must be set to true on the container itself.
        violation[{"msg": msg}] {
          container := input_containers[_]
          not container.securityContext.runAsNonRoot
          msg := sprintf("Container %v is not running as non-root", [container.name])
        }

        violation[{"msg": msg}] {
          container := input_containers[_]
          container.securityContext.privileged
          msg := sprintf("Container %v is running in privileged mode", [container.name])
        }

        # Flags both an explicit true and a missing field.
        violation[{"msg": msg}] {
          container := input_containers[_]
          not escalation_disabled(container)
          msg := sprintf("Container %v allows privilege escalation", [container.name])
        }

        violation[{"msg": msg}] {
          container := input_containers[_]
          not container.securityContext.readOnlyRootFilesystem
          msg := sprintf("Container %v does not have a read-only root filesystem", [container.name])
        }
---
# Constraint of kind K8sRequiredSecurityContext, scoped to Pods and the listed
# workload controllers in the production and staging namespaces.
# Gatekeeper serves every constraint kind from the constraints.gatekeeper.sh
# API group; the kind name is not part of the group.
apiVersion: constraints.gatekeeper.sh/v1beta1
kind: K8sRequiredSecurityContext
metadata:
  name: must-have-security-context
spec:
  match:
    kinds:
      # Pods live in the core ("") API group.
      - apiGroups: [""]
        kinds: ["Pod"]
      # Workload controllers live in the "apps" API group.
      - apiGroups: ["apps"]
        kinds: ["Deployment", "StatefulSet", "DaemonSet"]
    namespaces: ["production", "staging"]
Deployment with Security Best Practices
Copy
# Deployment of secure-app: three replicas rolled out one at a time, running
# as a non-root user with a read-only root filesystem and no capabilities.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: secure-app
  namespace: production
  labels:
    app: secure-app
    version: v1.2.3
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      # Add one new pod at a time and never drop below the desired count.
      maxSurge: 1
      maxUnavailable: 0
  selector:
    matchLabels:
      app: secure-app
  template:
    metadata:
      labels:
        app: secure-app
        version: v1.2.3
      annotations:
        # AppArmor profile for the "app" container (beta annotation form).
        # NOTE(review): Kubernetes v1.30+ also offers the
        # securityContext.appArmorProfile field -- confirm cluster version
        # before migrating.
        container.apparmor.security.beta.kubernetes.io/app: runtime/default
        # The seccomp.security.alpha.kubernetes.io/pod annotation is not set:
        # support for it was removed in Kubernetes v1.27, and seccomp is
        # configured through securityContext.seccompProfile below.
    spec:
      serviceAccountName: secure-app-sa
      # No service account token is mounted into the pod.
      automountServiceAccountToken: false
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534  # nobody user
        runAsGroup: 65534
        fsGroup: 65534
        seccompProfile:
          type: RuntimeDefault
        supplementalGroups: [65534]
      containers:
        - name: app
          image: myregistry/secure-app:1.2.3
          imagePullPolicy: Always
          securityContext:
            allowPrivilegeEscalation: false
            # Writable paths are limited to the emptyDir mounts below.
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            runAsUser: 65534
            capabilities:
              drop:
                - ALL
          ports:
            - containerPort: 8080
              protocol: TCP
          env:
            # Quoted so the value stays a string, as env values must be.
            - name: PORT
              value: "8080"
          volumeMounts:
            - name: tmp-volume
              mountPath: /tmp
            - name: cache-volume
              mountPath: /cache
          livenessProbe:
            httpGet:
              path: /healthz
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /ready
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 5
          resources:
            limits:
              memory: "512Mi"
              cpu: "1000m"
              ephemeral-storage: "1Gi"
            requests:
              memory: "256Mi"
              cpu: "500m"
              ephemeral-storage: "500Mi"
      volumes:
        # Size-capped scratch space.
        - name: tmp-volume
          emptyDir:
            sizeLimit: 100Mi
        - name: cache-volume
          emptyDir:
            sizeLimit: 500Mi
      nodeSelector:
        kubernetes.io/os: linux
      affinity:
        # Soft preference: spread secure-app replicas across different nodes.
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - secure-app
                topologyKey: kubernetes.io/hostname

