mirror of
https://codeberg.org/guix/guix.git
synced 2025-10-02 02:15:12 +00:00
linux-container: Lock mounts by default.
This makes it impossible to unmount or remount things from within ‘call-with-container’. * gnu/build/linux-container.scm (initialize-user-namespace): Add #:host-uid and #:host-gid, and honor them. (run-container): Add #:lock-mounts?. Honor it by calling ‘unshare’ followed by ‘initialize-user-namespace’. (call-with-container): Add #:lock-mounts? and pass it down. (container-excursion): Get the user namespace owning the PID namespace and join it, then join the remaining namespaces. * tests/containers.scm ("call-with-container, mnt namespace, locked mounts"): New test. ("container-excursion"): Pass #:lock-mounts? #f. Change-Id: I13be982aef99e68a653d472f0e595c81cfcfa392
This commit is contained in:
parent
e1a0171a56
commit
a57ed987ff
2 changed files with 103 additions and 41 deletions
|
@ -189,7 +189,10 @@ for the process."
|
||||||
(remount-read-only "/"))))
|
(remount-read-only "/"))))
|
||||||
|
|
||||||
(define* (initialize-user-namespace pid host-uids
|
(define* (initialize-user-namespace pid host-uids
|
||||||
#:key (guest-uid 0) (guest-gid 0))
|
#:key
|
||||||
|
(host-uid (getuid))
|
||||||
|
(host-gid (getgid))
|
||||||
|
(guest-uid 0) (guest-gid 0))
|
||||||
"Configure the user namespace for PID. HOST-UIDS specifies the number of
|
"Configure the user namespace for PID. HOST-UIDS specifies the number of
|
||||||
host user identifiers to map into the user namespace. GUEST-UID and GUEST-GID
|
host user identifiers to map into the user namespace. GUEST-UID and GUEST-GID
|
||||||
specify the first UID (respectively GID) that host UIDs (respectively GIDs)
|
specify the first UID (respectively GID) that host UIDs (respectively GIDs)
|
||||||
|
@ -200,24 +203,21 @@ map to in the namespace."
|
||||||
(define (scope file)
|
(define (scope file)
|
||||||
(string-append proc-dir file))
|
(string-append proc-dir file))
|
||||||
|
|
||||||
(let ((uid (getuid))
|
;; Only root can write to the gid map without first disabling the
|
||||||
(gid (getgid)))
|
;; setgroups syscall.
|
||||||
|
(unless (and (zero? host-uid) (zero? host-gid))
|
||||||
;; Only root can write to the gid map without first disabling the
|
(call-with-output-file (scope "/setgroups")
|
||||||
;; setgroups syscall.
|
|
||||||
(unless (and (zero? uid) (zero? gid))
|
|
||||||
(call-with-output-file (scope "/setgroups")
|
|
||||||
(lambda (port)
|
|
||||||
(display "deny" port))))
|
|
||||||
|
|
||||||
;; Map the user/group that created the container to the root user
|
|
||||||
;; within the container.
|
|
||||||
(call-with-output-file (scope "/uid_map")
|
|
||||||
(lambda (port)
|
(lambda (port)
|
||||||
(format port "~d ~d ~d" guest-uid uid host-uids)))
|
(display "deny" port))))
|
||||||
(call-with-output-file (scope "/gid_map")
|
|
||||||
(lambda (port)
|
;; Map the user/group that created the container to the root user
|
||||||
(format port "~d ~d ~d" guest-gid gid host-uids)))))
|
;; within the container.
|
||||||
|
(call-with-output-file (scope "/uid_map")
|
||||||
|
(lambda (port)
|
||||||
|
(format port "~d ~d ~d" guest-uid host-uid host-uids)))
|
||||||
|
(call-with-output-file (scope "/gid_map")
|
||||||
|
(lambda (port)
|
||||||
|
(format port "~d ~d ~d" guest-gid host-gid host-uids))))
|
||||||
|
|
||||||
(define (namespaces->bit-mask namespaces)
|
(define (namespaces->bit-mask namespaces)
|
||||||
"Return the number suitable for the 'flags' argument of 'clone' that
|
"Return the number suitable for the 'flags' argument of 'clone' that
|
||||||
|
@ -238,12 +238,14 @@ corresponds to the symbols in NAMESPACES."
|
||||||
#:key (guest-uid 0) (guest-gid 0)
|
#:key (guest-uid 0) (guest-gid 0)
|
||||||
(populate-file-system (const #t))
|
(populate-file-system (const #t))
|
||||||
(loopback-network? #t)
|
(loopback-network? #t)
|
||||||
|
(lock-mounts? #t)
|
||||||
writable-root?)
|
writable-root?)
|
||||||
"Run THUNK in a new container process and return its PID. ROOT specifies
|
"Run THUNK in a new container process and return its PID. ROOT specifies
|
||||||
the root directory for the container. MOUNTS is a list of <file-system>
|
the root directory for the container. MOUNTS is a list of <file-system>
|
||||||
objects that specify file systems to mount inside the container. NAMESPACES
|
objects that specify file systems to mount inside the container. NAMESPACES
|
||||||
is a list of symbols that correspond to the possible Linux namespaces: mnt,
|
is a list of symbols that correspond to the possible Linux namespaces: mnt,
|
||||||
ipc, uts, user, and net.
|
ipc, uts, user, and net. When LOCK-MOUNTS? is true, arrange so that none of
|
||||||
|
MOUNTS can be unmounted or remounted individually from within THUNK.
|
||||||
|
|
||||||
When LOOPBACK-NETWORK? is true and 'net is among NAMESPACES, set up the
|
When LOOPBACK-NETWORK? is true and 'net is among NAMESPACES, set up the
|
||||||
loopback device (\"lo\") and a minimal /etc/hosts.
|
loopback device (\"lo\") and a minimal /etc/hosts.
|
||||||
|
@ -303,6 +305,28 @@ that host UIDs (respectively GIDs) map to in the namespace."
|
||||||
;; cannot be 'read' so they shouldn't be written as is.
|
;; cannot be 'read' so they shouldn't be written as is.
|
||||||
(write args child)
|
(write args child)
|
||||||
(primitive-exit 3))))
|
(primitive-exit 3))))
|
||||||
|
|
||||||
|
(when (and lock-mounts?
|
||||||
|
(memq 'mnt namespaces)
|
||||||
|
(memq 'user namespaces))
|
||||||
|
;; Create a new mount namespace owned by a new user
|
||||||
|
;; namespace to "lock" together previous mounts, such that
|
||||||
|
;; they cannot be unmounted or remounted separately--see
|
||||||
|
;; mount_namespaces(7).
|
||||||
|
;;
|
||||||
|
;; Note: at this point, the process is single-threaded (no
|
||||||
|
;; GC mark threads, no finalization thread, etc.) which is
|
||||||
|
;; why unshare(CLONE_NEWUSER) can be used.
|
||||||
|
(let ((uid (getuid)) (gid (getgid)))
|
||||||
|
(unshare (logior CLONE_NEWUSER CLONE_NEWNS))
|
||||||
|
(when (file-exists? "/proc/self")
|
||||||
|
(initialize-user-namespace (getpid)
|
||||||
|
host-uids
|
||||||
|
#:host-uid uid
|
||||||
|
#:host-gid gid
|
||||||
|
#:guest-uid guest-uid
|
||||||
|
#:guest-gid guest-gid))))
|
||||||
|
|
||||||
;; TODO: Manage capabilities.
|
;; TODO: Manage capabilities.
|
||||||
(write 'ready child)
|
(write 'ready child)
|
||||||
(close-port child)
|
(close-port child)
|
||||||
|
@ -365,6 +389,7 @@ if there are no child processes left."
|
||||||
|
|
||||||
(define* (call-with-container mounts thunk #:key (namespaces %namespaces)
|
(define* (call-with-container mounts thunk #:key (namespaces %namespaces)
|
||||||
(host-uids 1) (guest-uid 0) (guest-gid 0)
|
(host-uids 1) (guest-uid 0) (guest-gid 0)
|
||||||
|
(lock-mounts? #t)
|
||||||
(relayed-signals (list SIGINT SIGTERM))
|
(relayed-signals (list SIGINT SIGTERM))
|
||||||
(child-is-pid1? #t)
|
(child-is-pid1? #t)
|
||||||
(populate-file-system (const #t))
|
(populate-file-system (const #t))
|
||||||
|
@ -449,6 +474,7 @@ load path must be adjusted as needed."
|
||||||
(call-with-temporary-directory
|
(call-with-temporary-directory
|
||||||
(lambda (root)
|
(lambda (root)
|
||||||
(let ((pid (run-container root mounts namespaces host-uids thunk*
|
(let ((pid (run-container root mounts namespaces host-uids thunk*
|
||||||
|
#:lock-mounts? lock-mounts?
|
||||||
#:guest-uid guest-uid
|
#:guest-uid guest-uid
|
||||||
#:guest-gid guest-gid
|
#:guest-gid guest-gid
|
||||||
#:populate-file-system populate-file-system
|
#:populate-file-system populate-file-system
|
||||||
|
@ -469,24 +495,35 @@ return the exit status, an integer as returned by 'waitpid'."
|
||||||
(0
|
(0
|
||||||
(call-with-clean-exit
|
(call-with-clean-exit
|
||||||
(lambda ()
|
(lambda ()
|
||||||
(for-each (lambda (ns)
|
;; First, determine the user namespace that owns the pid namespace and
|
||||||
(let ((source (namespace-file (getpid) ns))
|
;; join that user namespace (the assumption is that it also owns all
|
||||||
(target (namespace-file pid ns)))
|
;; the other namespaces). It's important that the user namespace is
|
||||||
;; Joining the namespace that the process already
|
;; joined first, so that the user will have the privileges to join the
|
||||||
;; belongs to would throw an error so avoid that.
|
;; other namespaces.
|
||||||
;; XXX: This /proc interface leads to TOCTTOU.
|
(let* ((pid-ns (open-fdes (namespace-file pid "pid")
|
||||||
(unless (string=? (readlink source) (readlink target))
|
(logior O_CLOEXEC O_RDONLY)))
|
||||||
(call-with-input-file source
|
(user-ns (get-user-ns pid-ns)))
|
||||||
(lambda (current-ns-port)
|
(close-fdes pid-ns)
|
||||||
(call-with-input-file target
|
(unless (equal? (stat user-ns)
|
||||||
(lambda (new-ns-port)
|
(stat (namespace-file (getpid) "user")))
|
||||||
(setns (fileno new-ns-port) 0))))))))
|
(setns user-ns 0))
|
||||||
;; It's important that the user namespace is joined first,
|
(close-fdes user-ns)
|
||||||
;; so that the user will have the privileges to join the
|
|
||||||
;; other namespaces. Furthermore, it's important that the
|
;; Then join all the remaining namespaces.
|
||||||
;; mount namespace is joined last, otherwise the /proc mount
|
(for-each (lambda (ns)
|
||||||
;; point would no longer be accessible.
|
(let ((source (namespace-file (getpid) ns))
|
||||||
'("user" "ipc" "uts" "net" "pid" "mnt"))
|
(target (namespace-file pid ns)))
|
||||||
|
;; Joining the namespace that the process already
|
||||||
|
;; belongs to would throw an error so avoid that.
|
||||||
|
;; XXX: This /proc interface leads to TOCTTOU.
|
||||||
|
(unless (string=? (readlink source) (readlink target))
|
||||||
|
(call-with-input-file target
|
||||||
|
(lambda (new-ns-port)
|
||||||
|
(setns (fileno new-ns-port) 0))))))
|
||||||
|
;; It's important that the mount namespace is joined last,
|
||||||
|
;; otherwise the /proc mount point would no longer be
|
||||||
|
;; accessible.
|
||||||
|
'("ipc" "uts" "net" "pid" "mnt")))
|
||||||
(purify-environment)
|
(purify-environment)
|
||||||
(chdir "/")
|
(chdir "/")
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
;;; GNU Guix --- Functional package management for GNU
|
;;; GNU Guix --- Functional package management for GNU
|
||||||
;;; Copyright © 2015 David Thompson <davet@gnu.org>
|
;;; Copyright © 2015 David Thompson <davet@gnu.org>
|
||||||
;;; Copyright © 2016, 2017, 2019, 2023 Ludovic Courtès <ludo@gnu.org>
|
;;; Copyright © 2016-2017, 2019, 2023, 2025 Ludovic Courtès <ludo@gnu.org>
|
||||||
;;;
|
;;;
|
||||||
;;; This file is part of GNU Guix.
|
;;; This file is part of GNU Guix.
|
||||||
;;;
|
;;;
|
||||||
|
@ -110,6 +110,26 @@
|
||||||
(assert-exit (file-exists? "/testing")))
|
(assert-exit (file-exists? "/testing")))
|
||||||
#:namespaces '(user mnt))))
|
#:namespaces '(user mnt))))
|
||||||
|
|
||||||
|
(skip-if-unsupported)
|
||||||
|
(test-equal "call-with-container, mnt namespace, locked mounts"
|
||||||
|
EINVAL
|
||||||
|
;; umount(2) fails with EINVAL when targeting a mount point that is
|
||||||
|
;; "locked".
|
||||||
|
(status:exit-val
|
||||||
|
(call-with-container (list (file-system
|
||||||
|
(device "none")
|
||||||
|
(mount-point "/testing")
|
||||||
|
(type "tmpfs")
|
||||||
|
(check? #f)))
|
||||||
|
(lambda ()
|
||||||
|
(primitive-exit (catch 'system-error
|
||||||
|
(lambda ()
|
||||||
|
(umount "/testing")
|
||||||
|
0)
|
||||||
|
(lambda args
|
||||||
|
(system-error-errno args)))))
|
||||||
|
#:namespaces '(user mnt))))
|
||||||
|
|
||||||
(skip-if-unsupported)
|
(skip-if-unsupported)
|
||||||
(test-equal "call-with-container, mnt namespace, wrong bind mount"
|
(test-equal "call-with-container, mnt namespace, wrong bind mount"
|
||||||
`(system-error ,ENOENT)
|
`(system-error ,ENOENT)
|
||||||
|
@ -169,7 +189,8 @@
|
||||||
#:namespaces '(user mnt))))
|
#:namespaces '(user mnt))))
|
||||||
|
|
||||||
(skip-if-unsupported)
|
(skip-if-unsupported)
|
||||||
(test-assert "container-excursion"
|
(test-equal "container-excursion"
|
||||||
|
0
|
||||||
(call-with-temporary-directory
|
(call-with-temporary-directory
|
||||||
(lambda (root)
|
(lambda (root)
|
||||||
;; Two pipes: One for the container to signal that the test can begin,
|
;; Two pipes: One for the container to signal that the test can begin,
|
||||||
|
@ -193,7 +214,11 @@
|
||||||
(readlink (string-append "/proc/" pid "/ns/" ns)))
|
(readlink (string-append "/proc/" pid "/ns/" ns)))
|
||||||
'("user" "ipc" "uts" "net" "pid" "mnt"))))
|
'("user" "ipc" "uts" "net" "pid" "mnt"))))
|
||||||
|
|
||||||
(let* ((pid (run-container root '() %namespaces 1 container))
|
(let* ((pid (run-container root '() %namespaces 1 container
|
||||||
|
;; Do not lock mounts so the user namespace
|
||||||
|
;; appears to be the same seen from inside
|
||||||
|
;; and from outside.
|
||||||
|
#:lock-mounts? #f))
|
||||||
(container-namespaces (namespaces pid))
|
(container-namespaces (namespaces pid))
|
||||||
(result
|
(result
|
||||||
(begin
|
(begin
|
||||||
|
@ -213,7 +238,7 @@
|
||||||
(write 'done end-out)
|
(write 'done end-out)
|
||||||
(close end-out)
|
(close end-out)
|
||||||
(waitpid pid)
|
(waitpid pid)
|
||||||
(zero? result)))))))
|
result))))))
|
||||||
|
|
||||||
(skip-if-unsupported)
|
(skip-if-unsupported)
|
||||||
(test-equal "container-excursion, same namespaces"
|
(test-equal "container-excursion, same namespaces"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue