Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core: do not enforce PrivateTmp with DynamicUser if /tmp and /var/tmp are already private tmpfs #32724

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 6 additions & 6 deletions man/systemd.exec.xml
Original file line number Diff line number Diff line change
Expand Up @@ -671,12 +671,12 @@
part of a unit for which dynamic users/groups are enabled do not leave files or directories owned by
these users/groups around, as a different unit might get the same UID/GID assigned later on, and thus
gain access to these files or directories. If <varname>DynamicUser=</varname> is enabled,
<varname>RemoveIPC=</varname> and <varname>PrivateTmp=</varname> are implied (and cannot be turned
off). This ensures that the lifetime of IPC objects and temporary files created by the executed
processes is bound to the runtime of the service, and hence the lifetime of the dynamic
user/group. Since <filename>/tmp/</filename> and <filename>/var/tmp/</filename> are usually the only
world-writable directories on a system this ensures that a unit making use of dynamic user/group
allocation cannot leave files around after unit termination. Furthermore
<varname>RemoveIPC=</varname> and <varname>TemporaryFileSystem=/tmp/ /var/tmp/</varname> are implied
(and cannot be turned off). This ensures that the lifetime of IPC objects and temporary files
created by the executed processes is bound to the runtime of the service, and hence the lifetime of
the dynamic user/group. Since <filename>/tmp/</filename> and <filename>/var/tmp/</filename> are
usually the only world-writable directories on a system this ensures that a unit making use of
dynamic user/group allocation cannot leave files around after unit termination. Furthermore
<varname>NoNewPrivileges=</varname> and <varname>RestrictSUIDSGID=</varname> are implicitly enabled
(and cannot be disabled), to ensure that processes invoked cannot take benefit or create SUID/SGID
files or directories. Moreover <varname>ProtectSystem=strict</varname> and
Expand Down
4 changes: 4 additions & 0 deletions src/core/exec-invoke.c
Original file line number Diff line number Diff line change
Expand Up @@ -3205,6 +3205,10 @@ static int apply_mount_namespace(
.temporary_filesystems = context->temporary_filesystems,
.n_temporary_filesystems = context->n_temporary_filesystems,

/* When DynamicUser=yes enforce that /tmp/ and /var/tmp/ are disconnected from the host's
* directories, as they are world writable and ephemeral uid/gid will be used. */
.private_tmp_as_tmpfs = context->dynamic_user,

.mount_images = context->mount_images,
.n_mount_images = context->n_mount_images,
.mount_image_policy = context->mount_image_policy ?: &image_policy_service,
Expand Down
4 changes: 4 additions & 0 deletions src/core/execute.c
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,9 @@ bool exec_needs_mount_namespace(
if (!IN_SET(context->mount_propagation_flag, 0, MS_SHARED))
return true;

if (context->dynamic_user)
return true;

if (context->private_tmp && runtime && runtime->shared && (runtime->shared->tmp_dir || runtime->shared->var_tmp_dir))
return true;

Expand Down Expand Up @@ -2161,6 +2164,7 @@ static int exec_shared_runtime_make(
}

if (c->private_tmp &&
!c->dynamic_user &&
!(prefixed_path_strv_contains(c->inaccessible_paths, "/tmp") &&
(prefixed_path_strv_contains(c->inaccessible_paths, "/var/tmp") ||
prefixed_path_strv_contains(c->inaccessible_paths, "/var")))) {
Expand Down
56 changes: 33 additions & 23 deletions src/core/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -2245,32 +2245,42 @@ int setup_namespace(const NamespaceParameters *p, char **error_path) {
if (r < 0)
return r;

if (p->tmp_dir) {
bool ro = streq(p->tmp_dir, RUN_SYSTEMD_EMPTY);

MountEntry *me = mount_list_extend(&ml);
if (!me)
return log_oom_debug();

*me = (MountEntry) {
.path_const = "/tmp",
.mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
.source_const = p->tmp_dir,
};
}
if (p->private_tmp_as_tmpfs) {
r = append_tmpfs_mounts(
&ml,
(TemporaryFileSystem []){{(char *) "/tmp", (char *) "mode=01777" NESTED_TMPFS_LIMITS},
{(char *) "/var/tmp", (char *) "mode=01777" NESTED_TMPFS_LIMITS}},
/* n= */ 2);
if (r < 0)
return r;
} else {
if (p->tmp_dir) {
bool ro = streq(p->tmp_dir, RUN_SYSTEMD_EMPTY);

MountEntry *me = mount_list_extend(&ml);
if (!me)
return log_oom_debug();

*me = (MountEntry) {
.path_const = "/tmp",
.mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
.source_const = p->tmp_dir,
};
}

if (p->var_tmp_dir) {
bool ro = streq(p->var_tmp_dir, RUN_SYSTEMD_EMPTY);
if (p->var_tmp_dir) {
bool ro = streq(p->var_tmp_dir, RUN_SYSTEMD_EMPTY);

MountEntry *me = mount_list_extend(&ml);
if (!me)
return log_oom_debug();
MountEntry *me = mount_list_extend(&ml);
if (!me)
return log_oom_debug();

*me = (MountEntry) {
.path_const = "/var/tmp",
.mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
.source_const = p->var_tmp_dir,
};
*me = (MountEntry) {
.path_const = "/var/tmp",
.mode = ro ? MOUNT_PRIVATE_TMP_READ_ONLY : MOUNT_PRIVATE_TMP,
.source_const = p->var_tmp_dir,
};
}
}

r = append_mount_images(&ml, p->mount_images, p->n_mount_images);
Expand Down
2 changes: 2 additions & 0 deletions src/core/namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ struct NamespaceParameters {
const TemporaryFileSystem *temporary_filesystems;
size_t n_temporary_filesystems;

bool private_tmp_as_tmpfs;

const MountImage *mount_images;
size_t n_mount_images;
const ImagePolicy *mount_image_policy;
Expand Down
3 changes: 1 addition & 2 deletions src/core/unit.c
Original file line number Diff line number Diff line change
Expand Up @@ -1275,7 +1275,7 @@ int unit_add_exec_dependencies(Unit *u, ExecContext *c) {
return r;
}

if (c->private_tmp) {
if (c->private_tmp && !c->dynamic_user) {
r = unit_add_mounts_for(u, "/tmp", UNIT_DEPENDENCY_FILE, UNIT_MOUNT_WANTS);
if (r < 0)
return r;
Expand Down Expand Up @@ -4279,7 +4279,6 @@ int unit_patch_contexts(Unit *u) {
* UID/GID around in the file system or on IPC objects. Hence enforce a strict
* sandbox. */

ec->private_tmp = true;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'd still imply this, it's prettier that way, because it tells external tools that we do not share the regular /tmp/ on these services

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed it to avoid triggering all the machinery that sets up the directories in the host filesystem, passes the pipes, adds the tracking, etc since it's not needed

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And actually given the exact semantics on PrivateTmp, where the storage is actually on the host and it is visible to root there, and it can be shared, I think we should not toggle this, because it would not correspond to what actually happens, and suddenly it could mean either the new thing or the old thing.

ec->remove_ipc = true;
ec->protect_system = PROTECT_SYSTEM_STRICT;
if (ec->protect_home == PROTECT_HOME_NO)
Expand Down
1 change: 0 additions & 1 deletion src/portable/profile/default/service.conf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ CapabilityBoundingSet=CAP_CHOWN CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_FOWNER
CAP_FSETID CAP_IPC_LOCK CAP_IPC_OWNER CAP_KILL CAP_MKNOD CAP_NET_ADMIN \
CAP_NET_BIND_SERVICE CAP_NET_BROADCAST CAP_SETGID CAP_SETPCAP \
CAP_SETUID CAP_SYS_ADMIN CAP_SYS_CHROOT CAP_SYS_NICE CAP_SYS_RESOURCE
PrivateTmp=yes
PrivateDevices=yes
PrivateUsers=yes
ProtectSystem=strict
Expand Down
1 change: 0 additions & 1 deletion src/portable/profile/nonetwork/service.conf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ RemoveIPC=yes
CapabilityBoundingSet=CAP_CHOWN CAP_DAC_OVERRIDE CAP_DAC_READ_SEARCH CAP_FOWNER \
CAP_FSETID CAP_IPC_LOCK CAP_IPC_OWNER CAP_KILL CAP_MKNOD CAP_SETGID CAP_SETPCAP \
CAP_SETUID CAP_SYS_ADMIN CAP_SYS_CHROOT CAP_SYS_NICE CAP_SYS_RESOURCE
PrivateTmp=yes
PrivateDevices=yes
PrivateUsers=yes
ProtectSystem=strict
Expand Down
1 change: 0 additions & 1 deletion src/portable/profile/strict/service.conf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ BindReadOnlyPaths=/etc/machine-id
DynamicUser=yes
RemoveIPC=yes
CapabilityBoundingSet=
PrivateTmp=yes
PrivateDevices=yes
PrivateUsers=yes
ProtectSystem=strict
Expand Down
10 changes: 10 additions & 0 deletions test/units/TEST-07-PID1.exec-context.sh
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,16 @@ if [[ ! -v ASAN_OPTIONS ]] && systemctl --version | grep "+BPF_FRAMEWORK" && ker
(! systemd-run --wait --pipe -p RestrictFileSystems="~proc devtmpfs sysfs" ls /proc)
(! systemd-run --wait --pipe -p RestrictFileSystems="~proc devtmpfs sysfs" ls /dev)
(! systemd-run --wait --pipe -p RestrictFileSystems="~proc devtmpfs sysfs" ls /sys)

# Ensure DynamicUser=yes does not imply PrivateTmp=yes if TemporaryFileSystem=/tmp /var/tmp is set
systemd-run --unit test-07-dynamic-user-tmp.service \
--service-type=notify \
-p DynamicUser=yes \
-p NotifyAccess=all \
sh -c 'systemd-notify --ready; sleep infinity'
(! ls /tmp/systemd-private-"$(tr -d '-' < /proc/sys/kernel/random/boot_id)"-test-07-dynamic-user-tmp.service-* &>/dev/null)
(! ls /var/tmp/systemd-private-"$(tr -d '-' < /proc/sys/kernel/random/boot_id)"-test-07-dynamic-user-tmp.service-* &>/dev/null)
systemctl stop test-07-dynamic-user-tmp.service
fi

# Make sure we properly (de)serialize various string arrays, including whitespaces
Expand Down