[sandbox] Seccomp filter tweaks

* Restrict fcntl() to only the flags we need
* Non-fatally deny tgkill() and rt_sigaction() so that segfaults don't
  result in a SIGSYS. They're debuggable either way, but this way it's
  clearer what the issue is right away.
* Allow truncate() and ftruncate() for sqlite's alternate journal modes
* Slight macro cleanup
* Add missing colon in a DB log message

We don't need to worry about compilation problems arising if glibc or
musl-libc add their own wrapper for the seccomp() syscall in the future.
Ours will/would just silently take precedence over the external one
without interfering with compilation. This should work regardless of
whether libc uses weak symbols and regardless of whether libc is
dynamically or statically linked into the executable. The wrapper's
signature has been stripped of its static and inline qualifiers, as it
must match the exact declaration the libc headers will/would use.

Further, if a pre-compiled binary is run on a system which genuinely
doesn't support seccomp(), it'll just return ENOSYS and the server will
terminate with an error. The user can then just disable the sandbox in
the config file. We don't need any special logic for that scenario.
This commit is contained in:
dongresource 2021-12-24 03:21:43 +01:00
parent f4a7ab7373
commit 384a2ece78
2 changed files with 37 additions and 17 deletions

View File

@ -99,7 +99,7 @@ static void checkMetaTable() {
sqlite3_stmt* stmt; sqlite3_stmt* stmt;
sqlite3_prepare_v2(db, sql, -1, &stmt, NULL); sqlite3_prepare_v2(db, sql, -1, &stmt, NULL);
if (sqlite3_step(stmt) != SQLITE_ROW) { if (sqlite3_step(stmt) != SQLITE_ROW) {
std::cout << "[FATAL] Failed to check meta table" << sqlite3_errmsg(db) << std::endl; std::cout << "[FATAL] Failed to check meta table: " << sqlite3_errmsg(db) << std::endl;
sqlite3_finalize(stmt); sqlite3_finalize(stmt);
exit(1); exit(1);
} }

View File

@ -17,14 +17,8 @@
#include <linux/audit.h> #include <linux/audit.h>
#include <linux/net.h> // for socketcall() args #include <linux/net.h> // for socketcall() args
// our own wrapper for the seccomp() syscall
// TODO: should this be conditional on a feature check or something?
static inline int seccomp(unsigned int operation, unsigned int flags, void *args) {
return syscall(__NR_seccomp, operation, flags, args);
}
/* /*
* Macros borrowed from from https://outflux.net/teach-seccomp/ * Macros adapted from https://outflux.net/teach-seccomp/
* Relevant license: * Relevant license:
* https://source.chromium.org/chromium/chromium/src/+/master:LICENSE * https://source.chromium.org/chromium/chromium/src/+/master:LICENSE
*/ */
@ -44,7 +38,7 @@ static inline int seccomp(unsigned int operation, unsigned int flags, void *args
#define VALIDATE_ARCHITECTURE \ #define VALIDATE_ARCHITECTURE \
BPF_STMT(BPF_LD+BPF_W+BPF_ABS, arch_nr), \ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, arch_nr), \
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ARCH_NR, 1, 0), \ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ARCH_NR, 1, 0), \
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL) BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_PROCESS)
#define EXAMINE_SYSCALL \ #define EXAMINE_SYSCALL \
BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_nr) BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_nr)
@ -53,8 +47,12 @@ static inline int seccomp(unsigned int operation, unsigned int flags, void *args
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \ BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
#define DENY_SYSCALL_ERRNO(name, _errno) \
BPF_JUMP(BPF_JMP+BPF_K+BPF_JEQ, __NR_##name, 0, 1), \
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno))
#define KILL_PROCESS \ #define KILL_PROCESS \
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL) BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_PROCESS)
/* /*
* Macros adapted from openssh's sandbox-seccomp-filter.c * Macros adapted from openssh's sandbox-seccomp-filter.c
@ -114,10 +112,10 @@ static inline int seccomp(unsigned int operation, unsigned int flags, void *args
* Syscalls marked with "maybe" don't seem to be used in the default * Syscalls marked with "maybe" don't seem to be used in the default
* configuration, but should probably be whitelisted anyway. * configuration, but should probably be whitelisted anyway.
* *
* Syscalls marked with "musl-libc", "raspi" or "alt DB" were observed to be * Syscalls marked with comments like "musl-libc", "raspi" or "alt DB" were
* necessary on that configuration, but are probably neccessary in other * observed to be necessary on that particular configuration, but there are
* configurations as well. ("alt DB" represents libsqlite compiled with * probably other configurations in which they are neccessary as well.
* different options.) * ("alt DB" represents libsqlite compiled with different options.)
* *
* Syscalls marked "vdso" aren't normally caught by seccomp because they are * Syscalls marked "vdso" aren't normally caught by seccomp because they are
* implemented in the vdso(7) in most configurations, but it's still prudent * implemented in the vdso(7) in most configurations, but it's still prudent
@ -148,6 +146,8 @@ static sock_filter filter[] = {
ALLOW_SYSCALL(creat), // maybe; for DB journal ALLOW_SYSCALL(creat), // maybe; for DB journal
ALLOW_SYSCALL(unlink), // for DB journal ALLOW_SYSCALL(unlink), // for DB journal
ALLOW_SYSCALL(lseek), // musl-libc; alt DB ALLOW_SYSCALL(lseek), // musl-libc; alt DB
ALLOW_SYSCALL(truncate), // for truncate-mode DB
ALLOW_SYSCALL(ftruncate), // for truncate-mode DB
ALLOW_SYSCALL(dup), // for perror(), apparently ALLOW_SYSCALL(dup), // for perror(), apparently
// more IO // more IO
@ -164,11 +164,20 @@ static sock_filter filter[] = {
ALLOW_SYSCALL(geteuid), ALLOW_SYSCALL(geteuid),
ALLOW_SYSCALL(gettid), // maybe ALLOW_SYSCALL(gettid), // maybe
ALLOW_SYSCALL_ARG(ioctl, 1, TIOCGWINSZ), // musl-libc ALLOW_SYSCALL_ARG(ioctl, 1, TIOCGWINSZ), // musl-libc
ALLOW_SYSCALL(fcntl), ALLOW_SYSCALL_ARG(fcntl, 1, F_GETFL),
ALLOW_SYSCALL_ARG(fcntl, 1, F_SETFL),
ALLOW_SYSCALL_ARG(fcntl, 1, F_GETLK),
ALLOW_SYSCALL_ARG(fcntl, 1, F_SETLK),
ALLOW_SYSCALL_ARG(fcntl, 1, F_SETLKW), // maybe
ALLOW_SYSCALL(exit), ALLOW_SYSCALL(exit),
ALLOW_SYSCALL(exit_group), ALLOW_SYSCALL(exit_group),
ALLOW_SYSCALL(rt_sigprocmask), // musl-libc ALLOW_SYSCALL(rt_sigprocmask), // musl-libc
// to crash properly on SIGSEGV
DENY_SYSCALL_ERRNO(tgkill, EPERM),
DENY_SYSCALL_ERRNO(tkill, EPERM), // musl-libc
DENY_SYSCALL_ERRNO(rt_sigaction, EPERM),
// threading // threading
ALLOW_SYSCALL(futex), ALLOW_SYSCALL(futex),
@ -229,17 +238,28 @@ static sock_filter filter[] = {
#ifdef __NR_geteuid32 #ifdef __NR_geteuid32
ALLOW_SYSCALL(geteuid32), ALLOW_SYSCALL(geteuid32),
#endif #endif
#ifdef __NR_truncate64
ALLOW_SYSCALL(truncate64),
#endif
#ifdef __NR_ftruncate64
ALLOW_SYSCALL(ftruncate64),
#endif
#ifdef __NR_sigreturn #ifdef __NR_sigreturn
ALLOW_SYSCALL(sigreturn), // vdso ALLOW_SYSCALL(sigreturn), // vdso
#endif #endif
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_PROCESS) KILL_PROCESS
}; };
static sock_fprog prog = { static sock_fprog prog = {
ARRLEN(filter), filter ARRLEN(filter), filter
}; };
// our own wrapper for the seccomp() syscall
int seccomp(unsigned int operation, unsigned int flags, void *args) {
return syscall(__NR_seccomp, operation, flags, args);
}
void sandbox_start() { void sandbox_start() {
if (!settings::SANDBOX) { if (!settings::SANDBOX) {
std::cout << "[WARN] Running without a sandbox" << std::endl; std::cout << "[WARN] Running without a sandbox" << std::endl;
@ -259,4 +279,4 @@ void sandbox_start() {
} }
} }
#endif // SANDBOX_SECCOMP #endif