[sandbox] Seccomp filter tweaks

* Restrict fcntl() to only the flags we need
* Non-fatally deny tgkill() and rt_sigaction() so that segfaults don't
  result in a SIGSYS. They're debuggable either way, but this way it's
  clearer what the issue is right away.
* Allow truncate() and ftruncate() for sqlite's alternate journal modes
* Slight macro cleanup
* Add missing colon in a DB log message

We don't need to worry about compilation problems arising if glibc or
musl-libc add their own wrapper for the seccomp() syscall in the future.
Ours will/would just silently take precedence over the external one
without interfering with compilation. This should work regardless of
whether libc uses weak symbols and regardless of whether libc is
dynamically or statically linked into the executable. The wrapper's
signature has been stripped of its static and inline qualifiers, as it
must match the exact declaration the libc headers will/would use.

Further, if a pre-compiled binary is run on a system which genuinely
doesn't support seccomp(), it'll just return ENOSYS and the server will
terminate with an error. The user can then just disable the sandbox in
the config file. We don't need any special logic for that scenario.
This commit is contained in:
dongresource 2021-12-24 03:21:43 +01:00
parent f4a7ab7373
commit 384a2ece78
2 changed files with 37 additions and 17 deletions

View File

@ -99,7 +99,7 @@ static void checkMetaTable() {
sqlite3_stmt* stmt;
sqlite3_prepare_v2(db, sql, -1, &stmt, NULL);
if (sqlite3_step(stmt) != SQLITE_ROW) {
std::cout << "[FATAL] Failed to check meta table" << sqlite3_errmsg(db) << std::endl;
std::cout << "[FATAL] Failed to check meta table: " << sqlite3_errmsg(db) << std::endl;
sqlite3_finalize(stmt);
exit(1);
}

View File

@ -17,14 +17,8 @@
#include <linux/audit.h>
#include <linux/net.h> // for socketcall() args
// our own wrapper for the seccomp() syscall
// TODO: should this be conditional on a feature check or something?
static inline int seccomp(unsigned int operation, unsigned int flags, void *args) {
return syscall(__NR_seccomp, operation, flags, args);
}
/*
* Macros borrowed from from https://outflux.net/teach-seccomp/
* Macros adapted from https://outflux.net/teach-seccomp/
* Relevant license:
* https://source.chromium.org/chromium/chromium/src/+/master:LICENSE
*/
@ -44,7 +38,7 @@ static inline int seccomp(unsigned int operation, unsigned int flags, void *args
#define VALIDATE_ARCHITECTURE \
BPF_STMT(BPF_LD+BPF_W+BPF_ABS, arch_nr), \
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, ARCH_NR, 1, 0), \
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_PROCESS)
#define EXAMINE_SYSCALL \
BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_nr)
@ -53,8 +47,12 @@ static inline int seccomp(unsigned int operation, unsigned int flags, void *args
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
#define DENY_SYSCALL_ERRNO(name, _errno) \
BPF_JUMP(BPF_JMP+BPF_K+BPF_JEQ, __NR_##name, 0, 1), \
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno))
#define KILL_PROCESS \
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_PROCESS)
/*
* Macros adapted from openssh's sandbox-seccomp-filter.c
@ -114,10 +112,10 @@ static inline int seccomp(unsigned int operation, unsigned int flags, void *args
* Syscalls marked with "maybe" don't seem to be used in the default
* configuration, but should probably be whitelisted anyway.
*
* Syscalls marked with "musl-libc", "raspi" or "alt DB" were observed to be
* necessary on that configuration, but are probably neccessary in other
* configurations as well. ("alt DB" represents libsqlite compiled with
* different options.)
* Syscalls marked with comments like "musl-libc", "raspi" or "alt DB" were
* observed to be necessary on that particular configuration, but there are
* probably other configurations in which they are neccessary as well.
* ("alt DB" represents libsqlite compiled with different options.)
*
* Syscalls marked "vdso" aren't normally caught by seccomp because they are
* implemented in the vdso(7) in most configurations, but it's still prudent
@ -148,6 +146,8 @@ static sock_filter filter[] = {
ALLOW_SYSCALL(creat), // maybe; for DB journal
ALLOW_SYSCALL(unlink), // for DB journal
ALLOW_SYSCALL(lseek), // musl-libc; alt DB
ALLOW_SYSCALL(truncate), // for truncate-mode DB
ALLOW_SYSCALL(ftruncate), // for truncate-mode DB
ALLOW_SYSCALL(dup), // for perror(), apparently
// more IO
@ -164,11 +164,20 @@ static sock_filter filter[] = {
ALLOW_SYSCALL(geteuid),
ALLOW_SYSCALL(gettid), // maybe
ALLOW_SYSCALL_ARG(ioctl, 1, TIOCGWINSZ), // musl-libc
ALLOW_SYSCALL(fcntl),
ALLOW_SYSCALL_ARG(fcntl, 1, F_GETFL),
ALLOW_SYSCALL_ARG(fcntl, 1, F_SETFL),
ALLOW_SYSCALL_ARG(fcntl, 1, F_GETLK),
ALLOW_SYSCALL_ARG(fcntl, 1, F_SETLK),
ALLOW_SYSCALL_ARG(fcntl, 1, F_SETLKW), // maybe
ALLOW_SYSCALL(exit),
ALLOW_SYSCALL(exit_group),
ALLOW_SYSCALL(rt_sigprocmask), // musl-libc
// to crash properly on SIGSEGV
DENY_SYSCALL_ERRNO(tgkill, EPERM),
DENY_SYSCALL_ERRNO(tkill, EPERM), // musl-libc
DENY_SYSCALL_ERRNO(rt_sigaction, EPERM),
// threading
ALLOW_SYSCALL(futex),
@ -229,17 +238,28 @@ static sock_filter filter[] = {
#ifdef __NR_geteuid32
ALLOW_SYSCALL(geteuid32),
#endif
#ifdef __NR_truncate64
ALLOW_SYSCALL(truncate64),
#endif
#ifdef __NR_ftruncate64
ALLOW_SYSCALL(ftruncate64),
#endif
#ifdef __NR_sigreturn
ALLOW_SYSCALL(sigreturn), // vdso
#endif
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_PROCESS)
KILL_PROCESS
};
static sock_fprog prog = {
ARRLEN(filter), filter
};
// our own wrapper for the seccomp() syscall
int seccomp(unsigned int operation, unsigned int flags, void *args) {
return syscall(__NR_seccomp, operation, flags, args);
}
void sandbox_start() {
if (!settings::SANDBOX) {
std::cout << "[WARN] Running without a sandbox" << std::endl;
@ -259,4 +279,4 @@ void sandbox_start() {
}
}
#endif // SANDBOX_SECCOMP
#endif