Merge bitcoin/bitcoin#27896: Remove the syscall sandbox

32e2ffc393 Remove the syscall sandbox (fanquake)

Pull request description:

  After initially being merged in #20487, it's no longer clear that an internal syscall sandboxing mechanism is something that Bitcoin Core should have/maintain, especially when compared to better maintained/supported alternatives, e.g. [firejail](https://github.com/netblue30/firejail).

  There is more related discussion in #24771.

  Note that given where it's used, the sandbox also gets dragged into the kernel.

  If it's removed, this should not require any sort of deprecation, as this was only ever an opt-in, experimental feature.

  Closes #24771.

ACKs for top commit:
  davidgumberg:
     crACK 32e2ffc393
  achow101:
    ACK 32e2ffc393
  dergoegge:
    ACK 32e2ffc393

Tree-SHA512: 8cf71c5623bb642cb515531d4a2545d806e503b9d57bfc15a996597632b06103d60d985fd7f843a3c1da6528bc38d0298d6b8bcf0be6f851795a8040d71faf16
This commit is contained in:
Andrew Chow 2023-06-27 17:48:15 -04:00
commit caff95a023
No known key found for this signature in database
GPG Key ID: 17565732E08E5E41
28 changed files with 5 additions and 1175 deletions

View File

@ -15,4 +15,3 @@ export GOAL="install"
export BITCOIN_CONFIG="--enable-debug CC='clang -m32' CXX='clang++ -m32' \
LDFLAGS='--rtlib=compiler-rt -lgcc_s' CPPFLAGS='-DBOOST_MULTI_INDEX_ENABLE_SAFE_MODE'"
export TEST_RUNNER_ENV="BITCOIND=bitcoin-node"
export TEST_RUNNER_EXTRA="--nosandbox"

View File

@ -11,7 +11,7 @@ export CONTAINER_NAME=ci_native_valgrind
export PACKAGES="valgrind clang llvm libclang-rt-dev python3-zmq libevent-dev bsdmainutils libboost-dev libdb5.3++-dev libminiupnpc-dev libnatpmp-dev libzmq3-dev libsqlite3-dev"
export USE_VALGRIND=1
export NO_DEPENDS=1
export TEST_RUNNER_EXTRA="--nosandbox --exclude feature_init,rpc_bind,feature_bind_extra" # Excluded for now, see https://github.com/bitcoin/bitcoin/issues/17765#issuecomment-602068547
export TEST_RUNNER_EXTRA="--exclude feature_init,rpc_bind,feature_bind_extra" # Excluded for now, see https://github.com/bitcoin/bitcoin/issues/17765#issuecomment-602068547
export GOAL="install"
# Temporarily pin dwarf 4, until using Valgrind 3.20 or later
export BITCOIN_CONFIG="--enable-zmq --with-incompatible-bdb --with-gui=no CC=clang CXX=clang++ CFLAGS='-gdwarf-4' CXXFLAGS='-gdwarf-4'" # TODO enable GUI

View File

@ -96,12 +96,6 @@ case $host in
;;
esac
AC_ARG_WITH([seccomp],
[AS_HELP_STRING([--with-seccomp],
[enable experimental syscall sandbox feature (-sandbox), default is yes if seccomp-bpf is detected under Linux x86_64])],
[seccomp_found=$withval],
[seccomp_found=auto])
AC_ARG_ENABLE([c++20],
[AS_HELP_STRING([--enable-c++20],
[enable compilation in c++20 mode (disabled by default)])],
@ -1540,36 +1534,6 @@ if test "$use_external_signer" != "no"; then
fi
AM_CONDITIONAL([ENABLE_EXTERNAL_SIGNER], [test "$use_external_signer" = "yes"])
dnl Do not compile with syscall sandbox support when compiling under the sanitizers.
dnl The sanitizers introduce use of syscalls that are not typically used in bitcoind
dnl (such as execve when the sanitizers execute llvm-symbolizer).
if test "$use_sanitizers" != ""; then
AC_MSG_WARN([Specifying --with-sanitizers forces --without-seccomp since the sanitizers introduce use of syscalls not allowed by the bitcoind syscall sandbox (-sandbox=<mode>).])
seccomp_found=no
fi
if test "$seccomp_found" != "no"; then
AC_MSG_CHECKING([for seccomp-bpf (Linux x86-64)])
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([[
@%:@include <linux/seccomp.h>
]], [[
#if !defined(__x86_64__)
# error Syscall sandbox is an experimental feature currently available only under Linux x86-64.
#endif
]])],[
AC_MSG_RESULT([yes])
seccomp_found="yes"
AC_DEFINE([USE_SYSCALL_SANDBOX], [1], [Define this symbol to build with syscall sandbox support.])
],[
AC_MSG_RESULT([no])
seccomp_found="no"
])
fi
dnl Currently, the -sandbox=<mode> feature is only enabled if seccomp is found.
dnl In the future, sandboxing could also be supported with other
dnl sandboxing mechanisms besides seccomp.
use_syscall_sandbox=$seccomp_found
AM_CONDITIONAL([ENABLE_SYSCALL_SANDBOX], [test "$use_syscall_sandbox" != "no"])
dnl Check for reduced exports
if test "$use_reduce_exports" = "yes"; then
AX_CHECK_COMPILE_FLAG([-fvisibility=hidden], [CORE_CXXFLAGS="$CORE_CXXFLAGS -fvisibility=hidden"],
@ -2009,7 +1973,6 @@ echo
echo "Options used to compile and link:"
echo " external signer = $use_external_signer"
echo " multiprocess = $build_multiprocess"
echo " with experimental syscall sandbox support = $use_syscall_sandbox"
echo " with libs = $build_bitcoin_libs"
echo " with wallet = $enable_wallet"
if test "$enable_wallet" != "no"; then

View File

@ -313,7 +313,6 @@ BITCOIN_CORE_H = \
util/sock.h \
util/spanparsing.h \
util/string.h \
util/syscall_sandbox.h \
util/syserror.h \
util/thread.h \
util/threadinterrupt.h \
@ -741,7 +740,6 @@ libbitcoin_util_a_SOURCES = \
util/spanparsing.cpp \
util/strencodings.cpp \
util/string.cpp \
util/syscall_sandbox.cpp \
util/time.cpp \
util/tokenpipe.cpp \
$(BITCOIN_CORE_H)
@ -976,7 +974,6 @@ libbitcoinkernel_la_SOURCES = \
util/serfloat.cpp \
util/strencodings.cpp \
util/string.cpp \
util/syscall_sandbox.cpp \
util/syserror.cpp \
util/thread.cpp \
util/threadnames.cpp \

View File

@ -24,7 +24,6 @@
#include <util/check.h>
#include <util/exception.h>
#include <util/strencodings.h>
#include <util/syscall_sandbox.h>
#include <util/syserror.h>
#include <util/threadnames.h>
#include <util/tokenpipe.h>
@ -242,7 +241,6 @@ static bool AppInit(NodeContext& node)
daemon_ep.Close();
}
#endif
SetSyscallSandboxPolicy(SyscallSandboxPolicy::SHUTOFF);
return fRet;
}

View File

@ -7,7 +7,6 @@
#include <sync.h>
#include <tinyformat.h>
#include <util/syscall_sandbox.h>
#include <util/threadnames.h>
#include <algorithm>
@ -149,7 +148,6 @@ public:
for (int n = 0; n < threads_num; ++n) {
m_worker_threads.emplace_back([this, n]() {
util::ThreadRename(strprintf("scriptch.%i", n));
SetSyscallSandboxPolicy(SyscallSandboxPolicy::VALIDATION_SCRIPT_CHECK);
Loop(false /* worker thread */);
});
}

View File

@ -18,7 +18,6 @@
#include <shutdown.h>
#include <sync.h>
#include <util/strencodings.h>
#include <util/syscall_sandbox.h>
#include <util/threadnames.h>
#include <util/translation.h>
@ -297,7 +296,6 @@ static void http_reject_request_cb(struct evhttp_request* req, void*)
static void ThreadHTTP(struct event_base* base)
{
util::ThreadRename("http");
SetSyscallSandboxPolicy(SyscallSandboxPolicy::NET_HTTP_SERVER);
LogPrint(BCLog::HTTP, "Entering http event loop\n");
event_base_dispatch(base);
// Event loop will be interrupted by InterruptHTTPServer()
@ -350,7 +348,6 @@ static bool HTTPBindAddresses(struct evhttp* http)
static void HTTPWorkQueueRun(WorkQueue<HTTPClosure>* queue, int worker_num)
{
util::ThreadRename(strprintf("httpworker.%i", worker_num));
SetSyscallSandboxPolicy(SyscallSandboxPolicy::NET_HTTP_SERVER_WORKER);
queue->Run();
}

View File

@ -14,7 +14,6 @@
#include <node/interface_ui.h>
#include <shutdown.h>
#include <tinyformat.h>
#include <util/syscall_sandbox.h>
#include <util/thread.h>
#include <util/translation.h>
#include <validation.h> // For g_chainman
@ -167,7 +166,6 @@ static const CBlockIndex* NextSyncBlock(const CBlockIndex* pindex_prev, CChain&
void BaseIndex::ThreadSync()
{
SetSyscallSandboxPolicy(SyscallSandboxPolicy::TX_INDEX);
// Wait for a possible reindex-chainstate to finish until continuing
// with the index sync
while (!g_indexes_ready_to_sync) {

View File

@ -80,7 +80,6 @@
#include <util/result.h>
#include <util/strencodings.h>
#include <util/string.h>
#include <util/syscall_sandbox.h>
#include <util/syserror.h>
#include <util/thread.h>
#include <util/threadnames.h>
@ -630,10 +629,6 @@ void SetupServerArgs(ArgsManager& argsman)
hidden_args.emplace_back("-daemonwait");
#endif
#if defined(USE_SYSCALL_SANDBOX)
argsman.AddArg("-sandbox=<mode>", "Use the experimental syscall sandbox in the specified mode (-sandbox=log-and-abort or -sandbox=abort). Allow only expected syscalls to be used by bitcoind. Note that this is an experimental new feature that may cause bitcoind to exit or crash unexpectedly: use with caution. In the \"log-and-abort\" mode the invocation of an unexpected syscall results in a debug handler being invoked which will log the incident and terminate the program (without executing the unexpected syscall). In the \"abort\" mode the invocation of an unexpected syscall results in the entire process being killed immediately by the kernel without executing the unexpected syscall.", ArgsManager::ALLOW_ANY, OptionsCategory::OPTIONS);
#endif // USE_SYSCALL_SANDBOX
// Add the hidden options
argsman.AddHiddenArgs(hidden_args);
}
@ -844,7 +839,7 @@ bool AppInitBasicSetup(const ArgsManager& args, std::atomic<int>& exit_status)
return true;
}
bool AppInitParameterInteraction(const ArgsManager& args, bool use_syscall_sandbox)
bool AppInitParameterInteraction(const ArgsManager& args)
{
const CChainParams& chainparams = Params();
// ********************************************************* Step 2: parameter interactions
@ -991,40 +986,6 @@ bool AppInitParameterInteraction(const ArgsManager& args, bool use_syscall_sandb
if (args.GetIntArg("-rpcserialversion", DEFAULT_RPC_SERIALIZE_VERSION) > 1)
return InitError(Untranslated("Unknown rpcserialversion requested."));
#if defined(USE_SYSCALL_SANDBOX)
if (args.IsArgSet("-sandbox") && !args.IsArgNegated("-sandbox")) {
const std::string sandbox_arg{args.GetArg("-sandbox", "")};
bool log_syscall_violation_before_terminating{false};
if (sandbox_arg == "log-and-abort") {
log_syscall_violation_before_terminating = true;
} else if (sandbox_arg == "abort") {
// log_syscall_violation_before_terminating is false by default.
} else {
return InitError(Untranslated("Unknown syscall sandbox mode (-sandbox=<mode>). Available modes are \"log-and-abort\" and \"abort\"."));
}
// execve(...) is not allowed by the syscall sandbox.
const std::vector<std::string> features_using_execve{
"-alertnotify",
"-blocknotify",
"-signer",
"-startupnotify",
"-walletnotify",
};
for (const std::string& feature_using_execve : features_using_execve) {
if (!args.GetArg(feature_using_execve, "").empty()) {
return InitError(Untranslated(strprintf("The experimental syscall sandbox feature (-sandbox=<mode>) is incompatible with %s (which uses execve).", feature_using_execve)));
}
}
if (!SetupSyscallSandbox(log_syscall_violation_before_terminating)) {
return InitError(Untranslated("Installation of the syscall sandbox failed."));
}
if (use_syscall_sandbox) {
SetSyscallSandboxPolicy(SyscallSandboxPolicy::INITIALIZATION);
}
LogPrintf("Experimental syscall sandbox enabled (-sandbox=%s): bitcoind will terminate if an unexpected (not allowlisted) syscall is invoked.\n", sandbox_arg);
}
#endif // USE_SYSCALL_SANDBOX
// Also report errors from parsing before daemonization
{
KernelNotifications notifications{};

View File

@ -44,7 +44,7 @@ bool AppInitBasicSetup(const ArgsManager& args, std::atomic<int>& exit_status);
* @note This can be done before daemonization. Do not call Shutdown() if this function fails.
* @pre Parameters should be parsed and config file should be read, AppInitBasicSetup should have been called.
*/
bool AppInitParameterInteraction(const ArgsManager& args, bool use_syscall_sandbox = true);
bool AppInitParameterInteraction(const ArgsManager& args);
/**
* Initialization sanity checks.
* @note This can be done before daemonization. Do not call Shutdown() if this function fails.

View File

@ -14,7 +14,6 @@
#include <net.h>
#include <netaddress.h>
#include <netbase.h>
#include <util/syscall_sandbox.h>
#include <util/thread.h>
#include <util/threadinterrupt.h>
@ -219,7 +218,6 @@ static bool ProcessUpnp()
static void ThreadMapPort()
{
SetSyscallSandboxPolicy(SyscallSandboxPolicy::INITIALIZATION_MAP_PORT);
bool ok;
do {
ok = false;

View File

@ -30,7 +30,6 @@
#include <util/fs.h>
#include <util/sock.h>
#include <util/strencodings.h>
#include <util/syscall_sandbox.h>
#include <util/thread.h>
#include <util/threadinterrupt.h>
#include <util/trace.h>
@ -1381,7 +1380,6 @@ void CConnman::ThreadSocketHandler()
{
AssertLockNotHeld(m_total_bytes_sent_mutex);
SetSyscallSandboxPolicy(SyscallSandboxPolicy::NET);
while (!interruptNet)
{
DisconnectNodes();
@ -1401,7 +1399,6 @@ void CConnman::WakeMessageHandler()
void CConnman::ThreadDNSAddressSeed()
{
SetSyscallSandboxPolicy(SyscallSandboxPolicy::INITIALIZATION_DNS_SEED);
FastRandomContext rng;
std::vector<std::string> seeds = Params().DNSSeeds();
Shuffle(seeds.begin(), seeds.end(), rng);
@ -1607,7 +1604,6 @@ std::unordered_set<Network> CConnman::GetReachableEmptyNetworks() const
void CConnman::ThreadOpenConnections(const std::vector<std::string> connect)
{
AssertLockNotHeld(m_unused_i2p_sessions_mutex);
SetSyscallSandboxPolicy(SyscallSandboxPolicy::NET_OPEN_CONNECTION);
FastRandomContext rng;
// Connect to specific addresses
if (!connect.empty())
@ -1976,7 +1972,6 @@ std::vector<AddedNodeInfo> CConnman::GetAddedNodeInfo() const
void CConnman::ThreadOpenAddedConnections()
{
AssertLockNotHeld(m_unused_i2p_sessions_mutex);
SetSyscallSandboxPolicy(SyscallSandboxPolicy::NET_ADD_CONNECTION);
while (true)
{
CSemaphoreGrant grant(*semAddnode);
@ -2045,7 +2040,6 @@ void CConnman::ThreadMessageHandler()
{
LOCK(NetEventsInterface::g_msgproc_mutex);
SetSyscallSandboxPolicy(SyscallSandboxPolicy::MESSAGE_HANDLER);
while (!flagInterruptMsgProc)
{
bool fMoreWork = false;

View File

@ -19,7 +19,6 @@
#include <undo.h>
#include <util/batchpriority.h>
#include <util/fs.h>
#include <util/syscall_sandbox.h>
#include <validation.h>
#include <map>
@ -869,7 +868,6 @@ public:
void ThreadImport(ChainstateManager& chainman, std::vector<fs::path> vImportFiles, const fs::path& mempool_path)
{
SetSyscallSandboxPolicy(SyscallSandboxPolicy::INITIALIZATION_LOAD_BLOCKS);
ScheduleBatchPriority();
{

View File

@ -94,7 +94,7 @@ public:
bool baseInitialize() override
{
if (!AppInitBasicSetup(args(), Assert(context())->exit_status)) return false;
if (!AppInitParameterInteraction(args(), /*use_syscall_sandbox=*/false)) return false;
if (!AppInitParameterInteraction(args())) return false;
m_context->kernel = std::make_unique<kernel::Context>();
if (!AppInitSanityChecks(*m_context->kernel)) return false;

View File

@ -21,7 +21,6 @@
#include <univalue.h>
#include <util/any.h>
#include <util/check.h>
#include <util/syscall_sandbox.h>
#include <stdint.h>
#ifdef HAVE_MALLOC_INFO
@ -70,27 +69,6 @@ static RPCHelpMan setmocktime()
};
}
#if defined(USE_SYSCALL_SANDBOX)
static RPCHelpMan invokedisallowedsyscall()
{
return RPCHelpMan{
"invokedisallowedsyscall",
"\nInvoke a disallowed syscall to trigger a syscall sandbox violation. Used for testing purposes.\n",
{},
RPCResult{RPCResult::Type::NONE, "", ""},
RPCExamples{
HelpExampleCli("invokedisallowedsyscall", "") + HelpExampleRpc("invokedisallowedsyscall", "")},
[&](const RPCHelpMan& self, const JSONRPCRequest& request) -> UniValue {
if (!Params().IsTestChain()) {
throw std::runtime_error("invokedisallowedsyscall is used for testing only.");
}
TestDisallowedSandboxCall();
return UniValue::VNULL;
},
};
}
#endif // USE_SYSCALL_SANDBOX
static RPCHelpMan mockscheduler()
{
return RPCHelpMan{"mockscheduler",
@ -428,9 +406,6 @@ void RegisterNodeRPCCommands(CRPCTable& t)
{"hidden", &echo},
{"hidden", &echojson},
{"hidden", &echoipc},
#if defined(USE_SYSCALL_SANDBOX)
{"hidden", &invokedisallowedsyscall},
#endif // USE_SYSCALL_SANDBOX
};
for (const auto& c : commands) {
t.appendCommand(c.name, &c);

View File

@ -5,7 +5,6 @@
#include <scheduler.h>
#include <sync.h>
#include <util/syscall_sandbox.h>
#include <util/time.h>
#include <cassert>
@ -23,7 +22,6 @@ CScheduler::~CScheduler()
void CScheduler::serviceQueue()
{
SetSyscallSandboxPolicy(SyscallSandboxPolicy::SCHEDULER);
WAIT_LOCK(newTaskMutex, lock);
++nThreadsServicingQueue;

View File

@ -16,7 +16,6 @@
#include <netbase.h>
#include <util/readwritefile.h>
#include <util/strencodings.h>
#include <util/syscall_sandbox.h>
#include <util/thread.h>
#include <util/time.h>
@ -653,7 +652,6 @@ static std::thread torControlThread;
static void TorControlThread(CService onion_service_target)
{
SetSyscallSandboxPolicy(SyscallSandboxPolicy::TOR_CONTROL);
TorController ctrl(gBase, gArgs.GetArg("-torcontrol", DEFAULT_TOR_CONTROL), onion_service_target);
event_base_dispatch(gBase);

View File

@ -1,927 +0,0 @@
// Copyright (c) 2020-2022 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#if defined(HAVE_CONFIG_H)
#include <config/bitcoin-config.h>
#endif // defined(HAVE_CONFIG_H)
#include <util/syscall_sandbox.h>
#if defined(USE_SYSCALL_SANDBOX)
#include <array>
#include <cassert>
#include <cstdint>
#include <exception>
#include <map>
#include <new>
#include <set>
#include <string>
#include <vector>
#include <logging.h>
#include <tinyformat.h>
#include <util/threadnames.h>
#include <linux/audit.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <linux/unistd.h>
#include <signal.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include <unistd.h>
namespace {
bool g_syscall_sandbox_enabled{false};
bool g_syscall_sandbox_log_violation_before_terminating{false};
// Compile-time guard: refuse to build this translation unit for any target
// other than x86-64.
#if !defined(__x86_64__)
#error Syscall sandbox is an experimental feature currently available only under Linux x86-64.
#endif // !defined(__x86_64__)
#ifndef SECCOMP_RET_KILL_PROCESS
#define SECCOMP_RET_KILL_PROCESS 0x80000000U
#endif
// Define system call numbers for x86_64 that are referenced in the system call profile
// but not provided by the kernel headers used in the GUIX build.
// Usually, they can be found via "grep name /usr/include/x86_64-linux-gnu/asm/unistd_64.h"
#ifndef __NR_clone3
#define __NR_clone3 435
#endif
#ifndef __NR_statx
#define __NR_statx 332
#endif
#ifndef __NR_getrandom
#define __NR_getrandom 318
#endif
#ifndef __NR_membarrier
#define __NR_membarrier 324
#endif
#ifndef __NR_copy_file_range
#define __NR_copy_file_range 326
#endif
#ifndef __NR_rseq
#define __NR_rseq 334
#endif
// This list of syscalls in LINUX_SYSCALLS is only used to map syscall numbers to syscall names in
// order to be able to print user friendly error messages which include the syscall name in addition
// to the syscall number.
//
// Example output in case of a syscall violation where the syscall is present in LINUX_SYSCALLS:
//
// ```
// 2021-06-09T12:34:56Z ERROR: The syscall "execve" (syscall number 59) is not allowed by the syscall sandbox in thread "msghand". Please report.
// ```
//
// Example output in case of a syscall violation where the syscall is not present in LINUX_SYSCALLS:
//
// ```
// 2021-06-09T12:34:56Z ERROR: The syscall "*unknown*" (syscall number 314) is not allowed by the syscall sandbox in thread "msghand". Please report.
// ```
//
// LINUX_SYSCALLS contains two types of syscalls:
// 1.) Syscalls that are present under all architectures or relevant Linux kernel versions for which
// we support the syscall sandbox feature (currently only Linux x86-64). Examples include read,
// write, open, close, etc.
// 2.) Syscalls that are present under a subset of architectures or relevant Linux kernel versions
// for which we support the syscall sandbox feature. This type of syscalls should be added to
// LINUX_SYSCALLS conditional on availability like in the following example:
// ...
// #if defined(__NR_arch_dependent_syscall)
// {__NR_arch_dependent_syscall, "arch_dependent_syscall"},
// #endif // defined(__NR_arch_dependent_syscall)
// ...
const std::map<uint32_t, std::string> LINUX_SYSCALLS{
{__NR_accept, "accept"},
{__NR_accept4, "accept4"},
{__NR_access, "access"},
{__NR_acct, "acct"},
{__NR_add_key, "add_key"},
{__NR_adjtimex, "adjtimex"},
{__NR_afs_syscall, "afs_syscall"},
{__NR_alarm, "alarm"},
{__NR_arch_prctl, "arch_prctl"},
{__NR_bind, "bind"},
{__NR_bpf, "bpf"},
{__NR_brk, "brk"},
{__NR_capget, "capget"},
{__NR_capset, "capset"},
{__NR_chdir, "chdir"},
{__NR_chmod, "chmod"},
{__NR_chown, "chown"},
{__NR_chroot, "chroot"},
{__NR_clock_adjtime, "clock_adjtime"},
{__NR_clock_getres, "clock_getres"},
{__NR_clock_gettime, "clock_gettime"},
{__NR_clock_nanosleep, "clock_nanosleep"},
{__NR_clock_settime, "clock_settime"},
{__NR_clone, "clone"},
{__NR_clone3, "clone3"},
{__NR_close, "close"},
{__NR_connect, "connect"},
{__NR_copy_file_range, "copy_file_range"},
{__NR_creat, "creat"},
{__NR_create_module, "create_module"},
{__NR_delete_module, "delete_module"},
{__NR_dup, "dup"},
{__NR_dup2, "dup2"},
{__NR_dup3, "dup3"},
{__NR_epoll_create, "epoll_create"},
{__NR_epoll_create1, "epoll_create1"},
{__NR_epoll_ctl, "epoll_ctl"},
{__NR_epoll_ctl_old, "epoll_ctl_old"},
{__NR_epoll_pwait, "epoll_pwait"},
{__NR_epoll_wait, "epoll_wait"},
{__NR_epoll_wait_old, "epoll_wait_old"},
{__NR_eventfd, "eventfd"},
{__NR_eventfd2, "eventfd2"},
{__NR_execve, "execve"},
{__NR_execveat, "execveat"},
{__NR_exit, "exit"},
{__NR_exit_group, "exit_group"},
{__NR_faccessat, "faccessat"},
{__NR_fadvise64, "fadvise64"},
{__NR_fallocate, "fallocate"},
{__NR_fanotify_init, "fanotify_init"},
{__NR_fanotify_mark, "fanotify_mark"},
{__NR_fchdir, "fchdir"},
{__NR_fchmod, "fchmod"},
{__NR_fchmodat, "fchmodat"},
{__NR_fchown, "fchown"},
{__NR_fchownat, "fchownat"},
{__NR_fcntl, "fcntl"},
{__NR_fdatasync, "fdatasync"},
{__NR_fgetxattr, "fgetxattr"},
{__NR_finit_module, "finit_module"},
{__NR_flistxattr, "flistxattr"},
{__NR_flock, "flock"},
{__NR_fork, "fork"},
{__NR_fremovexattr, "fremovexattr"},
{__NR_fsetxattr, "fsetxattr"},
{__NR_fstat, "fstat"},
{__NR_fstatfs, "fstatfs"},
{__NR_fsync, "fsync"},
{__NR_ftruncate, "ftruncate"},
{__NR_futex, "futex"},
{__NR_futimesat, "futimesat"},
{__NR_get_kernel_syms, "get_kernel_syms"},
{__NR_get_mempolicy, "get_mempolicy"},
{__NR_get_robust_list, "get_robust_list"},
{__NR_get_thread_area, "get_thread_area"},
{__NR_getcpu, "getcpu"},
{__NR_getcwd, "getcwd"},
{__NR_getdents, "getdents"},
{__NR_getdents64, "getdents64"},
{__NR_getegid, "getegid"},
{__NR_geteuid, "geteuid"},
{__NR_getgid, "getgid"},
{__NR_getgroups, "getgroups"},
{__NR_getitimer, "getitimer"},
{__NR_getpeername, "getpeername"},
{__NR_getpgid, "getpgid"},
{__NR_getpgrp, "getpgrp"},
{__NR_getpid, "getpid"},
{__NR_getpmsg, "getpmsg"},
{__NR_getppid, "getppid"},
{__NR_getpriority, "getpriority"},
{__NR_getrandom, "getrandom"},
{__NR_getresgid, "getresgid"},
{__NR_getresuid, "getresuid"},
{__NR_getrlimit, "getrlimit"},
{__NR_getrusage, "getrusage"},
{__NR_getsid, "getsid"},
{__NR_getsockname, "getsockname"},
{__NR_getsockopt, "getsockopt"},
{__NR_gettid, "gettid"},
{__NR_gettimeofday, "gettimeofday"},
{__NR_getuid, "getuid"},
{__NR_getxattr, "getxattr"},
{__NR_init_module, "init_module"},
{__NR_inotify_add_watch, "inotify_add_watch"},
{__NR_inotify_init, "inotify_init"},
{__NR_inotify_init1, "inotify_init1"},
{__NR_inotify_rm_watch, "inotify_rm_watch"},
{__NR_io_cancel, "io_cancel"},
{__NR_io_destroy, "io_destroy"},
{__NR_io_getevents, "io_getevents"},
{__NR_io_setup, "io_setup"},
{__NR_io_submit, "io_submit"},
{__NR_ioctl, "ioctl"},
{__NR_ioperm, "ioperm"},
{__NR_iopl, "iopl"},
{__NR_ioprio_get, "ioprio_get"},
{__NR_ioprio_set, "ioprio_set"},
{__NR_kcmp, "kcmp"},
{__NR_kexec_file_load, "kexec_file_load"},
{__NR_kexec_load, "kexec_load"},
{__NR_keyctl, "keyctl"},
{__NR_kill, "kill"},
{__NR_lchown, "lchown"},
{__NR_lgetxattr, "lgetxattr"},
{__NR_link, "link"},
{__NR_linkat, "linkat"},
{__NR_listen, "listen"},
{__NR_listxattr, "listxattr"},
{__NR_llistxattr, "llistxattr"},
{__NR_lookup_dcookie, "lookup_dcookie"},
{__NR_lremovexattr, "lremovexattr"},
{__NR_lseek, "lseek"},
{__NR_lsetxattr, "lsetxattr"},
{__NR_lstat, "lstat"},
{__NR_madvise, "madvise"},
{__NR_mbind, "mbind"},
{__NR_membarrier, "membarrier"},
{__NR_memfd_create, "memfd_create"},
{__NR_migrate_pages, "migrate_pages"},
{__NR_mincore, "mincore"},
{__NR_mkdir, "mkdir"},
{__NR_mkdirat, "mkdirat"},
{__NR_mknod, "mknod"},
{__NR_mknodat, "mknodat"},
{__NR_mlock, "mlock"},
{__NR_mlock2, "mlock2"},
{__NR_mlockall, "mlockall"},
{__NR_mmap, "mmap"},
{__NR_modify_ldt, "modify_ldt"},
{__NR_mount, "mount"},
{__NR_move_pages, "move_pages"},
{__NR_mprotect, "mprotect"},
{__NR_mq_getsetattr, "mq_getsetattr"},
{__NR_mq_notify, "mq_notify"},
{__NR_mq_open, "mq_open"},
{__NR_mq_timedreceive, "mq_timedreceive"},
{__NR_mq_timedsend, "mq_timedsend"},
{__NR_mq_unlink, "mq_unlink"},
{__NR_mremap, "mremap"},
{__NR_msgctl, "msgctl"},
{__NR_msgget, "msgget"},
{__NR_msgrcv, "msgrcv"},
{__NR_msgsnd, "msgsnd"},
{__NR_msync, "msync"},
{__NR_munlock, "munlock"},
{__NR_munlockall, "munlockall"},
{__NR_munmap, "munmap"},
{__NR_name_to_handle_at, "name_to_handle_at"},
{__NR_nanosleep, "nanosleep"},
{__NR_newfstatat, "newfstatat"},
{__NR_nfsservctl, "nfsservctl"},
{__NR_open, "open"},
{__NR_open_by_handle_at, "open_by_handle_at"},
{__NR_openat, "openat"},
{__NR_pause, "pause"},
{__NR_perf_event_open, "perf_event_open"},
{__NR_personality, "personality"},
{__NR_pipe, "pipe"},
{__NR_pipe2, "pipe2"},
{__NR_pivot_root, "pivot_root"},
#ifdef __NR_pkey_alloc
{__NR_pkey_alloc, "pkey_alloc"},
#endif
#ifdef __NR_pkey_free
{__NR_pkey_free, "pkey_free"},
#endif
#ifdef __NR_pkey_mprotect
{__NR_pkey_mprotect, "pkey_mprotect"},
#endif
{__NR_poll, "poll"},
{__NR_ppoll, "ppoll"},
{__NR_prctl, "prctl"},
{__NR_pread64, "pread64"},
{__NR_preadv, "preadv"},
#ifdef __NR_preadv2
{__NR_preadv2, "preadv2"},
#endif
{__NR_prlimit64, "prlimit64"},
{__NR_process_vm_readv, "process_vm_readv"},
{__NR_process_vm_writev, "process_vm_writev"},
{__NR_pselect6, "pselect6"},
{__NR_ptrace, "ptrace"},
{__NR_putpmsg, "putpmsg"},
{__NR_pwrite64, "pwrite64"},
{__NR_pwritev, "pwritev"},
#ifdef __NR_pwritev2
{__NR_pwritev2, "pwritev2"},
#endif
{__NR__sysctl, "_sysctl"},
{__NR_query_module, "query_module"},
{__NR_quotactl, "quotactl"},
{__NR_read, "read"},
{__NR_readahead, "readahead"},
{__NR_readlink, "readlink"},
{__NR_readlinkat, "readlinkat"},
{__NR_readv, "readv"},
{__NR_reboot, "reboot"},
{__NR_recvfrom, "recvfrom"},
{__NR_recvmmsg, "recvmmsg"},
{__NR_recvmsg, "recvmsg"},
{__NR_remap_file_pages, "remap_file_pages"},
{__NR_removexattr, "removexattr"},
{__NR_rename, "rename"},
{__NR_renameat, "renameat"},
{__NR_renameat2, "renameat2"},
{__NR_request_key, "request_key"},
{__NR_restart_syscall, "restart_syscall"},
{__NR_rmdir, "rmdir"},
{__NR_rseq, "rseq"},
{__NR_rt_sigaction, "rt_sigaction"},
{__NR_rt_sigpending, "rt_sigpending"},
{__NR_rt_sigprocmask, "rt_sigprocmask"},
{__NR_rt_sigqueueinfo, "rt_sigqueueinfo"},
{__NR_rt_sigreturn, "rt_sigreturn"},
{__NR_rt_sigsuspend, "rt_sigsuspend"},
{__NR_rt_sigtimedwait, "rt_sigtimedwait"},
{__NR_rt_tgsigqueueinfo, "rt_tgsigqueueinfo"},
{__NR_sched_get_priority_max, "sched_get_priority_max"},
{__NR_sched_get_priority_min, "sched_get_priority_min"},
{__NR_sched_getaffinity, "sched_getaffinity"},
{__NR_sched_getattr, "sched_getattr"},
{__NR_sched_getparam, "sched_getparam"},
{__NR_sched_getscheduler, "sched_getscheduler"},
{__NR_sched_rr_get_interval, "sched_rr_get_interval"},
{__NR_sched_setaffinity, "sched_setaffinity"},
{__NR_sched_setattr, "sched_setattr"},
{__NR_sched_setparam, "sched_setparam"},
{__NR_sched_setscheduler, "sched_setscheduler"},
{__NR_sched_yield, "sched_yield"},
{__NR_seccomp, "seccomp"},
{__NR_security, "security"},
{__NR_select, "select"},
{__NR_semctl, "semctl"},
{__NR_semget, "semget"},
{__NR_semop, "semop"},
{__NR_semtimedop, "semtimedop"},
{__NR_sendfile, "sendfile"},
{__NR_sendmmsg, "sendmmsg"},
{__NR_sendmsg, "sendmsg"},
{__NR_sendto, "sendto"},
{__NR_set_mempolicy, "set_mempolicy"},
{__NR_set_robust_list, "set_robust_list"},
{__NR_set_thread_area, "set_thread_area"},
{__NR_set_tid_address, "set_tid_address"},
{__NR_setdomainname, "setdomainname"},
{__NR_setfsgid, "setfsgid"},
{__NR_setfsuid, "setfsuid"},
{__NR_setgid, "setgid"},
{__NR_setgroups, "setgroups"},
{__NR_sethostname, "sethostname"},
{__NR_setitimer, "setitimer"},
{__NR_setns, "setns"},
{__NR_setpgid, "setpgid"},
{__NR_setpriority, "setpriority"},
{__NR_setregid, "setregid"},
{__NR_setresgid, "setresgid"},
{__NR_setresuid, "setresuid"},
{__NR_setreuid, "setreuid"},
{__NR_setrlimit, "setrlimit"},
{__NR_setsid, "setsid"},
{__NR_setsockopt, "setsockopt"},
{__NR_settimeofday, "settimeofday"},
{__NR_setuid, "setuid"},
{__NR_setxattr, "setxattr"},
{__NR_shmat, "shmat"},
{__NR_shmctl, "shmctl"},
{__NR_shmdt, "shmdt"},
{__NR_shmget, "shmget"},
{__NR_shutdown, "shutdown"},
{__NR_sigaltstack, "sigaltstack"},
{__NR_signalfd, "signalfd"},
{__NR_signalfd4, "signalfd4"},
{__NR_socket, "socket"},
{__NR_socketpair, "socketpair"},
{__NR_splice, "splice"},
{__NR_stat, "stat"},
{__NR_statfs, "statfs"},
{__NR_statx, "statx"},
{__NR_swapoff, "swapoff"},
{__NR_swapon, "swapon"},
{__NR_symlink, "symlink"},
{__NR_symlinkat, "symlinkat"},
{__NR_sync, "sync"},
{__NR_sync_file_range, "sync_file_range"},
{__NR_syncfs, "syncfs"},
{__NR_sysfs, "sysfs"},
{__NR_sysinfo, "sysinfo"},
{__NR_syslog, "syslog"},
{__NR_tee, "tee"},
{__NR_tgkill, "tgkill"},
{__NR_time, "time"},
{__NR_timer_create, "timer_create"},
{__NR_timer_delete, "timer_delete"},
{__NR_timer_getoverrun, "timer_getoverrun"},
{__NR_timer_gettime, "timer_gettime"},
{__NR_timer_settime, "timer_settime"},
{__NR_timerfd_create, "timerfd_create"},
{__NR_timerfd_gettime, "timerfd_gettime"},
{__NR_timerfd_settime, "timerfd_settime"},
{__NR_times, "times"},
{__NR_tkill, "tkill"},
{__NR_truncate, "truncate"},
{__NR_tuxcall, "tuxcall"},
{__NR_umask, "umask"},
{__NR_umount2, "umount2"},
{__NR_uname, "uname"},
{__NR_unlink, "unlink"},
{__NR_unlinkat, "unlinkat"},
{__NR_unshare, "unshare"},
{__NR_uselib, "uselib"},
{__NR_userfaultfd, "userfaultfd"},
{__NR_ustat, "ustat"},
{__NR_utime, "utime"},
{__NR_utimensat, "utimensat"},
{__NR_utimes, "utimes"},
{__NR_vfork, "vfork"},
{__NR_vhangup, "vhangup"},
{__NR_vmsplice, "vmsplice"},
{__NR_vserver, "vserver"},
{__NR_wait4, "wait4"},
{__NR_waitid, "waitid"},
{__NR_write, "write"},
{__NR_writev, "writev"},
};
/**
 * Translate a syscall number into its human readable name.
 *
 * @param syscall_number  Syscall number to look up in LINUX_SYSCALLS.
 * @return The name registered for the number, or the placeholder "*unknown*"
 *         when the number has no entry in LINUX_SYSCALLS.
 */
std::string GetLinuxSyscallName(uint32_t syscall_number)
{
    if (const auto it = LINUX_SYSCALLS.find(syscall_number); it != LINUX_SYSCALLS.end()) {
        return it->second;
    }
    return "*unknown*";
}
// SA_SIGINFO-style signal handler that reports a syscall sandbox violation: it
// works out which syscall was attempted and in which thread, writes an error
// message to stderr and to the log, and then terminates the process. It never
// returns to the interrupted code.
//
// Parameters:
// * (unnamed int): the signal number; not used.
// * signal_info: signal details; si_code is asserted to be the seccomp code.
// * void_signal_context: the ucontext_t saved by the kernel, from which the
//   syscall number is read.
//
// See Linux kernel developer Kees Cook's seccomp guide at <https://outflux.net/teach-seccomp/> for
// an accessible introduction to using seccomp.
//
// This function largely follows <https://outflux.net/teach-seccomp/step-3/syscall-reporter.c> and
// <https://outflux.net/teach-seccomp/step-3/seccomp-bpf.h>.
//
// Seccomp BPF resources:
// * Seccomp BPF documentation: <https://www.kernel.org/doc/html/latest/userspace-api/seccomp_filter.html>
// * seccomp(2) manual page: <https://www.kernel.org/doc/man-pages/online/pages/man2/seccomp.2.html>
// * Seccomp BPF demo code samples: <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/samples/seccomp>
//
// NOTE(review): std::string construction, stream formatting and LogPrintf are
// not on the POSIX list of async-signal-safe operations; presumably this is
// tolerated because the process is terminated right afterwards — confirm.
void SyscallSandboxDebugSignalHandler(int, siginfo_t* signal_info, void* void_signal_context)
{
// The si_code field inside the siginfo_t argument that is passed to a SA_SIGINFO signal handler
// is a value indicating why the signal was sent.
//
// The following value can be placed in si_code for a SIGSYS signal:
// * SYS_SECCOMP (since Linux 3.5): Triggered by a seccomp(2) filter rule.
constexpr int32_t SYS_SECCOMP_SI_CODE{1};
assert(signal_info->si_code == SYS_SECCOMP_SI_CODE);
// The ucontext_t structure contains signal context information that was saved on the user-space
// stack by the kernel.
const ucontext_t* signal_context = static_cast<ucontext_t*>(void_signal_context);
assert(signal_context != nullptr);
// The code below allocates (std::string, strprintf). With std::terminate installed as the
// new-handler, a failed allocation ends the process instead of throwing std::bad_alloc.
std::set_new_handler(std::terminate);
// Portability note: REG_RAX is Linux x86_64 specific.
// The saved RAX register value is taken as the number of the offending syscall.
const uint32_t syscall_number = static_cast<uint32_t>(signal_context->uc_mcontext.gregs[REG_RAX]);
const std::string syscall_name = GetLinuxSyscallName(syscall_number);
// Fall back to a placeholder when the current thread has no internal name.
const std::string thread_name = !util::ThreadGetInternalName().empty() ? util::ThreadGetInternalName() : "*unnamed*";
const std::string error_message = strprintf("ERROR: The syscall \"%s\" (syscall number %d) is not allowed by the syscall sandbox in thread \"%s\". Please report.", syscall_name, syscall_number, thread_name);
// Report on stderr as well as in the log before terminating.
tfm::format(std::cerr, "%s\n", error_message);
LogPrintf("%s\n", error_message);
std::terminate();
}
// Install SyscallSandboxDebugSignalHandler as the process' SIGSYS handler and
// make sure SIGSYS is not blocked, so that seccomp violations get reported.
//
// Returns true on success, false if either sigaction(2) or sigprocmask(2)
// fails.
//
// Modelled on install_syscall_reporter from Kees Cook's seccomp guide:
// <https://outflux.net/teach-seccomp/step-3/syscall-reporter.c>
bool SetupSyscallSandboxDebugHandler()
{
    // SA_SIGINFO selects the three-argument handler form (sa_sigaction).
    struct sigaction sigsys_action = {};
    sigsys_action.sa_sigaction = &SyscallSandboxDebugSignalHandler;
    sigsys_action.sa_flags = SA_SIGINFO;
    if (sigaction(SIGSYS, &sigsys_action, nullptr) < 0) {
        return false;
    }

    // Unblock SIGSYS in case it is currently masked.
    sigset_t sigsys_only;
    sigemptyset(&sigsys_only);
    sigaddset(&sigsys_only, SIGSYS);
    return sigprocmask(SIG_UNBLOCK, &sigsys_only, nullptr) == 0;
}
// What the seccomp filter produced by SeccompPolicyBuilder::BuildFilter(...)
// does when a syscall is not in the allowlist.
enum class SyscallSandboxAction {
// Filter returns SECCOMP_RET_KILL_PROCESS: the whole process is killed.
KILL_PROCESS,
// Filter returns SECCOMP_RET_TRAP: the kernel sends SIGSYS to the triggering
// task so that a signal handler can report the violation.
INVOKE_SIGNAL_HANDLER,
};
// Collects an allowlist of syscall numbers and compiles it into a seccomp-bpf
// program.
//
// A freshly constructed builder already contains the baseline groups enabled
// in the constructor. File system access (AllowFileSystem) and network access
// (AllowNetwork) are not part of the baseline and must be enabled explicitly
// by the caller before invoking BuildFilter(...).
class SeccompPolicyBuilder
{
    // Syscall numbers that the generated filter lets through.
    std::set<uint32_t> allowed_syscalls;

public:
    SeccompPolicyBuilder()
    {
        // Baseline: groups every policy gets.
        AllowAddressSpaceAccess();
        AllowEpoll();
        AllowEventFd();
        AllowFutex();
        AllowGeneralIo();
        AllowGetRandom();
        AllowGetSimpleId();
        AllowGetTime();
        AllowGlobalProcessEnvironment();
        AllowGlobalSystemStatus();
        AllowKernelInternalApi();
        AllowNetworkSocketInformation();
        AllowOperationOnExistingFileDescriptor();
        AllowPipe();
        AllowPrctl();
        AllowProcessStartOrDeath();
        AllowScheduling();
        AllowSignalHandling();
        AllowSleep();
        AllowUmask();
    }

    // Memory mapping, protection and locking.
    void AllowAddressSpaceAccess()
    {
        allowed_syscalls.insert({
            __NR_brk,        // change data segment size
            __NR_madvise,    // give advice about use of memory
            __NR_membarrier, // issue memory barriers on a set of threads
            __NR_mincore,    // check if virtual memory is in RAM
            __NR_mlock,      // lock memory
            __NR_mmap,       // map files or devices into memory
            __NR_mprotect,   // set protection on a region of memory
            __NR_mremap,     // remap a virtual memory address
            __NR_munlock,    // unlock memory
            __NR_munmap,     // unmap files or devices from memory
        });
    }

    // epoll-based I/O event notification.
    void AllowEpoll()
    {
        allowed_syscalls.insert({
            __NR_epoll_create1, // open an epoll file descriptor
            __NR_epoll_ctl,     // control interface for an epoll file descriptor
            __NR_epoll_pwait,   // wait for an I/O event on an epoll file descriptor
            __NR_epoll_wait,    // wait for an I/O event on an epoll file descriptor
        });
    }

    void AllowEventFd()
    {
        allowed_syscalls.insert(__NR_eventfd2); // create a file descriptor for event notification
    }

    // Path-based and metadata file system operations. Not part of the
    // baseline; callers opt in per policy.
    void AllowFileSystem()
    {
        allowed_syscalls.insert({
            __NR_access,          // check user's permissions for a file
            __NR_chdir,           // change working directory
            __NR_chmod,           // change permissions of a file
            __NR_copy_file_range, // copy a range of data from one file to another
            __NR_fallocate,       // manipulate file space
            __NR_fchmod,          // change permissions of a file
            __NR_fchown,          // change ownership of a file
            __NR_fdatasync,       // synchronize a file's in-core state with storage device
            __NR_flock,           // apply or remove an advisory lock on an open file
            __NR_fstat,           // get file status
            __NR_fstatfs,         // get file system status
            __NR_fsync,           // synchronize a file's in-core state with storage device
            __NR_ftruncate,       // truncate a file to a specified length
            __NR_getcwd,          // get current working directory
            __NR_getdents,        // get directory entries
            __NR_getdents64,      // get directory entries
            __NR_lstat,           // get file status
            __NR_mkdir,           // create a directory
            __NR_newfstatat,      // get file status
            __NR_open,            // open and possibly create a file
            __NR_openat,          // open and possibly create a file
            __NR_readlink,        // read value of a symbolic link
            __NR_rename,          // change the name or location of a file
            __NR_rmdir,           // delete a directory
            __NR_sendfile,        // transfer data between file descriptors
            __NR_stat,            // get file status
            __NR_statfs,          // get filesystem statistics
            __NR_statx,           // get file status (extended)
            __NR_unlink,          // delete a name and possibly the file it refers to
            __NR_unlinkat,        // delete relative to a directory file descriptor
        });
    }

    void AllowFutex()
    {
        allowed_syscalls.insert({
            __NR_futex,           // fast user-space locking
            __NR_set_robust_list, // set list of robust futexes
        });
    }

    // Reads, writes and readiness polling on already-open descriptors.
    void AllowGeneralIo()
    {
        allowed_syscalls.insert({
            __NR_ioctl,    // control device
            __NR_lseek,    // reposition read/write file offset
            __NR_poll,     // wait for some event on a file descriptor
            __NR_ppoll,    // wait for some event on a file descriptor
            __NR_pread64,  // read from a file descriptor at a given offset
            __NR_pwrite64, // write to a file descriptor at a given offset
            __NR_read,     // read from a file descriptor
            __NR_readv,    // read data into multiple buffers
            __NR_recvfrom, // receive a message from a socket
            __NR_recvmsg,  // receive a message from a socket
            __NR_select,   // synchronous I/O multiplexing
            __NR_sendmmsg, // send multiple messages on a socket
            __NR_sendmsg,  // send a message on a socket
            __NR_sendto,   // send a message on a socket
            __NR_write,    // write to a file descriptor
            __NR_writev,   // write data into multiple buffers
        });
    }

    void AllowGetRandom()
    {
        allowed_syscalls.insert(__NR_getrandom); // obtain a series of random bytes
    }

    // Read-only queries of user/group/process/thread identifiers.
    void AllowGetSimpleId()
    {
        allowed_syscalls.insert({
            __NR_getegid,   // get group identity
            __NR_geteuid,   // get user identity
            __NR_getgid,    // get group identity
            __NR_getpgid,   // get process group
            __NR_getpid,    // get process identification
            __NR_getppid,   // get process identification
            __NR_getresgid, // get real, effective and saved group IDs
            __NR_getresuid, // get real, effective and saved user IDs
            __NR_getsid,    // get session ID
            __NR_gettid,    // get thread identification
            __NR_getuid,    // get user identity
        });
    }

    void AllowGetTime()
    {
        allowed_syscalls.insert({
            __NR_clock_getres,  // find the resolution (precision) of the specified clock
            __NR_clock_gettime, // retrieve the time of the specified clock
            __NR_gettimeofday,  // get timeval
        });
    }

    void AllowGlobalProcessEnvironment()
    {
        allowed_syscalls.insert({
            __NR_getrlimit, // get resource limits
            __NR_getrusage, // get resource usage
            __NR_prlimit64, // get/set resource limits
        });
    }

    void AllowGlobalSystemStatus()
    {
        allowed_syscalls.insert({
            __NR_sysinfo, // return system information
            __NR_uname,   // get name and information about current kernel
        });
    }

    void AllowKernelInternalApi()
    {
        allowed_syscalls.insert(__NR_restart_syscall); // restart a system call after interruption by a stop signal
    }

    // Creating, binding and connecting sockets. Not part of the baseline;
    // callers opt in per policy.
    void AllowNetwork()
    {
        allowed_syscalls.insert({
            __NR_accept,     // accept a connection on a socket
            __NR_accept4,    // accept a connection on a socket
            __NR_bind,       // bind a name to a socket
            __NR_connect,    // initiate a connection on a socket
            __NR_listen,     // listen for connections on a socket
            __NR_setsockopt, // set options on sockets
            __NR_socket,     // create an endpoint for communication
            __NR_socketpair, // create a pair of connected sockets
        });
    }

    // Read-only queries on existing sockets.
    void AllowNetworkSocketInformation()
    {
        allowed_syscalls.insert({
            __NR_getpeername, // get name of connected peer socket
            __NR_getsockname, // get socket name
            __NR_getsockopt,  // get options on sockets
        });
    }

    void AllowOperationOnExistingFileDescriptor()
    {
        allowed_syscalls.insert({
            __NR_close,    // close a file descriptor
            __NR_dup,      // duplicate a file descriptor
            __NR_dup2,     // duplicate a file descriptor
            __NR_fcntl,    // manipulate file descriptor
            __NR_shutdown, // shut down part of a full-duplex connection
        });
    }

    void AllowPipe()
    {
        allowed_syscalls.insert({
            __NR_pipe,  // create pipe
            __NR_pipe2, // create pipe
        });
    }

    void AllowPrctl()
    {
        allowed_syscalls.insert({
            __NR_arch_prctl, // set architecture-specific thread state
            __NR_prctl,      // operations on a process
        });
    }

    void AllowProcessStartOrDeath()
    {
        allowed_syscalls.insert({
            __NR_clone,      // create a child process
            __NR_clone3,     // create a child process
            __NR_exit,       // terminate the calling process
            __NR_exit_group, // exit all threads in a process
            __NR_fork,       // create a child process
            __NR_tgkill,     // send a signal to a thread
            __NR_wait4,      // wait for process to change state, BSD style
            __NR_rseq,       // register restartable sequence for thread
        });
    }

    void AllowScheduling()
    {
        allowed_syscalls.insert({
            __NR_sched_getaffinity,  // get a thread's CPU affinity mask
            __NR_sched_getparam,     // get scheduling parameters
            __NR_sched_getscheduler, // get scheduling policy/parameters
            __NR_sched_setscheduler, // set scheduling policy/parameters
            __NR_sched_yield,        // yield the processor
        });
    }

    void AllowSignalHandling()
    {
        allowed_syscalls.insert({
            __NR_rt_sigaction,   // examine and change a signal action
            __NR_rt_sigprocmask, // examine and change blocked signals
            __NR_rt_sigreturn,   // return from signal handler and cleanup stack frame
            __NR_sigaltstack,    // set and/or get signal stack context
        });
    }

    void AllowSleep()
    {
        allowed_syscalls.insert({
            __NR_clock_nanosleep, // high-resolution sleep with specifiable clock
            __NR_nanosleep,       // high-resolution sleep
        });
    }

    void AllowUmask()
    {
        allowed_syscalls.insert(__NR_umask); // set file mode creation mask
    }

    // Compile the current allowlist into a seccomp-bpf program.
    //
    // Program layout:
    //   1. Load the architecture; kill the process unless it is x86_64.
    //   2. Load the syscall number.
    //   3. For every allowed syscall: compare and return SECCOMP_RET_ALLOW on
    //      a match.
    //   4. Return the action selected by default_action for everything else.
    //
    // This largely follows <https://outflux.net/teach-seccomp/step-3/seccomp-bpf.h>
    // from Kees Cook's seccomp guide <https://outflux.net/teach-seccomp/>.
    std::vector<sock_filter> BuildFilter(SyscallSandboxAction default_action)
    {
        std::vector<sock_filter> program;

        // Architecture check (VALIDATE_ARCHITECTURE in seccomp-bpf.h).
        // Portability note: AUDIT_ARCH_X86_64 is Linux x86_64 specific.
        program.push_back(BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct seccomp_data, arch)));
        program.push_back(BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, AUDIT_ARCH_X86_64, 1, 0));
        program.push_back(BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL_PROCESS));

        // Load the syscall number (EXAMINE_SYSCALL in seccomp-bpf.h).
        program.push_back(BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct seccomp_data, nr)));

        // One compare-and-allow pair per allowlisted syscall (ALLOW_SYSCALL in
        // seccomp-bpf.h): on a mismatch the jump skips the RET instruction.
        for (const uint32_t syscall_nr : allowed_syscalls) {
            program.push_back(BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, syscall_nr, 0, 1));
            program.push_back(BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW));
        }

        // Fallback for every syscall that was not matched above. Both actions
        // are documented at
        // <https://www.kernel.org/doc/html/latest/userspace-api/seccomp_filter.html>.
        switch (default_action) {
        case SyscallSandboxAction::KILL_PROCESS:
            // SECCOMP_RET_KILL_PROCESS (introduced in Linux 4.14) makes the
            // entire process exit immediately without executing the syscall.
            // It is used in preference to SECCOMP_RET_KILL_THREAD, which
            // would only kill the offending thread.
            // (KILL_PROCESS in seccomp-bpf.h.)
            program.push_back(BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL_PROCESS));
            break;
        case SyscallSandboxAction::INVOKE_SIGNAL_HANDLER:
            // SECCOMP_RET_TRAP makes the kernel send SIGSYS to the triggering
            // task without executing the syscall, which lets the syscall
            // debug reporter run.
            program.push_back(BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_TRAP));
            break;
        }
        return program;
    }
};
} // namespace
// Enable the syscall sandbox for the running process. May only be called
// once (asserted).
//
// When log_syscall_violation_before_terminating is true, a SIGSYS handler
// that reports the offending syscall is installed as well; the function
// returns false if that installation fails and true otherwise.
bool SetupSyscallSandbox(bool log_syscall_violation_before_terminating)
{
    assert(!g_syscall_sandbox_enabled && "SetupSyscallSandbox(...) should only be called once.");
    g_syscall_sandbox_enabled = true;
    g_syscall_sandbox_log_violation_before_terminating = log_syscall_violation_before_terminating;

    // The debug handler is only needed (and only attempted) in logging mode.
    const bool needs_debug_handler = log_syscall_violation_before_terminating;
    return !needs_debug_handler || SetupSyscallSandboxDebugHandler();
}
// Deliberately issue a syscall that the sandbox is expected to reject, so
// that the violation handling can be exercised from tests.
void TestDisallowedSandboxCall()
{
    // getgroups is assumed NOT to be part of the syscall sandbox allowlist.
    // Its result is irrelevant; only the act of calling it matters.
    std::array<gid_t, 1> gid_buffer;
    const auto capacity = gid_buffer.size();
    [[maybe_unused]] const int32_t unused_result = getgroups(capacity, gid_buffer.data());
}
#endif // defined(USE_SYSCALL_SANDBOX)
// Install the seccomp-bpf filter matching syscall_policy for the calling
// thread.
//
// Does nothing when built without USE_SYSCALL_SANDBOX or when
// SetupSyscallSandbox(...) has not been called. Throws std::runtime_error if
// either prctl(2) call fails.
void SetSyscallSandboxPolicy(SyscallSandboxPolicy syscall_policy)
{
#if defined(USE_SYSCALL_SANDBOX)
    if (!g_syscall_sandbox_enabled) {
        return;
    }

    // Start from the baseline allowlist and add the optional groups
    // (file system and/or network access) that the requesting thread needs.
    SeccompPolicyBuilder policy_builder;
    switch (syscall_policy) {
    // File system + network access.
    //
    // SyscallSandboxPolicy::INITIALIZATION is the first policy loaded.
    // Subsequently loaded policies can reduce the abilities further, but
    // abilities can never be regained. INITIALIZATION must thus be a superset
    // of all other policies.
    case SyscallSandboxPolicy::INITIALIZATION:          // Thread: main thread (state: init)
    case SyscallSandboxPolicy::INITIALIZATION_DNS_SEED: // Thread: dnsseed
    case SyscallSandboxPolicy::INITIALIZATION_MAP_PORT: // Thread: mapport
    case SyscallSandboxPolicy::NET:                     // Thread: net
    case SyscallSandboxPolicy::NET_ADD_CONNECTION:      // Thread: addcon
    case SyscallSandboxPolicy::NET_HTTP_SERVER:         // Thread: http
    case SyscallSandboxPolicy::NET_HTTP_SERVER_WORKER:  // Thread: httpworker.<N>
    case SyscallSandboxPolicy::NET_OPEN_CONNECTION:     // Thread: opencon
    case SyscallSandboxPolicy::TOR_CONTROL:             // Thread: torcontrol
        policy_builder.AllowFileSystem();
        policy_builder.AllowNetwork();
        break;
    // File system access only.
    case SyscallSandboxPolicy::INITIALIZATION_LOAD_BLOCKS: // Thread: loadblk
    case SyscallSandboxPolicy::MESSAGE_HANDLER:            // Thread: msghand
    case SyscallSandboxPolicy::SCHEDULER:                  // Thread: scheduler
    case SyscallSandboxPolicy::TX_INDEX:                   // Thread: txindex
    case SyscallSandboxPolicy::SHUTOFF:                    // Thread: main thread (state: shutoff)
        policy_builder.AllowFileSystem();
        break;
    // Baseline only.
    case SyscallSandboxPolicy::VALIDATION_SCRIPT_CHECK: // Thread: scriptch.<N>
        break;
    }

    // Violations either trap into the SIGSYS reporter or kill the process
    // outright, depending on how the sandbox was set up.
    SyscallSandboxAction default_action = SyscallSandboxAction::KILL_PROCESS;
    if (g_syscall_sandbox_log_violation_before_terminating) {
        default_action = SyscallSandboxAction::INVOKE_SIGNAL_HANDLER;
    }

    std::vector<sock_filter> instructions = policy_builder.BuildFilter(default_action);
    sock_fprog prog{};
    prog.len = static_cast<uint16_t>(instructions.size());
    prog.filter = instructions.data();

    // Do not allow abilities to be regained after being dropped.
    //
    // PR_SET_NO_NEW_PRIVS documentation: <https://www.kernel.org/doc/html/latest/userspace-api/no_new_privs.html>
    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0) {
        throw std::runtime_error("Syscall sandbox enforcement failed: prctl(PR_SET_NO_NEW_PRIVS)");
    }

    // Install seccomp-bpf syscall filter.
    //
    // PR_SET_SECCOMP documentation: <https://www.kernel.org/doc/html/latest/userspace-api/seccomp_filter.html>
    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) != 0) {
        throw std::runtime_error("Syscall sandbox enforcement failed: prctl(PR_SET_SECCOMP)");
    }

    std::string thread_name = util::ThreadGetInternalName();
    if (thread_name.empty()) {
        thread_name = "*unnamed*";
    }
    LogPrint(BCLog::UTIL, "Syscall filter installed for thread \"%s\"\n", thread_name);
#endif // defined(USE_SYSCALL_SANDBOX)
}

View File

@ -1,54 +0,0 @@
// Copyright (c) 2020-2022 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_UTIL_SYSCALL_SANDBOX_H
#define BITCOIN_UTIL_SYSCALL_SANDBOX_H
//! Selects which seccomp policy SetSyscallSandboxPolicy(...) installs for the
//! calling thread. Every policy includes the default syscall allowlist; the
//! per-enumerator notes state which optional groups (file system, network)
//! SetSyscallSandboxPolicy(...) adds and which thread uses the policy.
enum class SyscallSandboxPolicy {
// 1. Initialization
INITIALIZATION, //!< main thread (state: init): file system + network; first policy loaded, superset of all others
INITIALIZATION_DNS_SEED, //!< dnsseed thread: file system + network
INITIALIZATION_LOAD_BLOCKS, //!< loadblk thread: file system
INITIALIZATION_MAP_PORT, //!< mapport thread: file system + network
// 2. Steady state (non-initialization, non-shutdown)
MESSAGE_HANDLER, //!< msghand thread: file system
NET, //!< net thread: file system + network
NET_ADD_CONNECTION, //!< addcon thread: file system + network
NET_HTTP_SERVER, //!< http thread: file system + network
NET_HTTP_SERVER_WORKER, //!< httpworker.<N> threads: file system + network
NET_OPEN_CONNECTION, //!< opencon thread: file system + network
SCHEDULER, //!< scheduler thread: file system
TOR_CONTROL, //!< torcontrol thread: file system + network
TX_INDEX, //!< txindex thread: file system
VALIDATION_SCRIPT_CHECK, //!< scriptch.<N> threads: default allowlist only
// 3. Shutdown
SHUTOFF, //!< main thread (state: shutoff): file system
};
//! Force the current thread (and threads created from the current thread) into a restricted-service
//! operating mode where only a subset of all syscalls are available.
//!
//! Subsequent calls to this function can reduce the abilities further, but abilities can never be
//! regained.
//!
//! This function is a no-op unless SetupSyscallSandbox(...) has been called.
//!
//! SetupSyscallSandbox(...) is called during bitcoind initialization if Bitcoin Core was compiled
//! with seccomp-bpf support (--with-seccomp) *and* the parameter -sandbox=<mode> was passed to
//! bitcoind.
//!
//! This experimental feature is available under Linux x86_64 only.
//!
//! @param[in] syscall_policy  Policy to install; it determines whether file system and/or network
//!                            syscalls are allowed on top of the default allowlist.
//! @throws std::runtime_error if the sandbox is enabled and prctl(PR_SET_NO_NEW_PRIVS) or
//!                            prctl(PR_SET_SECCOMP) fails.
void SetSyscallSandboxPolicy(SyscallSandboxPolicy syscall_policy);
#if defined(USE_SYSCALL_SANDBOX)
//! Setup and enable the experimental syscall sandbox for the running process.
//!
//! Must be called at most once; a second call trips an assertion.
//!
//! @param[in] log_syscall_violation_before_terminating  If true, a SIGSYS handler is installed
//!            that reports the offending syscall before the process terminates.
//! @return false if installing that SIGSYS handler failed, true otherwise.
[[nodiscard]] bool SetupSyscallSandbox(bool log_syscall_violation_before_terminating);
//! Invoke a disallowed syscall. Use for testing purposes.
//!
//! The implementation calls getgroups(), which is assumed not to be allowed by the sandbox policy.
void TestDisallowedSandboxCall();
#endif // defined(USE_SYSCALL_SANDBOX)
#endif // BITCOIN_UTIL_SYSCALL_SANDBOX_H

View File

@ -25,5 +25,4 @@ RPCAUTH=@abs_top_srcdir@/share/rpcauth/rpcauth.py
@ENABLE_FUZZ_TRUE@ENABLE_FUZZ=true
@ENABLE_ZMQ_TRUE@ENABLE_ZMQ=true
@ENABLE_EXTERNAL_SIGNER_TRUE@ENABLE_EXTERNAL_SIGNER=true
@ENABLE_SYSCALL_SANDBOX_TRUE@ENABLE_SYSCALL_SANDBOX=true
@ENABLE_USDT_TRACEPOINTS_TRUE@ENABLE_USDT_TRACEPOINTS=true

View File

@ -30,9 +30,6 @@ class NotificationsTest(BitcoinTestFramework):
def set_test_params(self):
self.num_nodes = 2
self.setup_clean_chain = True
# The experimental syscall sandbox feature (-sandbox) is not compatible with -alertnotify,
# -blocknotify, -walletnotify or -shutdownnotify (which all invoke execve).
self.disable_syscall_sandbox = True
def setup_network(self):
self.wallet = ''.join(chr(i) for i in range(FILE_CHAR_START, FILE_CHAR_END) if chr(i) not in FILE_CHARS_DISALLOWED)

View File

@ -18,7 +18,6 @@ FILE_NAME = "test.txt"
class StartupNotifyTest(BitcoinTestFramework):
def set_test_params(self):
self.num_nodes = 1
self.disable_syscall_sandbox = True
def run_test(self):
tmpdir_file = os.path.join(self.options.tmpdir, NODE_DIR, FILE_NAME)

View File

@ -1,34 +0,0 @@
#!/usr/bin/env python3
# Copyright (c) 2021-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
"""Test bitcoind aborts if a disallowed syscall is used when compiled with the syscall sandbox."""
from test_framework.test_framework import BitcoinTestFramework, SkipTest
class SyscallSandboxTest(BitcoinTestFramework):
    """Check that bitcoind logs and aborts when a disallowed syscall is made."""

    def set_test_params(self):
        """Run against a single node."""
        self.num_nodes = 1

    def skip_test_if_missing_module(self):
        """Skip unless the sandbox is compiled in and has not been disabled."""
        if not self.is_syscall_sandbox_compiled():
            raise SkipTest("bitcoind has not been built with syscall sandbox enabled.")
        if self.disable_syscall_sandbox:
            raise SkipTest("--nosandbox passed to test runner.")

    def run_test(self):
        """Trigger a disallowed syscall over RPC and expect the node to die."""
        node = self.nodes[0]
        expected_log_entry = 'ERROR: The syscall "getgroups" (syscall number 115) is not allowed by the syscall sandbox'
        node_was_terminated = False

        with node.assert_debug_log([expected_log_entry]):
            self.log.info("Invoking disallowed syscall")
            try:
                node.invokedisallowedsyscall()
            except ConnectionError:
                # The RPC connection drops because bitcoind aborted.
                node_was_terminated = True
        assert node_was_terminated

        # The node is gone already; clear the list so the framework does not
        # try to stop it again.
        self.nodes = []
if __name__ == "__main__":
SyscallSandboxTest().main()

View File

@ -28,9 +28,6 @@ class VersionBitsWarningTest(BitcoinTestFramework):
def set_test_params(self):
self.setup_clean_chain = True
self.num_nodes = 1
# The experimental syscall sandbox feature (-sandbox) is not compatible with -alertnotify
# (which invokes execve).
self.disable_syscall_sandbox = True
def setup_network(self):
self.alert_filename = os.path.join(self.options.tmpdir, "alert.txt")

View File

@ -27,9 +27,6 @@ class RPCSignerTest(BitcoinTestFramework):
def set_test_params(self):
self.num_nodes = 4
# The experimental syscall sandbox feature (-sandbox) is not compatible with -signer (which
# invokes execve).
self.disable_syscall_sandbox = True
self.extra_args = [
[],

View File

@ -103,7 +103,6 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
self.supports_cli = True
self.bind_to_localhost_only = True
self.parse_args()
self.disable_syscall_sandbox = self.options.nosandbox or self.options.valgrind
self.default_wallet_name = "default_wallet" if self.options.descriptors else ""
self.wallet_data_filename = "wallet.dat"
# Optional list of wallet names that can be set in set_test_params to
@ -160,8 +159,6 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
parser = argparse.ArgumentParser(usage="%(prog)s [options]")
parser.add_argument("--nocleanup", dest="nocleanup", default=False, action="store_true",
help="Leave bitcoinds and test.* datadir on exit or error")
parser.add_argument("--nosandbox", dest="nosandbox", default=False, action="store_true",
help="Don't use the syscall sandbox")
parser.add_argument("--noshutdown", dest="noshutdown", default=False, action="store_true",
help="Don't stop bitcoinds after the test execution")
parser.add_argument("--cachedir", dest="cachedir", default=os.path.abspath(os.path.dirname(os.path.realpath(__file__)) + "/../../cache"),
@ -188,7 +185,7 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
parser.add_argument("--perf", dest="perf", default=False, action="store_true",
help="profile running nodes with perf for the duration of the test")
parser.add_argument("--valgrind", dest="valgrind", default=False, action="store_true",
help="run nodes under the valgrind memory error detector: expect at least a ~10x slowdown. valgrind 3.14 or later required. Forces --nosandbox.")
help="run nodes under the valgrind memory error detector: expect at least a ~10x slowdown. valgrind 3.14 or later required.")
parser.add_argument("--randomseed", type=int,
help="set a random seed for deterministically reproducing a previous test run")
parser.add_argument("--timeout-factor", dest="timeout_factor", type=float, help="adjust test timeouts by a factor. Setting it to 0 disables all timeouts")
@ -497,11 +494,6 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
extra_args = [[]] * num_nodes
if versions is None:
versions = [None] * num_nodes
if self.is_syscall_sandbox_compiled() and not self.disable_syscall_sandbox:
for i in range(len(extra_args)):
# The -sandbox argument is not present in the v22.0 release.
if versions[i] is None or versions[i] >= 229900:
extra_args[i] = extra_args[i] + ["-sandbox=log-and-abort"]
if binary is None:
binary = [get_bin_from_version(v, 'bitcoind', self.options.bitcoind) for v in versions]
if binary_cli is None:
@ -987,7 +979,3 @@ class BitcoinTestFramework(metaclass=BitcoinTestMetaClass):
def is_bdb_compiled(self):
"""Checks whether the wallet module was compiled with BDB support."""
return self.config["components"].getboolean("USE_BDB")
def is_syscall_sandbox_compiled(self):
"""Checks whether the syscall sandbox was compiled."""
return self.config["components"].getboolean("ENABLE_SYSCALL_SANDBOX")

View File

@ -210,7 +210,6 @@ BASE_SCRIPTS = [
'rpc_users.py',
'rpc_whitelist.py',
'feature_proxy.py',
'feature_syscall_sandbox.py',
'wallet_signrawtransactionwithwallet.py --legacy-wallet',
'wallet_signrawtransactionwithwallet.py --descriptors',
'rpc_signrawtransactionwithkey.py',

View File

@ -45,9 +45,6 @@ class WalletSignerTest(BitcoinTestFramework):
def set_test_params(self):
self.num_nodes = 2
# The experimental syscall sandbox feature (-sandbox) is not compatible with -signer (which
# invokes execve).
self.disable_syscall_sandbox = True
self.extra_args = [
[],