libreoffice-online/common/SigUtil.cpp
Michael Meeks a8102212c6 bgsave: ensure kit processes die when their parents do.
Potentially zombie / badly behaving kits should be taken down
by the kernel, and this lets us continue our cleanup by killing
just the parent process.

Change-Id: I1e81f41cded0c67b72622f8ed88602daf427238c
Signed-off-by: Michael Meeks <michael.meeks@collabora.com>
2024-04-16 15:07:18 +01:00

654 lines
17 KiB
C++

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
/*
* Copyright the Collabora Online contributors.
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include <config.h>
#include "SigUtil.hpp"
#if !defined(__ANDROID__) && !defined(__EMSCRIPTEN__)
# include <execinfo.h>
#endif
#include <csignal>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/uio.h>
#include <unistd.h>
#if !defined(ANDROID) && !defined(IOS)
# include <sys/prctl.h>
#endif
#include <atomic>
#include <cassert>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <thread>
#include <array>
#include <Socket.hpp>
#include "Common.hpp"
#include "Log.hpp"
namespace
{
#ifndef IOS
/// The valid states of the process.
/// The enumerators are ordered by increasing severity: the accessors
/// getShutdownRequestFlag() and getTerminationFlag() compare with '>=',
/// so Terminate also counts as a shutdown request.
enum class RunState : char
{
Run = 0, //< Normal up-and-running state.
ShutDown, //< Request to shut down gracefully.
Terminate //< Immediate termination.
};
/// Single flag to control the current run state.
/// Written from both regular code and the termination-signal handler,
/// hence atomic.
static std::atomic<RunState> RunStateFlag(RunState::Run);
#if !MOBILEAPP
/// Set by the SIGUSR1 handler; consumed and cleared by checkDumpGlobalState().
static std::atomic<bool> DumpGlobalState(false);
/// Set by the SIGUSR2 handler; consumed and cleared by checkForwardSigUsr2().
static std::atomic<bool> ForwardSigUsr2Flag(false); //< Flags to forward SIG_USR2 to children.
#endif
#endif
/// Running count of addActivity() calls; reduced modulo the size of
/// ActivityStrings to pick the slot to overwrite next.
static size_t ActivityStringIndex = 0;
/// Free-form header set by setActivityHeader() and logged before the
/// recent-activity list on a fatal signal.
static std::string ActivityHeader;
/// Fixed-size ring of the most recent activity messages, dumped by the
/// fatal-signal handler.
static std::array<std::string, 16> ActivityStrings;
/// Set by setUnattended(); when true dumpBacktrace() does not sleep to
/// wait for a debugger.
static bool UnattendedRun = false;
#if !MOBILEAPP
static int SignalLogFD = STDERR_FILENO; //< The FD where signalLogs are dumped.
/// Heap copy (strdup) of the version string, owned by setVersionInfo().
static char* VersionInfo = nullptr;
/// Pre-formatted "attach gdb" hint, prepared in setFatalSignals() so that
/// the fatal-signal path only has to write it out.
static char FatalGdbString[256] = { '\0' };
/// Callback invoked by handleSigChild(); installed via setSigChildHandler().
static SigUtil::SigChildHandler SigChildHandle;
#endif
} // namespace
namespace SigUtil
{
#ifndef IOS
bool getShutdownRequestFlag() { return RunStateFlag >= RunState::ShutDown; }
bool getTerminationFlag() { return RunStateFlag >= RunState::Terminate; }
/// Request immediate termination and, outside of the mobile app,
/// wake every poll thread so the new state is noticed.
void setTerminationFlag()
{
    // Raise the forced-termination state unconditionally.
    RunStateFlag.store(RunState::Terminate);

    // The mobile app has no poll threads to wake from here.
    if (Util::isMobileApp())
        return;

    SocketPoll::wakeupWorld();
}
void requestShutdown()
{
RunState oldState = RunState::Run;
if (RunStateFlag.compare_exchange_strong(oldState, RunState::ShutDown))
SocketPoll::wakeupWorld();
}
#if MOBILEAPP
/// Put the run state back to Run (mobile app builds only).
void resetTerminationFlags()
{
    RunStateFlag.store(RunState::Run);
}
#endif
#endif // !IOS
/// Run @dumpState if a global-state dump was requested (by SIGUSR1)
/// since the last call. A no-op in mobile app builds.
void checkDumpGlobalState([[maybe_unused]] GlobalDumpStateFn dumpState)
{
#if !MOBILEAPP
    assert(dumpState && "Invalid callback for checkDumpGlobalState");

    // No request pending.
    if (!DumpGlobalState)
        return;

    // Clear the request first, then service it.
    DumpGlobalState = false;
    dumpState();
#endif
}
/// Run @forwardSigUsr2 if a SIGUSR2 was received since the last call.
/// A no-op in mobile app builds.
void checkForwardSigUsr2([[maybe_unused]] ForwardSigUsr2Fn forwardSigUsr2)
{
#if !MOBILEAPP
    assert(forwardSigUsr2 && "Invalid callback for checkForwardSigUsr2");

    // No request pending.
    if (!ForwardSigUsr2Flag)
        return;

    // Clear the request first, then service it.
    ForwardSigUsr2Flag = false;
    forwardSigUsr2();
#endif
}
/// Replace the header that precedes the recent-activity list in the
/// fatal-signal log.
void setActivityHeader(const std::string &message)
{
    ActivityHeader.assign(message);
}
/// Record @message in the recent-activity ring, overwriting the oldest
/// entry once the ring is full.
void addActivity(const std::string &message)
{
    const std::size_t slot = ActivityStringIndex % ActivityStrings.size();
    ++ActivityStringIndex;
    ActivityStrings[slot] = message;
}
/// Record a per-session activity entry of the form
/// "session: <viewId>: <message>".
void addActivity(const std::string &viewId, const std::string &message)
{
    std::string entry("session: ");
    entry += viewId;
    entry += ": ";
    entry += message;
    addActivity(entry);
}
/// Mark this run as unattended, so that a crash does not sleep waiting
/// for a debugger to be attached.
void setUnattended()
{
    UnattendedRun = true;
}
#if !MOBILEAPP
/// Open the signalLog file.
/// Currently this only (re)selects stderr as the destination.
void signalLogOpen()
{
    SignalLogFD = STDERR_FILENO;
}
/// Close the signalLog file.
/// Nothing is actually closed or released: the logging subsystem cannot
/// be shut down here because freeing memory is not signal safe. Only
/// the pending output is pushed out.
void signalLogClose()
{
    // Flush the stdio buffers first ...
    fflush(stdout);
    fflush(stderr);

    // ... then ask the kernel to sync the signal-log descriptor.
    fsync(SignalLogFD);
}
/// Write the standard log-line prefix, tagged "SIG", to the signal log.
void signalLogPrefix()
{
    char prefixText[1024];
    // Leave room for the terminator when formatting the prefix.
    Log::prefix<sizeof(prefixText) - 1>(prefixText, "SIG");
    signalLog(prefixText);
}
// Signal-safe message output: only strlen() and write() are used.
// $ man 7 signal
/// Write the NUL-terminated @message to the signal-log descriptor,
/// retrying after EINTR and after partial writes. Any other write
/// error silently abandons the rest of the message.
void signalLog(const char *message)
{
    for (;;)
    {
        const int remaining = std::strlen(message);
        const int count = write(SignalLogFD, message, remaining);
        if (count >= 0)
        {
            // Skip what was written; stop once the terminator is reached.
            message += count;
            if (*message == '\0')
                return;
        }
        else if (errno != EINTR)
        {
            // Unrecoverable error: give up.
            return;
        }
        // Otherwise: interrupted (retry) or partial write (continue).
    }
}
// Signal-safe number output: formats into a stack buffer, no allocation.
// $ man 7 signal
/// Write @num to the signal log in the given @base (10 or 16, lower-case
/// hex digits, no prefix).
void signalLogNumber(std::size_t num, int base)
{
    if (num == 0)
    {
        signalLog("0");
        return;
    }

    // Digits are produced least-significant first, so fill the buffer
    // backwards from the terminator.
    char digits[22];
    char* cursor = digits + 21;
    *cursor = '\0';

    assert (base == 10 || base == 16);

    // The first byte of the buffer is never used.
    while (cursor > digits + 1 && num > 0)
    {
        const int digit = num % base;
        --cursor;
        if (digit < 10)
            *cursor = '0' + digit;
        else
            *cursor = 'a' + digit - 10;
        num /= base;
    }

    signalLog(cursor);
}
/// Counts the fatal-signal handlers currently in progress, so that the
/// process does not _Exit while a stack trace is being dumped.
/// Constructing an instance enters the trap and destroying it leaves,
/// which makes nesting (re-entry) safe.
class SigHandlerTrap
{
    /// Number of live SigHandlerTrap instances.
    static std::atomic<int> SigHandling;

public:
    SigHandlerTrap() { SigHandling.fetch_add(1); }
    ~SigHandlerTrap() { SigHandling.fetch_sub(1); }

    /// True when this is the only live trap, i.e. no other signal
    /// handler is in progress.
    bool isExclusive() const
    {
        const int active = SigHandling.load();
        return active == 1;
    }

    /// Block, polling once per second, until no trap is live.
    static void wait()
    {
        while (SigHandling.load() != 0)
        {
            sleep(1);
        }
    }
};
std::atomic<int> SigHandlerTrap::SigHandling;
/// Block until no fatal-signal handler is in progress.
void waitSigHandlerTrap()
{
    // Thin public wrapper around the file-local trap class.
    SigHandlerTrap::wait();
}
/// Map a signal number to its symbolic name (e.g. SIGINT -> "SIGINT").
/// Returns "unknown" for numbers that are not listed. The returned
/// pointer refers to a string literal and must not be freed.
const char *signalName(const int signo)
{
    // LCOV_EXCL_START Coverage for these is not very useful.
    struct NamedSignal
    {
        int number;
        const char* name;
    };

#define SIGNAL_ENTRY(x) { SIG##x, "SIG" #x }
    // Signals that do not exist everywhere, or that alias another signal
    // on some platforms, are guarded by the preprocessor.
    static constexpr NamedSignal knownSignals[] = {
        SIGNAL_ENTRY(HUP),
        SIGNAL_ENTRY(INT),
        SIGNAL_ENTRY(QUIT),
        SIGNAL_ENTRY(ILL),
        SIGNAL_ENTRY(ABRT),
        SIGNAL_ENTRY(FPE),
        SIGNAL_ENTRY(KILL),
        SIGNAL_ENTRY(SEGV),
        SIGNAL_ENTRY(PIPE),
        SIGNAL_ENTRY(ALRM),
        SIGNAL_ENTRY(TERM),
        SIGNAL_ENTRY(USR1),
        SIGNAL_ENTRY(USR2),
        SIGNAL_ENTRY(CHLD),
        SIGNAL_ENTRY(CONT),
        SIGNAL_ENTRY(STOP),
        SIGNAL_ENTRY(TSTP),
        SIGNAL_ENTRY(TTIN),
        SIGNAL_ENTRY(TTOU),
        SIGNAL_ENTRY(BUS),
#ifdef SIGPOLL
        SIGNAL_ENTRY(POLL),
#endif
        SIGNAL_ENTRY(PROF),
        SIGNAL_ENTRY(SYS),
        SIGNAL_ENTRY(TRAP),
        SIGNAL_ENTRY(URG),
        SIGNAL_ENTRY(VTALRM),
        SIGNAL_ENTRY(XCPU),
        SIGNAL_ENTRY(XFSZ),
#ifdef SIGEMT
        SIGNAL_ENTRY(EMT),
#endif
#ifdef SIGSTKFLT
        SIGNAL_ENTRY(STKFLT),
#endif
#if defined(SIGIO) && defined(SIGPOLL) && SIGIO != SIGPOLL
        SIGNAL_ENTRY(IO),
#endif
#ifdef SIGPWR
        SIGNAL_ENTRY(PWR),
#endif
#ifdef SIGLOST
        SIGNAL_ENTRY(LOST),
#endif
        SIGNAL_ENTRY(WINCH),
#if defined(SIGINFO) && defined(SIGPWR) && SIGINFO != SIGPWR
        SIGNAL_ENTRY(INFO),
#endif
    };
#undef SIGNAL_ENTRY

    for (const NamedSignal& entry : knownSignals)
    {
        if (entry.number == signo)
            return entry.name;
    }

    return "unknown";
    // LCOV_EXCL_STOP Coverage for these is not very useful.
}
/// Handler for the termination signals (installed by setFatalSignals for
/// SIGINT, SIGTERM, SIGQUIT and SIGHUP).
/// Each delivery escalates the run state by one step:
///  - SIGINT/SIGTERM while in Run      -> ShutDown (graceful);
///  - any of the signals in ShutDown   -> Terminate (forced);
///  - any of the signals in Terminate  -> restore the default disposition
///    and re-raise the signal (hard exit).
/// In the first two cases the poll threads are woken to notice the new state.
/// errno is saved on entry and restored before returning / re-raising.
static
void handleTerminationSignal(const int signal)
{
const auto onrre = errno; // Save.
bool hardExit = false;
// Text logged in front of the signal name; set on every path below.
const char* domain;
RunState oldState = RunState::Run;
// Only SIGINT/SIGTERM may start a graceful shutdown, and only from Run.
if ((signal == SIGINT || signal == SIGTERM) &&
RunStateFlag.compare_exchange_strong(oldState, RunState::ShutDown))
{
domain = " Shutdown signal received: ";
}
else
{
assert(RunStateFlag > RunState::Run && "Must have had Terminate flag");
// Escalate a pending graceful shutdown to forced termination.
oldState = RunState::ShutDown;
if (RunStateFlag.compare_exchange_strong(oldState, RunState::Terminate))
{
domain = " Forced-Termination signal received: ";
}
else
{
// Already terminating: stop being polite.
assert(RunStateFlag == RunState::Terminate && "Must have had Terminate flag");
domain = " ok, ok - hard-termination signal received: ";
hardExit = true;
}
}
signalLogOpen();
signalLogPrefix();
signalLog(domain);
signalLog(signalName(signal));
signalLog("\n");
signalLogClose();
if (!hardExit)
SocketPoll::wakeupWorld();
else
{
#if CODE_COVERAGE
__gcov_dump();
#endif
// '::' is required: the parameter named 'signal' hides the function.
::signal (signal, SIG_DFL);
errno = onrre; // Restore.
::raise (signal);
}
errno = onrre; // Restore.
}
/// Handler for fatal signals (installed with SA_SIGINFO by setFatalSignals
/// for SIGSEGV, SIGBUS, SIGABRT, SIGILL and SIGFPE).
/// Logs the signal, its si_code and faulting address, the recent-activity
/// ring and - unless re-entered - a backtrace, then restores the default
/// disposition and re-raises the signal so the default action takes place.
/// @param signal the signal number being handled.
/// @param info   kernel-supplied details; may be null.
static
void handleFatalSignal(const int signal, siginfo_t *info, void * /* uctxt */)
{
    // Count ourselves in; a second concurrent/nested handler is "re-entered".
    SigHandlerTrap guard;
    const bool bReEntered = !guard.isExclusive();
    if (!bReEntered)
        signalLogOpen();

    signalLogPrefix();

    // Heap corruption can re-enter through backtrace.
    if (bReEntered)
        signalLog(" Fatal double signal received: ");
    else
        signalLog(" Fatal signal received: ");
    signalLog(signalName(signal));

    if (info)
    {
        signalLog(" code: ");
        // si_code is a signed int and may be negative (e.g. SI_QUEUE,
        // SI_TKILL). signalLogNumber() takes an unsigned value, so log
        // the sign separately instead of a huge wrapped-around number.
        const long long code = info->si_code;
        if (code < 0)
        {
            signalLog("-");
            signalLogNumber(static_cast<std::size_t>(-code));
        }
        else
        {
            signalLogNumber(static_cast<std::size_t>(code));
        }
        signalLog(" for address: 0x");
        signalLogNumber(reinterpret_cast<std::size_t>(info->si_addr), 16);
    }
    signalLog("\n");

    // Dump the activity ring starting at the slot that would be
    // overwritten next, i.e. oldest entry first.
    signalLog("Recent activity:\n");
    signalLog(ActivityHeader.c_str());
    for (size_t i = 0; i < ActivityStrings.size(); ++i)
    {
        size_t idx = (ActivityStringIndex + i) % ActivityStrings.size();
        if (!ActivityStrings[idx].empty())
        {
            // no plausible impl. will heap allocate in c_str.
            signalLog("\t");
            signalLog(ActivityStrings[idx].c_str());
            signalLog("\n");
        }
    }

    // Restore the default disposition before doing anything that may
    // fault again, so a second fault is not routed back to this handler.
    struct sigaction action;
    sigemptyset(&action.sa_mask);
    action.sa_flags = 0;
    action.sa_handler = SIG_DFL;
    sigaction(signal, &action, nullptr);

    if (!bReEntered)
    {
        dumpBacktrace();
        signalLogClose();
    }

    // let default handler process the signal
    ::raise(signal);
}
/// Write a backtrace of the calling thread to the signal log, headed by
/// the pid and (when set) the version info. Afterwards, in debug builds
/// or when COOL_DEBUG is set in the environment, either sleep 60s so a
/// debugger can be attached or, for unattended runs, just say so.
void dumpBacktrace()
{
#if !defined(__ANDROID__)
    signalLog("\nBacktrace ");
    signalLogNumber(getpid());
    if (VersionInfo != nullptr)
    {
        signalLog(" - ");
        signalLog(VersionInfo);
    }
    signalLog(":\n");

    const int frameLimit = 50;
    void *frames[frameLimit];
    const int frameCount = backtrace(frames, frameLimit);
    if (frameCount > 0)
        backtrace_symbols_fd(frames, frameCount, SignalLogFD);
#else
    LOG_INF("Backtrace not available on Android.");
#endif

    // Debug builds always offer the debugger; others only on request.
#if ENABLE_DEBUG
    const bool offerDebugger = true;
#else
    const bool offerDebugger = std::getenv("COOL_DEBUG") != nullptr;
#endif
    if (!offerDebugger)
        return;

    if (!UnattendedRun)
    {
        signalLog(FatalGdbString);
        std::cerr << "Sleeping 60s to allow debugging: attach " << getpid() << std::endl;
        sleep(60);
        std::cerr << "Finished sleeping to allow debugging of: " << getpid() << std::endl;
        return;
    }

    // Nobody is around to attach a debugger.
    std::cerr << "Crashed in unattended run and won't wait for debugger. Re-run without "
                 "--unattended to attach a debugger."
              << std::endl;
}
/// Store a private heap copy of @versionInfo for use by dumpBacktrace().
/// Any previously stored copy is released.
void setVersionInfo(const std::string &versionInfo)
{
    // Duplicate first and only then release the old buffer, so that
    // VersionInfo (which the fatal-signal path reads) does not point
    // at freed memory between the free and the re-assignment.
    char* const replacement = strdup(versionInfo.c_str());
    char* const previous = VersionInfo;
    VersionInfo = replacement;
    free(previous); // free(nullptr) is a no-op.
}
void setFatalSignals(const std::string &versionInfo)
{
struct sigaction action;
setVersionInfo(versionInfo);
// Set up the fatal-signal handler. (N.B. three-argument handler)
sigemptyset(&action.sa_mask);
action.sa_flags = SA_SIGINFO;
action.sa_sigaction = handleFatalSignal;
sigaction(SIGSEGV, &action, nullptr);
sigaction(SIGBUS, &action, nullptr);
sigaction(SIGABRT, &action, nullptr);
sigaction(SIGILL, &action, nullptr);
sigaction(SIGFPE, &action, nullptr);
// Set up the terminatio-signal handler. (N.B. single-argument handler)
sigemptyset(&action.sa_mask);
action.sa_flags = 0;
action.sa_handler = handleTerminationSignal;
sigaction(SIGINT, &action, nullptr);
sigaction(SIGTERM, &action, nullptr);
sigaction(SIGQUIT, &action, nullptr);
sigaction(SIGHUP, &action, nullptr);
// Prepare this in advance just in case.
std::ostringstream stream;
stream << "\nERROR: Fatal signal! Attach debugger with:\n"
<< "sudo gdb --pid=" << getpid() << "\n or \n"
<< "sudo gdb --q --n --ex 'thread apply all backtrace full' --batch --pid="
<< getpid() << '\n';
std::string streamStr = stream.str();
assert (sizeof (FatalGdbString) > strlen(streamStr.c_str()) + 1);
strncpy(FatalGdbString, streamStr.c_str(), sizeof(FatalGdbString)-1);
FatalGdbString[sizeof(FatalGdbString)-1] = '\0';
}
/// SIGCHLD handler: pass the pid reported in @info (or -1 when no info
/// is available) to the registered callback.
static
void handleSigChild(const int /* signal */, siginfo_t *info, void * /* uctxt */)
{
    if (info == nullptr)
    {
        SigChildHandle(-1);
        return;
    }

    SigChildHandle(info->si_pid);
}
/// Register @fn as the SIGCHLD callback. A null @fn restores the
/// default SIGCHLD disposition.
void setSigChildHandler(SigChildHandler fn)
{
    struct sigaction action;
    SigChildHandle = fn;

    sigemptyset(&action.sa_mask);
    if (!fn)
    {
        // Nothing to call: back to the default behaviour.
        action.sa_flags = 0;
        action.sa_handler = SIG_DFL;
    }
    else
    {
        // Three-argument handler, so the child's pid is available.
        action.sa_flags = SA_SIGINFO;
        action.sa_sigaction = handleSigChild;
    }

    sigaction(SIGCHLD, &action, nullptr);
}
/// Ask the kernel to send this process SIGKILL when its parent dies,
/// so it is taken down together with the parent.
/// A no-op on Android and iOS.
void dieOnParentDeath()
{
#if !defined(ANDROID) && !defined(IOS)
    // The result is not checked.
    ::prctl(PR_SET_PDEATHSIG, SIGKILL);
#endif
}
/// Handler for SIGUSR1 and SIGUSR2 (installed by setUserSignals).
/// SIGUSR1 requests a global-state dump; SIGUSR2 writes a backtrace to
/// the signal log and requests forwarding of the signal. The requests
/// are serviced later via checkDumpGlobalState()/checkForwardSigUsr2();
/// here the flags are only raised and the poll threads woken.
static
void handleUserSignal(const int signal)
{
    signalLogOpen();
    signalLogPrefix();
    signalLog(" User signal received: ");
    signalLog(signalName(signal));
    signalLog("\n");

    switch (signal)
    {
        case SIGUSR1:
            DumpGlobalState = true;
            break;

        case SIGUSR2:
        {
            constexpr int frameLimit = 250;
            void* frames[frameLimit];
            const int frameCount = backtrace(frames, frameLimit);
            if (frameCount > 0)
                backtrace_symbols_fd(frames, frameCount, SignalLogFD);

            ForwardSigUsr2Flag = true;
            break;
        }

        default:
            break;
    }

    signalLogClose();
    SocketPoll::wakeupWorld();
}
/// Deliberately empty handler, installed for SIGUSR1 by setDebuggerSignal().
/// NOTE(review): presumably this exists so that SIGUSR1 is caught rather
/// than left at its default action - confirm against the callers.
static
void handleDebuggerSignal(const int /*signal*/)
{}
/// Install handleUserSignal for SIGUSR1 and SIGUSR2 and, where
/// backtrace() is available, call it once up-front.
void setUserSignals()
{
    static constexpr int userSignals[] = { SIGUSR1, SIGUSR2 };

    struct sigaction action;
    sigemptyset(&action.sa_mask);
    action.sa_flags = 0;
    action.sa_handler = handleUserSignal;

    for (const int signo : userSignals)
        sigaction(signo, &action, nullptr);

#if !defined(__ANDROID__)
    // Prime backtrace to make sure libgcc is loaded.
    constexpr int frameLimit = 1;
    void* frames[frameLimit + 1];
    backtrace(frames, frameLimit);
#endif
}
/// Route SIGUSR1 to the do-nothing debugger handler.
void setDebuggerSignal()
{
    struct sigaction debuggerAction;

    // No extra signals blocked and no special flags.
    sigemptyset(&debuggerAction.sa_mask);
    debuggerAction.sa_flags = 0;
    debuggerAction.sa_handler = handleDebuggerSignal;

    sigaction(SIGUSR1, &debuggerAction, nullptr);
}
/// Kill the given pid with SIGKILL as default.
/// Returns true when the signal was delivered or the pid does not exist
/// (any more). When sending the signal fails for another reason, polls
/// for a bounded time and returns true as soon as the pid is gone, or
/// false if it is still around when the wait is over.
/// When fuzzing, nothing is signalled and true is returned.
bool killChild(const int pid, const int signal)
{
    LOG_DBG("Killing PID: " << pid << " with " << signalName(signal));
    // Don't kill anything in the fuzzer case: pid == 0 would kill the fuzzer itself, and
    // killing random other processes is not a great idea, either.
    if (Util::isFuzzing() || kill(pid, signal) == 0 || errno == ESRCH)
    {
        // Killed or doesn't exist.
        return true;
    }

    LOG_SYS("Error when trying to kill PID: " << pid << ". Will wait for termination.");

    constexpr int sleepMs = 50;
    constexpr int count = std::max(CHILD_REBALANCE_INTERVAL_MS / sleepMs, 2);
    for (int i = 0; i < count; ++i)
    {
        // Probe with the null signal: success means the process is still
        // there; only a failure with ESRCH means it is gone.
        if (kill(pid, 0) != 0 && errno == ESRCH)
        {
            // Doesn't exist.
            return true;
        }

        std::this_thread::sleep_for(std::chrono::milliseconds(sleepMs));
    }

    return false;
}
#endif // !MOBILEAPP
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */