Ticket #13908: 13908_terminate.patch

File 13908_terminate.patch, 17.4 KB (added by jdemeyer, 9 years ago)
  • c_lib/include/interrupt.h

    # HG changeset patch
    # User Jeroen Demeyer <jdemeyer@cage.ugent.be>
    # Date 1358247979 -3600
    # Node ID 03f5aab83fd264a9baa0dc7703dd3d7127779f97
    # Parent  e52aaa1873d77cda3faf2f36bb69d70ff54a5a84
    Exit Sage gracefully upon SIGHUP or SIGTERM
    
    diff --git a/c_lib/include/interrupt.h b/c_lib/include/interrupt.h
    a b  
    33
    44For documentation about how to use these, see the Developer's Guide.
    55
     6This code distinguishes between two kinds of signals:
     7
     8(1) interrupt-like signals: SIGINT, SIGHUP.  The word
     9"interrupt" refers to any of these signals.  These need not be handled
     10immediately; we might handle them at a suitable later time, outside of
     11sig_block() and with the Python GIL acquired.  SIGINT raises a
     12KeyboardInterrupt (as usual in Python), while SIGHUP raises
     13SystemExit, causing Python to exit.  SIGHUP also redirects stdin
     14from /dev/null, so that interactive sessions exit as well.
     15
     16(2) critical signals: SIGILL, SIGABRT, SIGFPE, SIGBUS, SIGSEGV.
     17These are critical because they cannot be ignored.  If they happen
     18outside of sig_on(), we can only exit Sage with the dreaded
     19"unhandled SIG..." message.  Inside of sig_on(), they can be handled
     20and raise a RuntimeError.
     21
    622
    723AUTHORS:
    824
     
    1026
    1127- Jeroen Demeyer (2010-10-03): almost complete rewrite (#9678)
    1228
     29- Jeroen Demeyer (2013-01-11): handle SIGHUP also (#13908)
     30
    1331*/
    1432
    1533/*****************************************************************************
     
    2341 *                  http://www.gnu.org/licenses/
    2442 ****************************************************************************/
    2543
    26 /* Whether or not to enable interrupt debugging (0: disable, 1: enable) */
     44/* Whether or not to compile debug routines for the interrupt handling
     45 * code (0: disable, 1: enable).  Enabling will make the code slower.
     46 * The debug level itself needs to be set in c_lib/src/interrupt.c */
    2747#define ENABLE_DEBUG_INTERRUPT 0
    2848
    2949
     
    5474#define unlikely(x) (x)
    5575#endif
    5676
     77/* Interrupt debug level */
     78#if ENABLE_DEBUG_INTERRUPT
     79extern int sage_interrupt_debug_level;
     80#endif
    5781
    5882
    5983/* Print a C backtrace if supported by libc */
     
    6488
    6589
    6690/*
    67  * The signal handlers for Sage, one for SIGINT and one for other
    68  * signals.
    69  * Inside sig_on() (i.e. when _signals.sig_on_count is positive), this
    70  * raises an exception and jumps back to sig_on().
     91 * The signal handlers for Sage, one for interrupt-like signals
     92 * (SIGINT, SIGHUP) and one for critical signals like SIGSEGV.
     93 *
     94 * Inside sig_on() (i.e. when _signals.sig_on_count is positive), these
     95 * handlers raise an exception and jump back to sig_on().
    7196 * Outside of sig_on(), sage_interrupt_handler() sets Python's
    7297 * interrupt flag using PyErr_SetInterrupt(); sage_signal_handler()
    7398 * terminates Sage.
    7499 */
    75 void sage_interrupt_handler(int sig);    /* SIGINT */
    76 void sage_signal_handler(int sig);       /* Other signals */
    77 
     100void sage_interrupt_handler(int sig);
     101void sage_signal_handler(int sig);
    78102
    79103/*
    80104 * Setup the signal handlers. It is safe to call this more than once.
     
    101125     * If this is strictly positive, we are inside a sig_on(). */
    102126    volatile sig_atomic_t sig_on_count;
    103127
    104     /* If this is nonzero, check for interrupts using PyErr_Occured()
    105      * during sig_on() and sig_unblock(). This value is increased
    106      * whenever an interrupt happens outside of sig_on() or inside
    107      * sig_block(). */
     128    /* If this is nonzero, it is a signal number of a non-critical
     129     * signal (e.g. SIGINT) which happened during a time when it could
     130     * not be handled.  This may be set when an interrupt occurs either
     131     * outside of sig_on() or inside sig_block().  To avoid race
     132     * conditions, this value may only be changed when all
     133     * interrupt-like signals are masked. */
    108134    volatile sig_atomic_t interrupt_received;
    109135
    110136    /* Are we currently handling a signal inside sage_signal_handler()?
     
    175201 */
    176202#define _sig_on_(message) ( unlikely(_sig_on_prejmp(message, __FILE__, __LINE__)) || _sig_on_postjmp(sigsetjmp(_signals.env,0)) )
    177203
    178 /* This will be called during _sig_on_postjmp() when a SIGINT was
    179  * received *before* the call to sig_on().
    180  * Return 0 if there was an interrupt, 1 otherwise. */
    181 int _sig_on_interrupt_received(void);
     204/* This will be called during _sig_on_postjmp() when an interrupt was
     205 * received *before* the call to sig_on(). */
     206void _sig_on_interrupt_received(void);
    182207
    183208/*
    184209 * Set message, return 0 if we need to sigsetjmp(), return 1 otherwise.
     
    187212{
    188213    _signals.s = message;
    189214#if ENABLE_DEBUG_INTERRUPT
    190     fprintf(stderr, "sig_on (count = %i) at %s:%i\n", _signals.sig_on_count+1, file, line);
    191     fflush(stderr);
     215    if (sage_interrupt_debug_level >= 4)
     216    {
     217        fprintf(stderr, "sig_on (count = %i) at %s:%i\n", _signals.sig_on_count+1, file, line);
     218        fflush(stderr);
     219    }
    192220#endif
    193221    if (_signals.sig_on_count > 0)
    194222    {
     
    229257     * volatile, we can safely evaluate _signals.interrupt_received here
    230258     * without race conditions. */
    231259    if (unlikely(_signals.interrupt_received))
    232         return _sig_on_interrupt_received();
     260    {
     261        _sig_on_interrupt_received();
     262        return 0;
     263    }
    233264
    234265    return 1;
    235266}
     
    244275static inline void _sig_off_(const char* file, int line)
    245276{
    246277#if ENABLE_DEBUG_INTERRUPT
    247     fprintf(stderr, "sig_off (count = %i) at %s:%i\n", _signals.sig_on_count, file, line);
    248     fflush(stderr);
     278    if (sage_interrupt_debug_level >= 4)
     279    {
     280        fprintf(stderr, "sig_off (count = %i) at %s:%i\n", _signals.sig_on_count, file, line);
     281        fflush(stderr);
     282    }
    249283#endif
    250284    if (unlikely(_signals.sig_on_count <= 0))
    251285    {
     
    284318static inline int sig_check()
    285319{
    286320    if (unlikely(_signals.interrupt_received) && _signals.sig_on_count == 0)
    287         return _sig_on_interrupt_received();
     321    {
     322        _sig_on_interrupt_received();
     323        return 0;
     324    }
    288325
    289326    return 1;
    290327}
     
    336373    _signals.block_sigint = 0;
    337374
    338375    if (unlikely(_signals.interrupt_received) && _signals.sig_on_count > 0)
    339         kill(getpid(), SIGINT);  /* Re-raise the interrupt */
     376        kill(getpid(), _signals.interrupt_received);  /* Re-raise the signal */
    340377}
    341378
    342379
  • c_lib/src/interrupt.c

    diff --git a/c_lib/src/interrupt.c b/c_lib/src/interrupt.c
    a b  
    77
    88- Jeroen Demeyer (2010-10-03): almost complete rewrite (#9678)
    99
     10- Jeroen Demeyer (2013-01-11): handle SIGHUP also (#13908)
     11
    1012*/
    1113
    1214/*****************************************************************************
     
    2325#include <stdio.h>
    2426#include <string.h>
    2527#include <limits.h>
     28#include <sys/time.h>
    2629/* glibc has a backtrace() command since version 2.1 */
    2730#ifdef __GLIBC__
    2831#if (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 1)
     
    3437#include "interrupt.h"
    3538
    3639
     40/* Interrupt debug level.  This only works if ENABLE_DEBUG_INTERRUPT
     41 * has been set to "1" in c_lib/include/interrupt.h */
     42#if ENABLE_DEBUG_INTERRUPT
     43int sage_interrupt_debug_level = 2;
     44static struct timeval sigtime;  /* Time of signal */
     45#endif
     46
     47
    3748struct sage_signals_t _signals;
    3849
    3950/* The default signal mask during normal operation,
    4051 * initialized by setup_sage_signal_handler(). */
    4152static sigset_t default_sigmask;
    4253
    43 /* default_sigmask with SIGINT and SIGALRM added. */
     54/* default_sigmask with SIGHUP, SIGINT, SIGALRM added. */
    4455static sigset_t sigmask_with_sigint;
    4556
    4657/* Does this processor support the x86 EMMS instruction? */
     
    8394}
    8495
    8596
    86 /* Handler for SIGINT */
     97/* Handler for SIGHUP, SIGINT */
    8798void sage_interrupt_handler(int sig)
    8899{
    89100#if ENABLE_DEBUG_INTERRUPT
    90     fprintf(stderr, "\n*** SIGINT *** %s sig_on\n", (_signals.sig_on_count > 0) ? "inside" : "outside");
    91     print_backtrace();
     101    if (sage_interrupt_debug_level >= 1) {
     102        fprintf(stderr, "\n*** SIG %i *** %s sig_on\n", sig, (_signals.sig_on_count > 0) ? "inside" : "outside");
     103        if (sage_interrupt_debug_level >= 3) print_backtrace();
     104        fflush(stderr);
     105        /* Store time of this signal, unless there is already a
     106         * pending signal. */
     107        if (!_signals.interrupt_received) gettimeofday(&sigtime, NULL);
     108    }
    92109#endif
    93110
    94111    if (_signals.sig_on_count > 0)
     
    105122    }
    106123    else
    107124    {
    108         /* Set an internal Python flag that an interrupt has been
    109          * raised.  This will not immediately raise an exception, only
    110          * on the next call of PyErr_CheckSignals().  We cannot simply
    111          * raise an exception here because of Python's "global
    112          * interpreter lock" -- Jeroen Demeyer */
     125        /* Set the Python interrupt indicator, which will cause the
     126         * Python-level interrupt handler in sage/ext/c_lib.pyx to be
     127         * called. */
    113128        PyErr_SetInterrupt();
    114129    }
    115130
    116131    /* If we are here, we cannot handle the interrupt immediately, so
    117      * we set interrupt_received for later use. */
    118     _signals.interrupt_received = 1;
     132     * we store the signal number for later use.  But make sure we
     133     * don't overwrite a SIGHUP or SIGTERM which we already received. */
     134    if (_signals.interrupt_received != SIGHUP && _signals.interrupt_received != SIGTERM)
     135        _signals.interrupt_received = sig;
    119136}
    120137
    121138/* Handler for SIGILL, SIGABRT, SIGFPE, SIGBUS, SIGSEGV */
     
    128145    {
    129146        /* We are inside sig_on(), so we can handle the signal! */
    130147#if ENABLE_DEBUG_INTERRUPT
    131         fprintf(stderr, "\n*** SIG %i *** inside sig_on\n", sig);
    132         print_backtrace();
     148        if (sage_interrupt_debug_level >= 1) {
     149            fprintf(stderr, "\n*** SIG %i *** inside sig_on\n", sig);
     150            if (sage_interrupt_debug_level >= 3) print_backtrace();
     151            fflush(stderr);
     152            gettimeofday(&sigtime, NULL);
     153        }
    133154#endif
    134155
    135156        /* Actually raise an exception so Python can see it */
     
    145166
    146167        /* Reset all signals to their default behaviour and unblock
    147168         * them in case something goes wrong from here on. */
     169        signal(SIGHUP, SIG_DFL);
     170        signal(SIGINT, SIG_DFL);
    148171        signal(SIGILL, SIG_DFL);
    149172        signal(SIGABRT, SIG_DFL);
    150173        signal(SIGFPE, SIG_DFL);
    151174        signal(SIGBUS, SIG_DFL);
    152175        signal(SIGSEGV, SIG_DFL);
    153         sigprocmask(SIG_SETMASK, &sigmask_with_sigint, NULL);
     176        signal(SIGTERM, SIG_DFL);
     177        sigprocmask(SIG_SETMASK, &default_sigmask, NULL);
    154178
    155179        if (inside) sigdie(sig, "An error occured during signal handling.");
    156180
     
    181205void sig_raise_exception(int sig)
    182206{
    183207#if ENABLE_DEBUG_INTERRUPT
    184     fprintf(stderr, "sig_raise_exception(sig=%i)\nPyErr_Occurred() = %p\nRaising Python exception...\n",
    185         sig, PyErr_Occurred());
    186     fflush(stderr);
     208    struct timeval raisetime;
     209    if (sage_interrupt_debug_level >= 2) {
     210        gettimeofday(&raisetime, NULL);
     211        long delta_ms = (raisetime.tv_sec - sigtime.tv_sec)*1000L + ((long)raisetime.tv_usec - (long)sigtime.tv_usec)/1000;
     212        fprintf(stderr, "sig_raise_exception(sig=%i)\nPyErr_Occurred() = %p\nRaising Python exception %li ms after signal...\n",
     213            sig, PyErr_Occurred(), delta_ms);
     214        fflush(stderr);
     215    }
    187216#endif
    188217
    189218    /* String to be printed in the Python exception */
     
    191220
    192221    switch(sig)
    193222    {
     223        case SIGHUP:
     224        case SIGTERM:
     225            /* Redirect stdin from /dev/null to close interactive sessions */
     226            freopen("/dev/null", "r", stdin);
     227
     228            /* This causes Python to exit */
     229            PyErr_SetNone(PyExc_SystemExit);
     230            break;
    194231        case SIGINT:
    195232            PyErr_SetNone(PyExc_KeyboardInterrupt);
    196233            break;
     
    221258
    222259
    223260/* Handle an interrupt before sig_on(). */
    224 int _sig_on_interrupt_received()
     261void _sig_on_interrupt_received()
    225262{
     263    /* Momentarily block signals to avoid race conditions */
     264    sigset_t oldset;
     265    sigprocmask(SIG_BLOCK, &sigmask_with_sigint, &oldset);
     266
     267    sig_raise_exception(_signals.interrupt_received);
    226268    _signals.interrupt_received = 0;
    227     if (PyErr_CheckSignals())
    228     {
    229         _signals.sig_on_count = 0;
    230         return 0;
    231     }
    232     return 1;
     269    _signals.sig_on_count = 0;
     270
     271    sigprocmask(SIG_SETMASK, &oldset, NULL);
    233272}
    234273
    235274/* Recover after siglongjmp() */
     
    237276{
    238277    _signals.block_sigint = 0;
    239278    _signals.sig_on_count = 0;
     279    _signals.interrupt_received = 0;
    240280
    241281    /* Reset signal mask */
    242282    sigprocmask(SIG_SETMASK, &default_sigmask, NULL);
     
    266306    /* Save the default signal mask */
    267307    sigprocmask(SIG_BLOCK, NULL, &default_sigmask);
    268308
    269     /* Save the signal mask with SIGINT and SIGALRM */
     309    /* Save the signal mask with non-critical signals blocked */
    270310    sigprocmask(SIG_BLOCK, NULL, &sigmask_with_sigint);
     311    sigaddset(&sigmask_with_sigint, SIGHUP);
    271312    sigaddset(&sigmask_with_sigint, SIGINT);
    272313    sigaddset(&sigmask_with_sigint, SIGALRM);
    273314
    274315    /* Install signal handlers */
    275316    struct sigaction sa;
    276317    memset(&sa, 0, sizeof(sa));
    277     /* Block SIGINT and SIGALRM during the signal handlers */
     318    /* Block non-critical signals during the signal handlers */
    278319    sigemptyset(&sa.sa_mask);
     320    sigaddset(&sa.sa_mask, SIGHUP);
    279321    sigaddset(&sa.sa_mask, SIGINT);
    280322    sigaddset(&sa.sa_mask, SIGALRM);
    281323
    282324    sa.sa_handler = sage_interrupt_handler;
     325    if (sigaction(SIGHUP, &sa, NULL)) {perror("sigaction"); exit(1);}
    283326    if (sigaction(SIGINT, &sa, NULL)) {perror("sigaction"); exit(1);}
    284327    sa.sa_handler = sage_signal_handler;
    285328    /* Allow signals during signal handling, we have code to deal with
  • sage/ext/c_lib.pyx

    diff --git a/sage/ext/c_lib.pyx b/sage/ext/c_lib.pyx
    a b  
    2424
    2525def _init_csage():
    2626    """
    27     Call init_csage().
     27    Call init_csage() and enable interrupts.
    2828
    2929    This is normally done exactly once during Sage startup from
    3030    sage/all.py
    3131    """
     32    # Set the Python-level interrupt handler. When a SIGINT occurs,
     33    # this will not be called directly. Instead, a SIGINT is caught by
     34    # the libcsage (c_lib) interrupt handler. If it happens during pure
     35    # Python code (not within sig_on()/sig_off()), the handler will set
     36    # Python's interrupt flag. Python regularly checks this and will
     37    # call its interrupt handler (which is the one we set now). This
     38    # handler issues a sig_check() which finally raises the
     39    # KeyboardInterrupt exception.
     40    import signal
     41    signal.signal(signal.SIGINT, sage_python_check_interrupt)
     42
    3243    init_csage()
    3344
    3445
     
    4657        0
    4758    """
    4859    return _signals.sig_on_count
     60
     61
     62def sage_python_check_interrupt(sig, frame):
     63    """
     64    Python-level interrupt handler for interrupts raised in Python
     65    code. This simply delegates to the interrupt handling code in
     66    libcsage (c_lib).
     67    """
     68    sig_check()
  • sage/tests/interrupt.pyx

    diff --git a/sage/tests/interrupt.pyx b/sage/tests/interrupt.pyx
    a b  
    790790    cdef int i
    791791    for i in range(1000000):
    792792        sig_check()
     793
     794
     795########################################################################
     796# Test SIGHUP                                                          #
     797########################################################################
     798@return_exception
     799def test_sighup(long delay = DEFAULT_DELAY):
     800    """
     801    Test a basic SIGHUP signal, which would normally exit Sage by
     802    raising ``SystemExit``.
     803
     804    TESTS::
     805
     806        sage: from sage.tests.interrupt import *
     807        sage: test_sighup()
     808        SystemExit()
     809    """
     810    signal_after_delay(SIGHUP, delay)
     811    while True:
     812        sig_check()
     813
     814@return_exception
     815def test_sigterm_and_sigint(long delay = DEFAULT_DELAY):
     816    """
     817    Test a SIGHUP and a SIGINT arriving at essentially the same time.
     818    The SIGINT should be ignored and we should get a ``SystemExit``.
     819
     820    TESTS::
     821
     822        sage: from sage.tests.interrupt import *
     823        sage: test_sigterm_and_sigint()
     824        SystemExit()
     825    """
     826    sig_on()
     827    sig_block()
     828    signal_after_delay(SIGHUP, delay)
     829    signal_after_delay(SIGINT, delay)
     830    # 3 sleeps to ensure both signals arrive
     831    ms_sleep(delay)
     832    ms_sleep(delay)
     833    ms_sleep(delay)
     834    sig_unblock()
     835    sig_off()
     836
     837def test_graceful_exit():
     838    r"""
     839    TESTS:
     840
     841    Start a Sage subprocess, spawn a child PARI/GP process and kill the
     842    Sage process.  The PARI/GP process should exit by itself. ::
     843
     844        sage: from subprocess import *
     845        sage: from signal import *
     846        sage: P = Popen(['sage-ipython'], stdin=PIPE, stdout=PIPE, stderr=PIPE)  # long time
     847        sage: P.stdin.write('from sage.tests.interrupt import *\n')  # long time
     848        sage: P.stdin.write('test_graceful_exit()\n')  # long time
     849
     850    Now read from the child until we read ``"GO"``.  This ensures that
     851    the child Sage process has properly started before we terminate it::
     852
     853        sage: while "GO" not in P.stdout.readline(): pass  # long time
     854        sage: os.kill(P.pid, SIGHUP)  # long time
     855        sage: P.stdout.read()  # long time
     856        '...Exiting spawned PARI/GP interpreter process...'
     857        sage: P.wait()  # long time
     858        0
     859    """
     860    # This code is executed in the subprocess
     861    import os, sys
     862    from sage.interfaces.gp import gp
     863
     864    # Keep PARI/GP busy
     865    gp(0)  # Ensure PARI/GP is started
     866    gp._expect.sendline("factor(2^1000-3);")
     867
     868    # Print something to synchronize with the parent
     869    print("GO")
     870    sys.stdout.flush()
     871
     872    # Wait to be killed...
     873    sig_on()
     874    infinite_loop()