Skip to content

Commit

Permalink
Merge tag 'ptrace_stop-cleanup-for-v5.19' of git://git.kernel.org/pub…
Browse files Browse the repository at this point in the history
…/scm/linux/kernel/git/ebiederm/user-namespace

Pull ptrace_stop cleanups from Eric Biederman:
 "While looking at the ptrace problems with PREEMPT_RT and the problems
  Peter Zijlstra was encountering with ptrace in his freezer rewrite I
  identified some cleanups to ptrace_stop that make sense on their own
  and make resolving the other problems much simpler.

  The biggest issue is the habit of the ptrace code to change
  task->__state from the tracer to suppress TASK_WAKEKILL from waking up
  the tracee. No other code in the kernel does that and it is
  straightforward to update signal_wake_up and friends to make that unnecessary.

  Peter's task freezer sets frozen tasks to a new state TASK_FROZEN and
  then it restores them by calling "wake_up_state(t, TASK_FROZEN)" relying
  on the fact that all stopped states except the special stop states can
  tolerate spurious wake ups and recover their state.

  The state of stopped and traced tasks is changed to be stored in
  task->jobctl as well as in task->__state. This makes it possible for
  the freezer to recover tasks in these special states, as well as
  serving as a general cleanup. With a little more work in that
  direction I believe TASK_STOPPED can learn to tolerate spurious wake
  ups and become an ordinary stop state.

  The TASK_TRACED state has to remain a special state as the registers
  for a process are only reliably available when the process is stopped
  in the scheduler. Fundamentally ptrace needs access to the saved
  register values of a task.

  There are a bunch of semi-random ptrace related cleanups that were found
  while looking at these issues.

  One cleanup that deserves to be called out is from commit 57b6de0
  ("ptrace: Admit ptrace_stop can generate spuriuos SIGTRAPs"). This
  makes a change that is technically user space visible, in the handling
  of what happens to a tracee when a tracer dies unexpectedly. According
  to our testing and our understanding of userspace nothing cares that
  spurious SIGTRAPs can be generated in that case"

* tag 'ptrace_stop-cleanup-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  sched,signal,ptrace: Rework TASK_TRACED, TASK_STOPPED state
  ptrace: Always take siglock in ptrace_resume
  ptrace: Don't change __state
  ptrace: Admit ptrace_stop can generate spuriuos SIGTRAPs
  ptrace: Document that wait_task_inactive can't fail
  ptrace: Reimplement PTRACE_KILL by always sending SIGKILL
  signal: Use lockdep_assert_held instead of assert_spin_locked
  ptrace: Remove arch_ptrace_attach
  ptrace/xtensa: Replace PT_SINGLESTEP with TIF_SINGLESTEP
  ptrace/um: Replace PT_DTRACE with TIF_SINGLESTEP
  signal: Replace __group_send_sig_info with send_signal_locked
  signal: Rename send_signal send_signal_locked
  • Loading branch information
torvalds committed Jun 3, 2022
2 parents 1ec6574 + 31cae1e commit 67850b7
Show file tree
Hide file tree
Showing 20 changed files with 140 additions and 240 deletions.
4 changes: 0 additions & 4 deletions arch/ia64/include/asm/ptrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,6 @@ static inline long regs_return_value(struct pt_regs *regs)
#define arch_ptrace_stop_needed() \
(!test_thread_flag(TIF_RESTORE_RSE))

extern void ptrace_attach_sync_user_rbs (struct task_struct *);
#define arch_ptrace_attach(child) \
ptrace_attach_sync_user_rbs(child)

#define arch_has_single_step() (1)
#define arch_has_block_step() (1)

Expand Down
57 changes: 0 additions & 57 deletions arch/ia64/kernel/ptrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -617,63 +617,6 @@ void ia64_sync_krbs(void)
unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs);
}

/*
* After PTRACE_ATTACH, a thread's register backing store area in user
* space is assumed to contain correct data whenever the thread is
* stopped. arch_ptrace_stop takes care of this on tracing stops.
* But if the child was already stopped for job control when we attach
* to it, then it might not ever get into ptrace_stop by the time we
* want to examine the user memory containing the RBS.
*/
void
ptrace_attach_sync_user_rbs (struct task_struct *child)
{
int stopped = 0;
struct unw_frame_info info;

/*
* If the child is in TASK_STOPPED, we need to change that to
* TASK_TRACED momentarily while we operate on it. This ensures
* that the child won't be woken up and return to user mode while
* we are doing the sync. (It can only be woken up for SIGKILL.)
*/

read_lock(&tasklist_lock);
if (child->sighand) {
spin_lock_irq(&child->sighand->siglock);
if (READ_ONCE(child->__state) == TASK_STOPPED &&
!test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) {
set_notify_resume(child);

WRITE_ONCE(child->__state, TASK_TRACED);
stopped = 1;
}
spin_unlock_irq(&child->sighand->siglock);
}
read_unlock(&tasklist_lock);

if (!stopped)
return;

unw_init_from_blocked_task(&info, child);
do_sync_rbs(&info, ia64_sync_user_rbs);

/*
* Now move the child back into TASK_STOPPED if it should be in a
* job control stop, so that SIGCONT can be used to wake it up.
*/
read_lock(&tasklist_lock);
if (child->sighand) {
spin_lock_irq(&child->sighand->siglock);
if (READ_ONCE(child->__state) == TASK_TRACED &&
(child->signal->flags & SIGNAL_STOP_STOPPED)) {
WRITE_ONCE(child->__state, TASK_STOPPED);
}
spin_unlock_irq(&child->sighand->siglock);
}
read_unlock(&tasklist_lock);
}

/*
* Write f32-f127 back to task->thread.fph if it has been modified.
*/
Expand Down
2 changes: 2 additions & 0 deletions arch/um/include/asm/thread_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_RESTORE_SIGMASK 7
#define TIF_NOTIFY_RESUME 8
#define TIF_SECCOMP 9 /* secure computing */
#define TIF_SINGLESTEP 10 /* single stepping userspace */

#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
Expand All @@ -68,5 +69,6 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_MEMDIE (1 << TIF_MEMDIE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)

#endif
2 changes: 1 addition & 1 deletion arch/um/kernel/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
{
PT_REGS_IP(regs) = eip;
PT_REGS_SP(regs) = esp;
current->ptrace &= ~PT_DTRACE;
clear_thread_flag(TIF_SINGLESTEP);
#ifdef SUBARCH_EXECVE1
SUBARCH_EXECVE1(regs->regs);
#endif
Expand Down
2 changes: 1 addition & 1 deletion arch/um/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ int singlestepping(void * t)
{
struct task_struct *task = t ? t : current;

if (!(task->ptrace & PT_DTRACE))
if (!test_thread_flag(TIF_SINGLESTEP))
return 0;

if (task->thread.singlestep_syscall)
Expand Down
8 changes: 4 additions & 4 deletions arch/um/kernel/ptrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

void user_enable_single_step(struct task_struct *child)
{
child->ptrace |= PT_DTRACE;
set_tsk_thread_flag(child, TIF_SINGLESTEP);
child->thread.singlestep_syscall = 0;

#ifdef SUBARCH_SET_SINGLESTEPPING
Expand All @@ -21,7 +21,7 @@ void user_enable_single_step(struct task_struct *child)

void user_disable_single_step(struct task_struct *child)
{
child->ptrace &= ~PT_DTRACE;
clear_tsk_thread_flag(child, TIF_SINGLESTEP);
child->thread.singlestep_syscall = 0;

#ifdef SUBARCH_SET_SINGLESTEPPING
Expand Down Expand Up @@ -120,7 +120,7 @@ static void send_sigtrap(struct uml_pt_regs *regs, int error_code)
}

/*
* XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and
* XXX Check TIF_SINGLESTEP for singlestepping check and
* PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check
*/
int syscall_trace_enter(struct pt_regs *regs)
Expand All @@ -144,7 +144,7 @@ void syscall_trace_leave(struct pt_regs *regs)
audit_syscall_exit(regs);

/* Fake a debug trap */
if (ptraced & PT_DTRACE)
if (test_thread_flag(TIF_SINGLESTEP))
send_sigtrap(&regs->regs, 0);

if (!test_thread_flag(TIF_SYSCALL_TRACE))
Expand Down
4 changes: 2 additions & 2 deletions arch/um/kernel/signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
unsigned long sp;
int err;

if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED))
singlestep = 1;

/* Did we come from a system call? */
Expand Down Expand Up @@ -128,7 +128,7 @@ void do_signal(struct pt_regs *regs)
* on the host. The tracing thread will check this flag and
* PTRACE_SYSCALL if necessary.
*/
if (current->ptrace & PT_DTRACE)
if (test_thread_flag(TIF_SINGLESTEP))
current->thread.singlestep_syscall =
is_syscall(PT_REGS_IP(&current->thread.regs));

Expand Down
3 changes: 1 addition & 2 deletions arch/x86/kernel/step.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,7 @@ void set_task_blockstep(struct task_struct *task, bool on)
*
* NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if
* task is current or it can't be running, otherwise we can race
* with __switch_to_xtra(). We rely on ptrace_freeze_traced() but
* PTRACE_KILL is not safe.
* with __switch_to_xtra(). We rely on ptrace_freeze_traced().
*/
local_irq_disable();
debugctl = get_debugctlmsr();
Expand Down
4 changes: 2 additions & 2 deletions arch/xtensa/kernel/ptrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,12 +224,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)

void user_enable_single_step(struct task_struct *child)
{
child->ptrace |= PT_SINGLESTEP;
set_tsk_thread_flag(child, TIF_SINGLESTEP);
}

void user_disable_single_step(struct task_struct *child)
{
child->ptrace &= ~PT_SINGLESTEP;
clear_tsk_thread_flag(child, TIF_SINGLESTEP);
}

/*
Expand Down
4 changes: 2 additions & 2 deletions arch/xtensa/kernel/signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ static void do_signal(struct pt_regs *regs)
/* Set up the stack frame */
ret = setup_frame(&ksig, sigmask_to_save(), regs);
signal_setup_done(ret, &ksig, 0);
if (current->ptrace & PT_SINGLESTEP)
if (test_thread_flag(TIF_SINGLESTEP))
task_pt_regs(current)->icountlevel = 1;

return;
Expand All @@ -498,7 +498,7 @@ static void do_signal(struct pt_regs *regs)
/* If there's no signal to deliver, we just restore the saved mask. */
restore_saved_sigmask();

if (current->ptrace & PT_SINGLESTEP)
if (test_thread_flag(TIF_SINGLESTEP))
task_pt_regs(current)->icountlevel = 1;
return;
}
Expand Down
4 changes: 2 additions & 2 deletions drivers/tty/tty_jobctrl.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,8 @@ int tty_signal_session_leader(struct tty_struct *tty, int exit_session)
spin_unlock_irq(&p->sighand->siglock);
continue;
}
__group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
__group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
send_signal_locked(SIGHUP, SEND_SIG_PRIV, p, PIDTYPE_TGID);
send_signal_locked(SIGCONT, SEND_SIG_PRIV, p, PIDTYPE_TGID);
put_pid(p->signal->tty_old_pgrp); /* A noop */
spin_lock(&tty->ctrl.lock);
tty_pgrp = get_pid(tty->ctrl.pgrp);
Expand Down
7 changes: 0 additions & 7 deletions include/linux/ptrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,

#define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */
#define PT_PTRACED 0x00000001
#define PT_DTRACE 0x00000002 /* delayed trace (used on um) */

#define PT_OPT_FLAG_SHIFT 3
/* PT_TRACE_* event enable flags */
Expand All @@ -47,12 +46,6 @@ extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
#define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
#define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)

/* single stepping state bits (used on ARM and PA-RISC) */
#define PT_SINGLESTEP_BIT 31
#define PT_SINGLESTEP (1<<PT_SINGLESTEP_BIT)
#define PT_BLOCKSTEP_BIT 30
#define PT_BLOCKSTEP (1<<PT_BLOCKSTEP_BIT)

extern long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data);
extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
Expand Down
10 changes: 4 additions & 6 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ struct task_group;
/* Convenience macros for the sake of set_current_state: */
#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED)
#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED)
#define TASK_TRACED __TASK_TRACED

#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

Expand All @@ -118,11 +118,9 @@ struct task_group;

#define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING)

#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0)

#define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)

#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
#define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0)
#define task_is_stopped(task) ((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0)
#define task_is_stopped_or_traced(task) ((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0)

/*
* Special states are those that do not use the normal wait-loop pattern. See
Expand Down
8 changes: 8 additions & 0 deletions include/linux/sched/jobctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ struct task_struct;
#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */
#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */
#define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */
#define JOBCTL_PTRACE_FROZEN_BIT 24 /* frozen for ptrace */

#define JOBCTL_STOPPED_BIT 26 /* do_signal_stop() */
#define JOBCTL_TRACED_BIT 27 /* ptrace_stop() */

#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT)
#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT)
Expand All @@ -28,6 +32,10 @@ struct task_struct;
#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT)
#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT)
#define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT)
#define JOBCTL_PTRACE_FROZEN (1UL << JOBCTL_PTRACE_FROZEN_BIT)

#define JOBCTL_STOPPED (1UL << JOBCTL_STOPPED_BIT)
#define JOBCTL_TRACED (1UL << JOBCTL_TRACED_BIT)

#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
Expand Down
20 changes: 16 additions & 4 deletions include/linux/sched/signal.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,10 @@ static inline int kernel_dequeue_signal(void)
static inline void kernel_signal_stop(void)
{
spin_lock_irq(&current->sighand->siglock);
if (current->jobctl & JOBCTL_STOP_DEQUEUED)
if (current->jobctl & JOBCTL_STOP_DEQUEUED) {
current->jobctl |= JOBCTL_STOPPED;
set_special_state(TASK_STOPPED);
}
spin_unlock_irq(&current->sighand->siglock);

schedule();
Expand Down Expand Up @@ -444,13 +446,23 @@ extern void calculate_sigpending(void);

extern void signal_wake_up_state(struct task_struct *t, unsigned int state);

static inline void signal_wake_up(struct task_struct *t, bool resume)
static inline void signal_wake_up(struct task_struct *t, bool fatal)
{
signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
unsigned int state = 0;
if (fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN)) {
t->jobctl &= ~(JOBCTL_STOPPED | JOBCTL_TRACED);
state = TASK_WAKEKILL | __TASK_TRACED;
}
signal_wake_up_state(t, state);
}
static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
{
signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
unsigned int state = 0;
if (resume) {
t->jobctl &= ~JOBCTL_TRACED;
state = __TASK_TRACED;
}
signal_wake_up_state(t, state);
}

void task_join_group_stop(struct task_struct *task);
Expand Down
3 changes: 2 additions & 1 deletion include/linux/signal.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,8 @@ extern int do_send_sig_info(int sig, struct kernel_siginfo *info,
struct task_struct *p, enum pid_type type);
extern int group_send_sig_info(int sig, struct kernel_siginfo *info,
struct task_struct *p, enum pid_type type);
extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struct *);
extern int send_signal_locked(int sig, struct kernel_siginfo *info,
struct task_struct *p, enum pid_type type);
extern int sigprocmask(int, sigset_t *, sigset_t *);
extern void set_current_blocked(sigset_t *);
extern void __set_current_blocked(const sigset_t *);
Expand Down
Loading

0 comments on commit 67850b7

Please sign in to comment.