Skip to content

Commit

Permalink
pid namespaces: define is_global_init() and is_container_init()
Browse files Browse the repository at this point in the history
is_init() is an ambiguous name for the pid==1 check.  Split it into
is_global_init() and is_container_init().

A cgroup init has its tsk->pid == 1.

A global init also has its tsk->pid == 1 and its active pid namespace
is the init_pid_ns.  But rather than check the active pid namespace,
compare the task structure with 'init_pid_ns.child_reaper', which is
initialized during boot to the /sbin/init process and never changes.

Changelog:

	2.6.22-rc4-mm2-pidns1:
	- Use 'init_pid_ns.child_reaper' to determine if a given task is the
	  global init (/sbin/init) process. This would improve performance
	  and remove dependence on the task_pid().

	2.6.21-mm2-pidns2:

	- [Sukadev Bhattiprolu] Changed is_container_init() calls in {powerpc,
	  ppc,avr32}/traps.c for the _exception() call to is_global_init().
	  This way, we kill only the cgroup if the cgroup's init has a
	  bug rather than force a kernel panic.

[[email protected]: fix comment]
[[email protected]: Use is_global_init() in arch/m32r/mm/fault.c]
[[email protected]: kernel/pid.c: remove unused exports]
[[email protected]: Fix capability.c to work with threaded init]
Signed-off-by: Serge E. Hallyn <[email protected]>
Signed-off-by: Sukadev Bhattiprolu <[email protected]>
Acked-by: Pavel Emelianov <[email protected]>
Cc: Eric W. Biederman <[email protected]>
Cc: Cedric Le Goater <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Herbert Poetzel <[email protected]>
Cc: Kirill Korotaev <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Serge E. Hallyn authored and Linus Torvalds committed Oct 19, 2007
1 parent 3743ca0 commit b460cbc
Show file tree
Hide file tree
Showing 32 changed files with 52 additions and 37 deletions.
2 changes: 1 addition & 1 deletion arch/alpha/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
/* We ran out of memory, or some other thing happened to us that
made us unable to handle the page fault gracefully. */
out_of_memory:
if (is_init(current)) {
if (is_global_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
Expand Down
2 changes: 1 addition & 1 deletion arch/arm/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
return fault;

out_of_memory:
if (!is_init(tsk))
if (!is_global_init(tsk))
goto out;

/*
Expand Down
2 changes: 1 addition & 1 deletion arch/avr32/kernel/traps.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ void _exception(long signr, struct pt_regs *regs, int code,
* generate the same exception over and over again and we get
* nowhere. Better to kill it and let the kernel panic.
*/
if (is_init(current)) {
if (is_global_init(current)) {
__sighandler_t handler;

spin_lock_irq(&current->sighand->siglock);
Expand Down
6 changes: 3 additions & 3 deletions arch/avr32/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
if (exception_trace && printk_ratelimit())
printk("%s%s[%d]: segfault at %08lx pc %08lx "
"sp %08lx ecr %lu\n",
is_init(tsk) ? KERN_EMERG : KERN_INFO,
is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
tsk->comm, tsk->pid, address, regs->pc,
regs->sp, ecr);
_exception(SIGSEGV, regs, code, address);
Expand Down Expand Up @@ -209,7 +209,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
*/
out_of_memory:
up_read(&mm->mmap_sem);
if (is_init(current)) {
if (is_global_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
Expand All @@ -231,7 +231,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
if (exception_trace)
printk("%s%s[%d]: bus error at %08lx pc %08lx "
"sp %08lx ecr %lu\n",
is_init(tsk) ? KERN_EMERG : KERN_INFO,
is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
tsk->comm, tsk->pid, address, regs->pc,
regs->sp, ecr);

Expand Down
2 changes: 1 addition & 1 deletion arch/ia64/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re

out_of_memory:
up_read(&mm->mmap_sem);
if (is_init(current)) {
if (is_global_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
Expand Down
2 changes: 1 addition & 1 deletion arch/m32r/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
*/
out_of_memory:
up_read(&mm->mmap_sem);
if (is_init(tsk)) {
if (is_global_init(tsk)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
Expand Down
2 changes: 1 addition & 1 deletion arch/m68k/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
*/
out_of_memory:
up_read(&mm->mmap_sem);
if (is_init(current)) {
if (is_global_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
Expand Down
2 changes: 1 addition & 1 deletion arch/mips/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
*/
out_of_memory:
up_read(&mm->mmap_sem);
if (is_init(tsk)) {
if (is_global_init(tsk)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/kernel/traps.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
* generate the same exception over and over again and we get
* nowhere. Better to kill it and let the kernel panic.
*/
if (is_init(current)) {
if (is_global_init(current)) {
__sighandler_t handler;

spin_lock_irq(&current->sighand->siglock);
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
*/
out_of_memory:
up_read(&mm->mmap_sem);
if (is_init(current)) {
if (is_global_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/platforms/pseries/ras.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
err->disposition == RTAS_DISP_NOT_RECOVERED &&
err->target == RTAS_TARGET_MEMORY &&
err->type == RTAS_TYPE_ECC_UNCORR &&
!(current->pid == 0 || is_init(current))) {
!(current->pid == 0 || is_global_init(current))) {
/* Kill off a user process with an ECC error */
printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
current->pid);
Expand Down
2 changes: 1 addition & 1 deletion arch/ppc/kernel/traps.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
* generate the same exception over and over again and we get
* nowhere. Better to kill it and let the kernel panic.
*/
if (is_init(current)) {
if (is_global_init(current)) {
__sighandler_t handler;

spin_lock_irq(&current->sighand->siglock);
Expand Down
2 changes: 1 addition & 1 deletion arch/ppc/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
*/
out_of_memory:
up_read(&mm->mmap_sem);
if (is_init(current)) {
if (is_global_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
Expand Down
2 changes: 1 addition & 1 deletion arch/s390/lib/uaccess_pt.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ static int __handle_fault(struct mm_struct *mm, unsigned long address,

out_of_memory:
up_read(&mm->mmap_sem);
if (is_init(current)) {
if (is_global_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
Expand Down
2 changes: 1 addition & 1 deletion arch/s390/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code,
struct mm_struct *mm = tsk->mm;

up_read(&mm->mmap_sem);
if (is_init(tsk)) {
if (is_global_init(tsk)) {
yield();
down_read(&mm->mmap_sem);
return 1;
Expand Down
2 changes: 1 addition & 1 deletion arch/sh/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
*/
out_of_memory:
up_read(&mm->mmap_sem);
if (is_init(current)) {
if (is_global_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
Expand Down
6 changes: 3 additions & 3 deletions arch/sh64/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
show_regs(regs);
#endif
}
if (is_init(tsk)) {
if (is_global_init(tsk)) {
panic("INIT had user mode bad_area\n");
}
tsk->thread.address = address;
Expand Down Expand Up @@ -320,14 +320,14 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
* us unable to handle the page fault gracefully.
*/
out_of_memory:
if (is_init(current)) {
if (is_global_init(current)) {
panic("INIT out of memory\n");
yield();
goto survive;
}
printk("fault:Out of memory\n");
up_read(&mm->mmap_sem);
if (is_init(current)) {
if (is_global_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
Expand Down
2 changes: 1 addition & 1 deletion arch/um/kernel/trap.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
* us unable to handle the page fault gracefully.
*/
out_of_memory:
if (is_init(current)) {
if (is_global_init(current)) {
up_read(&mm->mmap_sem);
yield();
down_read(&mm->mmap_sem);
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/lib/usercopy_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,7 @@ unsigned long __copy_to_user_ll(void __user *to, const void *from,
retval = get_user_pages(current, current->mm,
(unsigned long )to, 1, 1, 0, &pg, NULL);

if (retval == -ENOMEM && is_init(current)) {
if (retval == -ENOMEM && is_global_init(current)) {
up_read(&current->mm->mmap_sem);
congestion_wait(WRITE, HZ/50);
goto survive;
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/mm/fault_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
*/
out_of_memory:
up_read(&mm->mmap_sem);
if (is_init(tsk)) {
if (is_global_init(tsk)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/mm/fault_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
*/
out_of_memory:
up_read(&mm->mmap_sem);
if (is_init(current)) {
if (is_global_init(current)) {
yield();
goto again;
}
Expand Down
2 changes: 1 addition & 1 deletion arch/xtensa/mm/fault.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ void do_page_fault(struct pt_regs *regs)
*/
out_of_memory:
up_read(&mm->mmap_sem);
if (is_init(current)) {
if (is_global_init(current)) {
yield();
down_read(&mm->mmap_sem);
goto survive;
Expand Down
2 changes: 1 addition & 1 deletion drivers/char/sysrq.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ static void send_sig_all(int sig)
struct task_struct *p;

for_each_process(p) {
if (p->mm && !is_init(p))
if (p->mm && !is_global_init(p))
/* Not swapper, init nor kernel thread */
force_sig(sig, p);
}
Expand Down
12 changes: 10 additions & 2 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -1237,12 +1237,20 @@ static inline int pid_alive(struct task_struct *p)
}

/**
* is_init - check if a task structure is init
* is_global_init - check if a task structure is init
* @tsk: Task structure to be checked.
*
* Check if a task structure is the first user space task the kernel created.
*
* TODO: We should inline this function after some cleanups in pid_namespace.h
*/
extern int is_global_init(struct task_struct *tsk);

/*
* is_container_init:
* check whether the task is init in its own pid namespace.
*/
static inline int is_init(struct task_struct *tsk)
static inline int is_container_init(struct task_struct *tsk)
{
return tsk->pid == 1;
}
Expand Down
3 changes: 2 additions & 1 deletion kernel/capability.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/pid_namespace.h>
#include <asm/uaccess.h>

/*
Expand Down Expand Up @@ -129,7 +130,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
int found = 0;

do_each_thread(g, target) {
if (target == current || is_init(target))
if (target == current || is_container_init(target->group_leader))
continue;
found = 1;
if (security_capset_check(target, effective, inheritable,
Expand Down
2 changes: 1 addition & 1 deletion kernel/exit.c
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignor
do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
if (p == ignored_task
|| p->exit_state
|| is_init(p->real_parent))
|| is_global_init(p->real_parent))
continue;
if (task_pgrp(p->real_parent) != pgrp &&
task_session(p->real_parent) == task_session(p)) {
Expand Down
2 changes: 1 addition & 1 deletion kernel/kexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ struct resource crashk_res = {

int kexec_should_crash(struct task_struct *p)
{
if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops)
if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
return 1;
return 0;
}
Expand Down
5 changes: 5 additions & 0 deletions kernel/pid.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ struct pid_namespace init_pid_ns = {
.child_reaper = &init_task
};

/*
 * is_global_init - test whether @tsk is the global init task.
 *
 * The match is made on the task pointer, against the reaper recorded in
 * init_pid_ns, rather than on the task's pid.  Returns non-zero when
 * @tsk is that task and zero otherwise.
 */
int is_global_init(struct task_struct *tsk)
{
	struct task_struct *global_reaper = init_pid_ns.child_reaper;

	return global_reaper == tsk;
}

/*
* Note: disable interrupts while the pidmap_lock is held as an
* interrupt might come in and do read_lock(&tasklist_lock).
Expand Down
2 changes: 1 addition & 1 deletion kernel/signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ flush_signal_handlers(struct task_struct *t, int force_default)

int unhandled_signal(struct task_struct *tsk, int sig)
{
if (is_init(tsk))
if (is_global_init(tsk))
return 1;
if (tsk->ptrace & PT_PTRACED)
return 0;
Expand Down
2 changes: 1 addition & 1 deletion kernel/sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1888,7 +1888,7 @@ int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
return -EPERM;
}

op = is_init(current) ? OP_SET : OP_AND;
op = is_global_init(current) ? OP_SET : OP_AND;
return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
do_proc_dointvec_bset_conv,&op);
}
Expand Down
4 changes: 2 additions & 2 deletions mm/oom_kill.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
if (!p->mm)
continue;
/* skip the init task */
if (is_init(p))
if (is_global_init(p))
continue;

/*
Expand Down Expand Up @@ -265,7 +265,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
*/
static void __oom_kill_task(struct task_struct *p, int verbose)
{
if (is_init(p)) {
if (is_global_init(p)) {
WARN_ON(1);
printk(KERN_WARNING "tried to kill init!\n");
return;
Expand Down
3 changes: 2 additions & 1 deletion security/commoncap.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <linux/xattr.h>
#include <linux/hugetlb.h>
#include <linux/mount.h>
#include <linux/sched.h>

#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
/*
Expand Down Expand Up @@ -334,7 +335,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
/* For init, we want to retain the capabilities set
* in the init_task struct. Thus we skip the usual
* capability rules */
if (!is_init(current)) {
if (!is_global_init(current)) {
current->cap_permitted = new_permitted;
current->cap_effective = bprm->cap_effective ?
new_permitted : 0;
Expand Down

0 comments on commit b460cbc

Please sign in to comment.