define new percpu interface for shared data
The per cpu data section contains two types of data: one set that is
exclusively accessed by the local cpu, and another set that is per cpu but
also shared by remote cpus.  In the current kernel these two sets are not
clearly separated out.  This can cause the same data cacheline to be
shared between the two sets of data, resulting in unnecessary bouncing of
the cacheline between cpus.
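
A minimal sketch of the hazard, using hypothetical variable names that are
not part of this patch:

    #include <linux/percpu.h>
    #include <linux/spinlock.h>

    /* Both definitions are emitted into .data.percpu and may share a cacheline. */
    DEFINE_PER_CPU(unsigned long, local_event_count); /* touched only by its own cpu */
    DEFINE_PER_CPU(spinlock_t, remote_wakeup_lock);   /* also taken by remote cpus */

Remote cpus acquiring the lock then bounce the very cacheline that the
purely local counter lives in.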

One way to fix the problem is to cacheline align the remotely accessed per
cpu data, both at the beginning and at the end.  Because of the padding at
both ends, this would likely waste some memory, and the interface to
achieve it is not clean.

This patch:

Moves the remotely accessed per cpu data (which is currently marked
____cacheline_aligned_in_smp) into a separate section in which all data
elements are cacheline aligned, cleanly separating the local-only data
from the remotely accessed data.
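
With the new interface, a definition that today would be written as

    DEFINE_PER_CPU(spinlock_t, remote_wakeup_lock) ____cacheline_aligned_in_smp;

becomes (same hypothetical variable as in the sketch above)

    DEFINE_PER_CPU_SHARED_ALIGNED(spinlock_t, remote_wakeup_lock);

placing it in the new .data.percpu.shared_aligned section.  The linker
script boilerplate that collects both sections is also consolidated into a
PERCPU(align) macro in include/asm-generic/vmlinux.lds.h, which most of
the arch diffs below now invoke.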

Signed-off-by: Fenghua Yu <[email protected]>
Acked-by: Suresh Siddha <[email protected]>
Cc: Rusty Russell <[email protected]>
Cc: Christoph Lameter <[email protected]>
Cc: <[email protected]>
Cc: "Luck, Tony" <[email protected]>
Cc: Andi Kleen <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
fyu1 authored and Linus Torvalds committed Jul 19, 2007
1 parent 3d7e338 commit 5fb7dc3
Showing 26 changed files with 84 additions and 53 deletions.
5 changes: 1 addition & 4 deletions arch/alpha/kernel/vmlinux.lds.S
@@ -69,10 +69,7 @@ SECTIONS
  . = ALIGN(8);
  SECURITY_INIT

- . = ALIGN(8192);
- __per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
- __per_cpu_end = .;
+ PERCPU(8192)

  . = ALIGN(2*8192);
  __init_end = .;
1 change: 1 addition & 0 deletions arch/arm/kernel/vmlinux.lds.S
@@ -66,6 +66,7 @@ SECTIONS
  . = ALIGN(4096);
  __per_cpu_start = .;
  *(.data.percpu)
+ *(.data.percpu.shared_aligned)
  __per_cpu_end = .;
  #ifndef CONFIG_XIP_KERNEL
  __init_begin = _stext;
5 changes: 1 addition & 4 deletions arch/cris/arch-v32/vmlinux.lds.S
@@ -91,10 +91,7 @@ SECTIONS
  }
  SECURITY_INIT

- . = ALIGN (8192);
- __per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
- __per_cpu_end = .;
+ PERCPU(8192)

  #ifdef CONFIG_BLK_DEV_INITRD
  .init.ramfs : {
5 changes: 1 addition & 4 deletions arch/frv/kernel/vmlinux.lds.S
@@ -57,10 +57,7 @@ SECTIONS
  __alt_instructions_end = .;
  .altinstr_replacement : { *(.altinstr_replacement) }

- . = ALIGN(4096);
- __per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
- __per_cpu_end = .;
+ PERCPU(4096)

  #ifdef CONFIG_BLK_DEV_INITRD
  . = ALIGN(4096);
1 change: 1 addition & 0 deletions arch/i386/kernel/vmlinux.lds.S
@@ -181,6 +181,7 @@ SECTIONS
  .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
  __per_cpu_start = .;
  *(.data.percpu)
+ *(.data.percpu.shared_aligned)
  __per_cpu_end = .;
  }
  . = ALIGN(4096);
1 change: 1 addition & 0 deletions arch/ia64/kernel/vmlinux.lds.S
@@ -206,6 +206,7 @@ SECTIONS
  {
  __per_cpu_start = .;
  *(.data.percpu)
+ *(.data.percpu.shared_aligned)
  __per_cpu_end = .;
  }
  . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits
5 changes: 1 addition & 4 deletions arch/m32r/kernel/vmlinux.lds.S
@@ -110,10 +110,7 @@ SECTIONS
  __initramfs_end = .;
  #endif

- . = ALIGN(4096);
- __per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
- __per_cpu_end = .;
+ PERCPU(4096)
  . = ALIGN(4096);
  __init_end = .;
  /* freed after init ends here */
5 changes: 1 addition & 4 deletions arch/mips/kernel/vmlinux.lds.S
@@ -119,10 +119,7 @@ SECTIONS
  .init.ramfs : { *(.init.ramfs) }
  __initramfs_end = .;
  #endif
- . = ALIGN(_PAGE_SIZE);
- __per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
- __per_cpu_end = .;
+ PERCPU(_PAGE_SIZE)
  . = ALIGN(_PAGE_SIZE);
  __init_end = .;
  /* freed after init ends here */
7 changes: 3 additions & 4 deletions arch/parisc/kernel/vmlinux.lds.S
@@ -181,10 +181,9 @@ SECTIONS
  .init.ramfs : { *(.init.ramfs) }
  __initramfs_end = .;
  #endif
- . = ALIGN(ASM_PAGE_SIZE);
- __per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
- __per_cpu_end = .;
+
+ PERCPU(ASM_PAGE_SIZE)
+
  . = ALIGN(ASM_PAGE_SIZE);
  __init_end = .;
  /* freed after init ends here */
1 change: 1 addition & 0 deletions arch/powerpc/kernel/vmlinux.lds.S
@@ -144,6 +144,7 @@ SECTIONS
  .data.percpu : {
  __per_cpu_start = .;
  *(.data.percpu)
+ *(.data.percpu.shared_aligned)
  __per_cpu_end = .;
  }

5 changes: 1 addition & 4 deletions arch/ppc/kernel/vmlinux.lds.S
@@ -130,10 +130,7 @@ SECTIONS
  __ftr_fixup : { *(__ftr_fixup) }
  __stop___ftr_fixup = .;

- . = ALIGN(4096);
- __per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
- __per_cpu_end = .;
+ PERCPU(4096)

  #ifdef CONFIG_BLK_DEV_INITRD
  . = ALIGN(4096);
5 changes: 1 addition & 4 deletions arch/s390/kernel/vmlinux.lds.S
@@ -107,10 +107,7 @@ SECTIONS
  . = ALIGN(2);
  __initramfs_end = .;
  #endif
- . = ALIGN(4096);
- __per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
- __per_cpu_end = .;
+ PERCPU(4096)
  . = ALIGN(4096);
  __init_end = .;
  /* freed after init ends here */
5 changes: 1 addition & 4 deletions arch/sh/kernel/vmlinux.lds.S
@@ -60,10 +60,7 @@ SECTIONS
  . = ALIGN(PAGE_SIZE);
  __nosave_end = .;

- . = ALIGN(PAGE_SIZE);
- __per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
- __per_cpu_end = .;
+ PERCPU(PAGE_SIZE)
  .data.cacheline_aligned : { *(.data.cacheline_aligned) }

  _edata = .; /* End of data section */
5 changes: 4 additions & 1 deletion arch/sh64/kernel/vmlinux.lds.S
@@ -87,7 +87,10 @@ SECTIONS

  . = ALIGN(PAGE_SIZE);
  __per_cpu_start = .;
- .data.percpu : C_PHYS(.data.percpu) { *(.data.percpu) }
+ .data.percpu : C_PHYS(.data.percpu) {
+ *(.data.percpu)
+ *(.data.percpu.shared_aligned)
+ }
  __per_cpu_end = . ;
  .data.cacheline_aligned : C_PHYS(.data.cacheline_aligned) { *(.data.cacheline_aligned) }

5 changes: 1 addition & 4 deletions arch/sparc/kernel/vmlinux.lds.S
@@ -65,10 +65,7 @@ SECTIONS
  __initramfs_end = .;
  #endif

- . = ALIGN(4096);
- __per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
- __per_cpu_end = .;
+ PERCPU(4096)
  . = ALIGN(4096);
  __init_end = .;
  . = ALIGN(32);
6 changes: 2 additions & 4 deletions arch/sparc64/kernel/vmlinux.lds.S
@@ -90,10 +90,8 @@ SECTIONS
  __initramfs_end = .;
  #endif

- . = ALIGN(PAGE_SIZE);
- __per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
- __per_cpu_end = .;
+ PERCPU(PAGE_SIZE)
+
  . = ALIGN(PAGE_SIZE);
  __init_end = .;
  __bss_start = .;
6 changes: 2 additions & 4 deletions arch/x86_64/kernel/vmlinux.lds.S
@@ -194,10 +194,8 @@ SECTIONS
  __initramfs_end = .;
  #endif

- . = ALIGN(4096);
- __per_cpu_start = .;
- .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) }
- __per_cpu_end = .;
+ PERCPU(4096)
+
  . = ALIGN(4096);
  __init_end = .;

5 changes: 1 addition & 4 deletions arch/xtensa/kernel/vmlinux.lds.S
@@ -190,10 +190,7 @@ SECTIONS
  __initramfs_end = .;
  #endif

- . = ALIGN(4096);
- __per_cpu_start = .;
- .data.percpu : { *(.data.percpu) }
- __per_cpu_end = .;
+ PERCPU(4096)


  /* We need this dummy segment here */
8 changes: 8 additions & 0 deletions include/asm-generic/percpu.h
@@ -14,6 +14,11 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
  #define DEFINE_PER_CPU(type, name) \
  __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name

+ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ __attribute__((__section__(".data.percpu.shared_aligned"))) \
+ __typeof__(type) per_cpu__##name \
+ ____cacheline_aligned_in_smp
+
  /* var is in discarded region: offset to particular copy we want */
  #define per_cpu(var, cpu) (*({ \
  extern int simple_identifier_##var(void); \
@@ -34,6 +39,9 @@ do { \
  #define DEFINE_PER_CPU(type, name) \
  __typeof__(type) per_cpu__##name

+ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ DEFINE_PER_CPU(type, name)
+
  #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var))
  #define __get_cpu_var(var) per_cpu__##var
  #define __raw_get_cpu_var(var) per_cpu__##var
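
For reference, on an SMP build the new generic macro above expands as
follows (sketch with a hypothetical struct my_stats):

    /* DEFINE_PER_CPU_SHARED_ALIGNED(struct my_stats, my_stats) expands to: */
    __attribute__((__section__(".data.percpu.shared_aligned")))
    __typeof__(struct my_stats) per_cpu__my_stats ____cacheline_aligned_in_smp;

On uniprocessor builds it falls back to plain DEFINE_PER_CPU, since there
are no remote cpus to isolate.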
8 changes: 8 additions & 0 deletions include/asm-generic/vmlinux.lds.h
@@ -245,3 +245,11 @@
  *(.initcall7.init) \
  *(.initcall7s.init)

+ #define PERCPU(align) \
+ . = ALIGN(align); \
+ __per_cpu_start = .; \
+ .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
+ *(.data.percpu) \
+ *(.data.percpu.shared_aligned) \
+ } \
+ __per_cpu_end = .;
5 changes: 5 additions & 0 deletions include/asm-i386/percpu.h
@@ -54,6 +54,11 @@ extern unsigned long __per_cpu_offset[];
  #define DEFINE_PER_CPU(type, name) \
  __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name

+ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ __attribute__((__section__(".data.percpu.shared_aligned"))) \
+ __typeof__(type) per_cpu__##name \
+ ____cacheline_aligned_in_smp
+
  /* We can use this directly for local CPU (faster). */
  DECLARE_PER_CPU(unsigned long, this_cpu_off);

10 changes: 10 additions & 0 deletions include/asm-ia64/percpu.h
@@ -29,6 +29,16 @@
  __attribute__((__section__(".data.percpu"))) \
  __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name

+ #ifdef CONFIG_SMP
+ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ __attribute__((__section__(".data.percpu.shared_aligned"))) \
+ __SMALL_ADDR_AREA __typeof__(type) per_cpu__##name \
+ ____cacheline_aligned_in_smp
+ #else
+ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ DEFINE_PER_CPU(type, name)
+ #endif
+
  /*
  * Pretty much a literal copy of asm-generic/percpu.h, except that percpu_modcopy() is an
  * external routine, to avoid include-hell.
7 changes: 7 additions & 0 deletions include/asm-powerpc/percpu.h
@@ -20,6 +20,11 @@
  #define DEFINE_PER_CPU(type, name) \
  __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name

+ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ __attribute__((__section__(".data.percpu.shared_aligned"))) \
+ __typeof__(type) per_cpu__##name \
+ ____cacheline_aligned_in_smp
+
  /* var is in discarded region: offset to particular copy we want */
  #define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)))
  #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()))
@@ -40,6 +45,8 @@ extern void setup_per_cpu_areas(void);

  #define DEFINE_PER_CPU(type, name) \
  __typeof__(type) per_cpu__##name
+ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ DEFINE_PER_CPU(type, name)

  #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var))
  #define __get_cpu_var(var) per_cpu__##var
7 changes: 7 additions & 0 deletions include/asm-s390/percpu.h
@@ -41,6 +41,11 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
  __attribute__((__section__(".data.percpu"))) \
  __typeof__(type) per_cpu__##name

+ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ __attribute__((__section__(".data.percpu.shared_aligned"))) \
+ __typeof__(type) per_cpu__##name \
+ ____cacheline_aligned_in_smp
+
  #define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
  #define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset)
  #define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu])
@@ -59,6 +64,8 @@ do { \

  #define DEFINE_PER_CPU(type, name) \
  __typeof__(type) per_cpu__##name
+ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ DEFINE_PER_CPU(type, name)

  #define __get_cpu_var(var) __reloc_hide(var,0)
  #define __raw_get_cpu_var(var) __reloc_hide(var,0)
7 changes: 7 additions & 0 deletions include/asm-sparc64/percpu.h
@@ -18,6 +18,11 @@ extern unsigned long __per_cpu_shift;
  #define DEFINE_PER_CPU(type, name) \
  __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name

+ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ __attribute__((__section__(".data.percpu.shared_aligned"))) \
+ __typeof__(type) per_cpu__##name \
+ ____cacheline_aligned_in_smp
+
  register unsigned long __local_per_cpu_offset asm("g5");

  /* var is in discarded region: offset to particular copy we want */
@@ -38,6 +43,8 @@ do { \
  #define real_setup_per_cpu_areas() do { } while (0)
  #define DEFINE_PER_CPU(type, name) \
  __typeof__(type) per_cpu__##name
+ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ DEFINE_PER_CPU(type, name)

  #define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var))
  #define __get_cpu_var(var) per_cpu__##var
7 changes: 7 additions & 0 deletions include/asm-x86_64/percpu.h
@@ -20,6 +20,11 @@
  #define DEFINE_PER_CPU(type, name) \
  __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name

+ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ __attribute__((__section__(".data.percpu.shared_aligned"))) \
+ __typeof__(type) per_cpu__##name \
+ ____cacheline_internodealigned_in_smp
+
  /* var is in discarded region: offset to particular copy we want */
  #define per_cpu(var, cpu) (*({ \
  extern int simple_identifier_##var(void); \
@@ -46,6 +51,8 @@ extern void setup_per_cpu_areas(void);

  #define DEFINE_PER_CPU(type, name) \
  __typeof__(type) per_cpu__##name
+ #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
+ DEFINE_PER_CPU(type, name)

  #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var))
  #define __get_cpu_var(var) per_cpu__##var
