Skip to content

Commit

Permalink
common implementation of iterative div/mod
Browse files Browse the repository at this point in the history
We have a few instances of the open-coded iterative div/mod loop, used
when we don't expect the dividend to be much bigger than the divisor.
Unfortunately modern gcc's have the tendency to strength "reduce" this
into a full mod operation, which isn't necessarily any faster, and
even if it were, doesn't exist if gcc implements it in libgcc.

The workaround is to put a dummy asm statement in the loop to prevent
gcc from performing the transformation.

This patch creates a single implementation of this loop, and uses it
to replace the open-coded versions I know about.

Signed-off-by: Jeremy Fitzhardinge <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: john stultz <[email protected]>
Cc: Segher Boessenkool <[email protected]>
Cc: Christian Kujau <[email protected]>
Cc: Robert Hancock <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
  • Loading branch information
Jeremy Fitzhardinge authored and Ingo Molnar committed Jun 12, 2008
1 parent 5e70b7f commit f595ec9
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 19 deletions.
13 changes: 3 additions & 10 deletions arch/x86/xen/time.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/kernel_stat.h>
#include <linux/math64.h>

#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
Expand Down Expand Up @@ -150,11 +151,7 @@ static void do_stolen_accounting(void)
if (stolen < 0)
stolen = 0;

ticks = 0;
while (stolen >= NS_PER_TICK) {
ticks++;
stolen -= NS_PER_TICK;
}
ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
__get_cpu_var(residual_stolen) = stolen;
account_steal_time(NULL, ticks);

Expand All @@ -166,11 +163,7 @@ static void do_stolen_accounting(void)
if (blocked < 0)
blocked = 0;

ticks = 0;
while (blocked >= NS_PER_TICK) {
ticks++;
blocked -= NS_PER_TICK;
}
ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
__get_cpu_var(residual_blocked) = blocked;
account_steal_time(idle_task(smp_processor_id()), ticks);
}
Expand Down
2 changes: 2 additions & 0 deletions include/linux/math64.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,6 @@ static inline s64 div_s64(s64 dividend, s32 divisor)
}
#endif

u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder);

#endif /* _LINUX_MATH64_H */
11 changes: 2 additions & 9 deletions include/linux/time.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#ifdef __KERNEL__
# include <linux/cache.h>
# include <linux/seqlock.h>
# include <linux/math64.h>
#endif

#ifndef _STRUCT_TIMESPEC
Expand Down Expand Up @@ -172,15 +173,7 @@ extern struct timeval ns_to_timeval(const s64 nsec);
*/
static inline void timespec_add_ns(struct timespec *a, u64 ns)
{
ns += a->tv_nsec;
while(unlikely(ns >= NSEC_PER_SEC)) {
/* The following asm() prevents the compiler from
* optimising this loop into a modulo operation. */
asm("" : "+r"(ns));

ns -= NSEC_PER_SEC;
a->tv_sec++;
}
a->tv_sec += iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns);
a->tv_nsec = ns;
}
#endif /* __KERNEL__ */
Expand Down
23 changes: 23 additions & 0 deletions lib/div64.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,26 @@ EXPORT_SYMBOL(div64_u64);
#endif

#endif /* BITS_PER_LONG == 32 */

/*
 * iter_div_u64_rem - divide by repeated subtraction
 * @dividend:  64-bit value to be divided
 * @divisor:   32-bit value to divide by
 * @remainder: out-parameter; receives what is left of @dividend once it
 *             has dropped below @divisor
 *
 * Intended for callers that expect @dividend to be only a small multiple
 * of @divisor, so that the loop runs just a handful of times.
 *
 * Returns the number of subtractions performed, i.e. the quotient.
 */
u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
{
	u32 quotient;

	for (quotient = 0; dividend >= divisor; quotient++) {
		/* Empty asm that claims to read and write dividend: it
		   stops the compiler from collapsing this loop into a
		   single modulo operation. */
		asm("" : "+rm"(dividend));

		dividend -= divisor;
	}

	*remainder = dividend;

	return quotient;
}
EXPORT_SYMBOL(iter_div_u64_rem);

0 comments on commit f595ec9

Please sign in to comment.