Skip to content

Commit

Permalink
zlib: optimize inffast when copying direct from output
Browse files Browse the repository at this point in the history
JFFS2 uses a lower compression ratio, and inflate always ends up in the
"copy direct from output" case.

This patch tries to optimize the direct copy procedure.  It uses
get_unaligned(), but only in one place.

The copy loop just above this one can also use this optimization, but I
haven't done so as I have not tested whether it is a win there too.

On my MPC8321 this is about 17% faster on my JFFS2 root FS than the
original.

[[email protected]: coding-style fixes]
Signed-off-by: Joakim Tjernlund <[email protected]>
Cc: Roel Kluin <[email protected]>
Cc: Richard Purdie <[email protected]>
Cc: David Woodhouse <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
joakim-tjernlund authored and torvalds committed Jan 11, 2010
1 parent 129182e commit ac4c2a3
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 12 deletions.
4 changes: 3 additions & 1 deletion arch/powerpc/boot/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
all: $(obj)/zImage

BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-fno-strict-aliasing -Os -msoft-float -pipe \
-fno-strict-aliasing -Os -msoft-float -pipe -D__KERNEL__\
-fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
-isystem $(shell $(CROSS32CC) -print-file-name=include)
BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
Expand All @@ -34,6 +34,8 @@ BOOTCFLAGS += -fno-stack-protector
endif

BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj)
BOOTCFLAGS += -include include/linux/autoconf.h -Iarch/powerpc/include
BOOTCFLAGS += -Iinclude

DTS_FLAGS ?= -p 1024

Expand Down
55 changes: 44 additions & 11 deletions lib/zlib_inflate/inffast.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
*/

#include <linux/zutil.h>
#include <asm/unaligned.h>
#include <asm/byteorder.h>
#include "inftrees.h"
#include "inflate.h"
#include "inffast.h"
Expand All @@ -24,9 +26,11 @@
/*
 * Pointer-stepping helpers, selected at compile time by POSTINC.
 *
 *   OFF             - 0 when POSTINC is defined, 1 otherwise.
 *   PUP(a)          - dereference `a` and advance it by one element:
 *                     access-then-increment with POSTINC,
 *                     increment-then-access without it.  Usable as an
 *                     lvalue or an rvalue.
 *   UP_UNALIGNED(a) - same stepping order as PUP(), but the value is read
 *                     through get_unaligned() (presumably because `a` may
 *                     not be naturally aligned for its type - confirm at
 *                     the call sites).  Read-only.
 *
 * Both PUP() and UP_UNALIGNED() modify their argument, so the argument
 * must be a modifiable pointer lvalue.
 */
#ifdef POSTINC
# define OFF 0
# define PUP(a) *(a)++
# define UP_UNALIGNED(a) get_unaligned((a)++)
#else
# define OFF 1
# define PUP(a) *++(a)
# define UP_UNALIGNED(a) get_unaligned(++(a))
#endif

/*
Expand Down Expand Up @@ -239,18 +243,47 @@ void inflate_fast(z_streamp strm, unsigned start)
}
}
else {
unsigned short *sout;
unsigned long loops;

from = out - dist; /* copy direct from output */
do { /* minimum length is three */
PUP(out) = PUP(from);
PUP(out) = PUP(from);
PUP(out) = PUP(from);
len -= 3;
} while (len > 2);
if (len) {
PUP(out) = PUP(from);
if (len > 1)
PUP(out) = PUP(from);
}
/* minimum length is three */
/* Align out addr */
if (!((long)(out - 1 + OFF) & 1)) {
PUP(out) = PUP(from);
len--;
}
sout = (unsigned short *)(out - OFF);
if (dist > 2) {
unsigned short *sfrom;

sfrom = (unsigned short *)(from - OFF);
loops = len >> 1;
do
PUP(sout) = UP_UNALIGNED(sfrom);
while (--loops);
out = (unsigned char *)sout + OFF;
from = (unsigned char *)sfrom + OFF;
} else { /* dist == 1 or dist == 2 */
unsigned short pat16;

pat16 = *(sout-2+2*OFF);
if (dist == 1)
#if defined(__BIG_ENDIAN)
pat16 = (pat16 & 0xff) | ((pat16 & 0xff) << 8);
#elif defined(__LITTLE_ENDIAN)
pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00) >> 8);
#else
#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
#endif
loops = len >> 1;
do
PUP(sout) = pat16;
while (--loops);
out = (unsigned char *)sout + OFF;
}
if (len & 1)
PUP(out) = PUP(from);
}
}
else if ((op & 64) == 0) { /* 2nd level distance code */
Expand Down

0 comments on commit ac4c2a3

Please sign in to comment.