Thread
Thread Index
-
[PATCH] optimize zip_open(),
Alan Jenkins
(2010/01/16 14:42:59)
Message
The profile of zip_open() is dominated by memchr(), which searches for
the magic number of the central directory trailer at the end of the
file.
The search is necessary because of the variable-length comment field,
which can be up to 64k. However, in most zip files the comment
is empty (or at least much smaller than 64k), so it is more
efficient to search backwards for the magic number from the end of the
file, rather than searching forwards from EOF-64k.
This is the same method as used in the "unzip" program.
The optimisation reduces libzip overhead from 60% to 40% when
extracting metadata from an FB2 zipped e-book file using the FB2
plugin from libextractor-mini.
diff --git a/lib/zip_open.c b/lib/zip_open.c
index 6a69036..8a3b79a 100644
--- a/lib/zip_open.c
+++ b/lib/zip_open.c
@@ -51,7 +51,7 @@ static struct zip_cdir *_zip_find_central_dir(FILE *, int, int *, off_t);
static int _zip_file_exists(const char *, int, int *);
static int _zip_headercomp(struct zip_dirent *, int,
struct zip_dirent *, int);
-static unsigned char *_zip_memmem(const unsigned char *, int,
+static unsigned char *_zip_memrmem(const unsigned char *, int,
const unsigned char *, int);
static struct zip_cdir *_zip_readcdir(FILE *, unsigned char *, unsigned char *,
int, int, struct zip_error *);
@@ -456,7 +456,7 @@ _zip_find_central_dir(FILE *fp, int flags, int *zep, off_t len)
match = buf;
_zip_error_set(&zerr, ZIP_ER_NOZIP, 0);
- while ((match=_zip_memmem(match, buflen-(match-buf)-18,
+ while ((match=_zip_memrmem(match, buflen-(match-buf)-18,
(const unsigned char *)EOCD_MAGIC, 4))!=NULL) {
/* found match -- check, if good */
/* to avoid finding the same match all over again */
@@ -499,18 +499,30 @@ _zip_find_central_dir(FILE *fp, int flags, int *zep, off_t len)
}
+/* zipfile comments are usually much shorter than the max of 64k,
+ so we ideally want to search _backwards_ for EOCD_MAGIC. This makes
+ memrchr() much faster than memchr() for our purposes, even though we
+ use our own version instead of an optimised one from libc. */
+static const unsigned char *
+_zip_memrchr(const unsigned char *s, int c, size_t n)
+{
+ while (n--)
+ if (s[n] == c)
+ return &s[n];
+ return NULL;
+}
static unsigned char *
-_zip_memmem(const unsigned char *big, int biglen, const unsigned char *little,
+_zip_memrmem(const unsigned char *big, int biglen, const unsigned char *little,
int littlelen)
{
const unsigned char *p;
if ((biglen < littlelen) || (littlelen == 0))
return NULL;
- p = big-1;
+ p = big+biglen;
while ((p=(const unsigned char *)
- memchr(p+1, little[0], (size_t)(big-(p+1)+biglen-littlelen+1)))
+ _zip_memrchr(big, little[0], (size_t)(p-big)-(littlelen-1)))
!= NULL) {
if (memcmp(p+1, little+1, littlelen-1)==0)
return (unsigned char *)p;
Made by MHonArc.
|