]> git.dujemihanovic.xyz Git - u-boot.git/commitdiff
zlib: Fix big performance regression
authorChristophe Leroy <christophe.leroy@csgroup.eu>
Tue, 16 Jul 2024 14:35:46 +0000 (08:35 -0600)
committerTom Rini <trini@konsulko.com>
Fri, 19 Jul 2024 22:48:07 +0000 (16:48 -0600)
Commit 340fdf1303dc ("zlib: Port fix for CVE-2016-9841 to U-Boot")
brings a big performance regression in inflate_fast(), which leads
to a watchdog timer reset on powerpc 8xx.

It looks like that commit does more than what it describes; in
particular, it removed an important optimisation that was doing copies
using halfwords instead of bytes. That unexpected change multiplied
the time spent in inflate_fast() by almost 4 and increased the overall
time needed to uncompress a Linux kernel image by 40%.

So partially revert that commit, but keep the post-incrementation, as
that was the initial purpose of said commit.

[trini: Combine assorted patches into this one, just restoring the
        performance commit]

Fixes: 340fdf1303dc ("zlib: Port fix for CVE-2016-9841 to U-Boot")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Michal Simek <michal.simek@amd.com>
Signed-off-by: Tom Rini <trini@konsulko.com>
lib/zlib/inffast.c
lib/zlib/zlib.h

index 5e2a65ad4d273b719aeb3634b455a0c8bde193f8..b5a0adcce69f3be6cb314d91e69682cea6c28848 100644 (file)
@@ -236,18 +236,47 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
                     }
                 }
                 else {
+                   unsigned short *sout;
+                   unsigned long loops;
+
                     from = out - dist;          /* copy direct from output */
-                    do {                        /* minimum length is three */
-                        *out++ = *from++;
-                        *out++ = *from++;
-                        *out++ = *from++;
-                        len -= 3;
-                    } while (len > 2);
-                    if (len) {
-                        *out++ = *from++;
-                        if (len > 1)
-                            *out++ = *from++;
-                    }
+                    /* minimum length is three */
+                   /* Align out addr */
+                   if (!((long)(out - 1) & 1)) {
+                       *out++ = *from++;
+                       len--;
+                   }
+                   sout = (unsigned short *)out;
+                   if (dist > 2 ) {
+                       unsigned short *sfrom;
+
+                       sfrom = (unsigned short *)from;
+                       loops = len >> 1;
+                       do
+                           *sout++ = get_unaligned(sfrom++);
+                       while (--loops);
+                       out = (unsigned char *)sout;
+                       from = (unsigned char *)sfrom;
+                   } else { /* dist == 1 or dist == 2 */
+                       unsigned short pat16;
+
+                       pat16 = *(sout - 1);
+                       if (dist == 1)
+#if defined(__BIG_ENDIAN)
+                           pat16 = (pat16 & 0xff) | ((pat16 & 0xff ) << 8);
+#elif defined(__LITTLE_ENDIAN)
+                           pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00 ) >> 8);
+#else
+#error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
+#endif
+                       loops = len >> 1;
+                       do
+                           *sout++ = pat16;
+                       while (--loops);
+                       out = (unsigned char *)sout;
+                   }
+                   if (len & 1)
+                       *out++ = *from++;
                 }
             }
             else if ((op & 64) == 0) {          /* 2nd level distance code */
index 560e7be97d3aae01582180c2ace68ceb02467aaa..f9b2f69ac0270747f0dbe38c78325f2a04fead6e 100644 (file)
@@ -10,7 +10,6 @@
 /* avoid conflicts */
 #undef OFF
 #undef ASMINF
-#undef POSTINC
 #undef NO_GZIP
 #define GUNZIP
 #undef STDC