git.dujemihanovic.xyz Git - u-boot.git/commitdiff
lz4: fix decompressor on big-endian powerpc
author: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Sun, 7 Jun 2020 12:29:18 +0000 (14:29 +0200)
committer: Tom Rini <trini@konsulko.com>
Fri, 17 Jul 2020 12:45:29 +0000 (08:45 -0400)
Booting an lz4-compressed kernel image fails on our powerpc board with
-EPROTONOSUPPORT. After adding a few debug prints, we get

  magic: 0x184d2204
  flags: 0x64
  reserved0: 1
  has_content_checksum: 1
  has_content_size: 0
  has_block_checksum: 0
  independent_blocks: 1
  version: 0
  block_descriptor: 70
  reserved1: 7
  max_block_size: 0
  reserved2: 0

So the magic is ok, but the version check fails, also some reserved
bits are apparently set. But that's because the code interprets the
"flags" and "block_descriptor" bytes wrongly:

Using bit-fields to access individual bits of an "on the wire" format
is not portable, not even when restricted to the C flavour implemented
by gcc. Quoting the gcc manual:

   * 'The order of allocation of bit-fields within a unit (C90 6.5.2.1,
     C99 and C11 6.7.2.1).'

     Determined by ABI.

and indeed, the PPC Processor ABI supplement says

   * Bit-fields are allocated from right to left (least to most
     significant) on Little-Endian implementations and from left to
     right (most to least significant) on Big-Endian implementations.

The upstream code (github.com/lz4/lz4) uses explicit shifts and masks
for encoding/decoding:

    /* FLG Byte */
    *dstPtr++ = (BYTE)(((1 & _2BITS) << 6)    /* Version('01') */
        + ((cctxPtr->prefs.frameInfo.blockMode & _1BIT ) << 5)
        + ((cctxPtr->prefs.frameInfo.blockChecksumFlag & _1BIT ) << 4)
        + ((unsigned)(cctxPtr->prefs.frameInfo.contentSize > 0) << 3)
        + ((cctxPtr->prefs.frameInfo.contentChecksumFlag & _1BIT ) << 2)
        +  (cctxPtr->prefs.frameInfo.dictID > 0) );

    /* Flags */
    {   U32 const FLG = srcPtr[4];
        U32 const version = (FLG>>6) & _2BITS;
        blockChecksumFlag = (FLG>>4) & _1BIT;
        blockMode = (FLG>>5) & _1BIT;
        contentSizeFlag = (FLG>>3) & _1BIT;
        contentChecksumFlag = (FLG>>2) & _1BIT;
        dictIDFlag = FLG & _1BIT;
        /* validate */
        if (((FLG>>1)&_1BIT) != 0) return err0r(LZ4F_ERROR_reservedFlag_set); /* Reserved bit */
        if (version != 1) return err0r(LZ4F_ERROR_headerVersion_wrong);        /* Version Number, only supported value */
    }

Do the same here, and while at it, be more careful to use unaligned
accessors to what is most likely unaligned. Also update the comment to
make it clear that it only refers to the lz4.c file, not the following
code of lz4_wrapper.c.

This has been tested partly, of course, by seeing that my
lz4-compressed kernel now boots, partly by running the (de)compression
test-suite in the (x86_64) sandbox - i.e., it should still work just
fine on little-endian hosts.

Reviewed-by: Julius Werner <jwerner@chromium.org>
Signed-off-by: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
lib/lz4_wrapper.c

index 1e1e8d508530d44d5e1eb8e3f68e6f5d28175877..e0f7d3688ee5b599c1c53844c417d9b165e9059c 100644 (file)
@@ -9,6 +9,7 @@
 #include <lz4.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
+#include <asm/unaligned.h>
 
 static u16 LZ4_readLE16(const void *src) { return le16_to_cpu(*(u16 *)src); }
 static void LZ4_copy4(void *dst, const void *src) { *(u32 *)dst = *(u32 *)src; }
@@ -22,45 +23,10 @@ typedef uint64_t U64;
 
 #define FORCE_INLINE static inline __attribute__((always_inline))
 
-/* Unaltered (except removing unrelated code) from github.com/Cyan4973/lz4. */
+/* lz4.c is unaltered (except removing unrelated code) from github.com/Cyan4973/lz4. */
 #include "lz4.c"       /* #include for inlining, do not link! */
 
-struct lz4_frame_header {
-       u32 magic;
-       union {
-               u8 flags;
-               struct {
-                       u8 reserved0:2;
-                       u8 has_content_checksum:1;
-                       u8 has_content_size:1;
-                       u8 has_block_checksum:1;
-                       u8 independent_blocks:1;
-                       u8 version:2;
-               };
-       };
-       union {
-               u8 block_descriptor;
-               struct {
-                       u8 reserved1:4;
-                       u8 max_block_size:3;
-                       u8 reserved2:1;
-               };
-       };
-       /* + u64 content_size iff has_content_size is set */
-       /* + u8 header_checksum */
-} __packed;
-
-struct lz4_block_header {
-       union {
-               u32 raw;
-               struct {
-                       u32 size:31;
-                       u32 not_compressed:1;
-               };
-       };
-       /* + size bytes of data */
-       /* + u32 block_checksum iff has_block_checksum is set */
-} __packed;
+#define LZ4F_BLOCKUNCOMPRESSED_FLAG 0x80000000U
 
 int ulz4fn(const void *src, size_t srcn, void *dst, size_t *dstn)
 {
@@ -72,53 +38,70 @@ int ulz4fn(const void *src, size_t srcn, void *dst, size_t *dstn)
        *dstn = 0;
 
        { /* With in-place decompression the header may become invalid later. */
-               const struct lz4_frame_header *h = in;
+               u32 magic;
+               u8 flags, version, independent_blocks, has_content_size;
+               u8 block_desc;
 
-               if (srcn < sizeof(*h) + sizeof(u64) + sizeof(u8))
+               if (srcn < sizeof(u32) + 3*sizeof(u8))
                        return -EINVAL; /* input overrun */
 
+               magic = get_unaligned_le32(in);
+               in += sizeof(u32);
+               flags = *(u8 *)in;
+               in += sizeof(u8);
+               block_desc = *(u8 *)in;
+               in += sizeof(u8);
+
+               version = (flags >> 6) & 0x3;
+               independent_blocks = (flags >> 5) & 0x1;
+               has_block_checksum = (flags >> 4) & 0x1;
+               has_content_size = (flags >> 3) & 0x1;
+
                /* We assume there's always only a single, standard frame. */
-               if (le32_to_cpu(h->magic) != LZ4F_MAGIC || h->version != 1)
+               if (magic != LZ4F_MAGIC || version != 1)
                        return -EPROTONOSUPPORT;        /* unknown format */
-               if (h->reserved0 || h->reserved1 || h->reserved2)
-                       return -EINVAL; /* reserved must be zero */
-               if (!h->independent_blocks)
+               if ((flags & 0x03) || (block_desc & 0x8f))
+                       return -EINVAL; /* reserved bits must be zero */
+               if (!independent_blocks)
                        return -EPROTONOSUPPORT; /* we can't support this yet */
-               has_block_checksum = h->has_block_checksum;
 
-               in += sizeof(*h);
-               if (h->has_content_size)
+               if (has_content_size) {
+                       if (srcn < sizeof(u32) + 3*sizeof(u8) + sizeof(u64))
+                               return -EINVAL; /* input overrun */
                        in += sizeof(u64);
+               }
+               /* Header checksum byte */
                in += sizeof(u8);
        }
 
        while (1) {
-               struct lz4_block_header b;
+               u32 block_header, block_size;
 
-               b.raw = le32_to_cpu(*(u32 *)in);
-               in += sizeof(struct lz4_block_header);
+               block_header = get_unaligned_le32(in);
+               in += sizeof(u32);
+               block_size = block_header & ~LZ4F_BLOCKUNCOMPRESSED_FLAG;
 
-               if (in - src + b.size > srcn) {
+               if (in - src + block_size > srcn) {
                        ret = -EINVAL;          /* input overrun */
                        break;
                }
 
-               if (!b.size) {
+               if (!block_size) {
                        ret = 0;        /* decompression successful */
                        break;
                }
 
-               if (b.not_compressed) {
-                       size_t size = min((ptrdiff_t)b.size, end - out);
+               if (block_header & LZ4F_BLOCKUNCOMPRESSED_FLAG) {
+                       size_t size = min((ptrdiff_t)block_size, end - out);
                        memcpy(out, in, size);
                        out += size;
-                       if (size < b.size) {
+                       if (size < block_size) {
                                ret = -ENOBUFS; /* output overrun */
                                break;
                        }
                } else {
                        /* constant folding essential, do not touch params! */
-                       ret = LZ4_decompress_generic(in, out, b.size,
+                       ret = LZ4_decompress_generic(in, out, block_size,
                                        end - out, endOnInputSize,
                                        full, 0, noDict, out, NULL, 0);
                        if (ret < 0) {
@@ -128,7 +111,7 @@ int ulz4fn(const void *src, size_t srcn, void *dst, size_t *dstn)
                        out += ret;
                }
 
-               in += b.size;
+               in += block_size;
                if (has_block_checksum)
                        in += sizeof(u32);
        }