diff options
Diffstat (limited to 'package/busybox/busybox-1.15.0-unlzma.patch')
-rw-r--r-- | package/busybox/busybox-1.15.0-unlzma.patch | 323 |
1 files changed, 323 insertions, 0 deletions
diff --git a/package/busybox/busybox-1.15.0-unlzma.patch b/package/busybox/busybox-1.15.0-unlzma.patch new file mode 100644 index 000000000..83ecd5a05 --- /dev/null +++ b/package/busybox/busybox-1.15.0-unlzma.patch @@ -0,0 +1,323 @@ +diff -urpN busybox-1.15.0/archival/Config.in busybox-1.15.0-unlzma/archival/Config.in +--- busybox-1.15.0/archival/Config.in 2009-08-21 00:26:13.000000000 +0200 ++++ busybox-1.15.0-unlzma/archival/Config.in 2009-09-06 15:10:58.000000000 +0200 +@@ -298,8 +298,8 @@ config FEATURE_LZMA_FAST + default n + depends on UNLZMA + help +- This option reduces decompression time by about 25% at the cost of +- a 1K bigger binary. ++ This option reduces decompression time by about 33% at the cost of ++ a 2K bigger binary. + + config UNZIP + bool "unzip" +diff -urpN busybox-1.15.0/archival/libunarchive/decompress_unlzma.c busybox-1.15.0-unlzma/archival/libunarchive/decompress_unlzma.c +--- busybox-1.15.0/archival/libunarchive/decompress_unlzma.c 2009-08-21 00:26:13.000000000 +0200 ++++ busybox-1.15.0-unlzma/archival/libunarchive/decompress_unlzma.c 2009-09-06 15:10:58.000000000 +0200 +@@ -8,15 +8,14 @@ + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ ++ + #include "libbb.h" + #include "unarchive.h" + + #if ENABLE_FEATURE_LZMA_FAST + # define speed_inline ALWAYS_INLINE +-# define size_inline + #else + # define speed_inline +-# define size_inline ALWAYS_INLINE + #endif + + +@@ -45,8 +44,8 @@ typedef struct { + #define RC_MODEL_TOTAL_BITS 11 + + +-/* Called twice: once at startup (LZMA_FAST only) and once in rc_normalize() */ +-static size_inline void rc_read(rc_t *rc) ++/* Called twice: once at startup and once in rc_normalize() */ ++static void rc_read(rc_t *rc) + { + int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE); + if (buffer_size <= 0) +@@ -55,17 +54,8 @@ static size_inline void rc_read(rc_t *rc + rc->buffer_end = RC_BUFFER + buffer_size; + } + +-/* Called twice, but one callsite is in speed_inline'd rc_is_bit_1() */ +-static void rc_do_normalize(rc_t *rc) +-{ +- if (rc->ptr >= rc->buffer_end) +- rc_read(rc); +- rc->range <<= 8; +- rc->code = (rc->code << 8) | *rc->ptr++; +-} +- + /* Called once */ +-static ALWAYS_INLINE rc_t* rc_init(int fd) /*, int buffer_size) */ ++static rc_t* rc_init(int fd) /*, int buffer_size) */ + { + int i; + rc_t *rc; +@@ -73,18 +63,17 @@ static ALWAYS_INLINE rc_t* rc_init(int f + rc = xmalloc(sizeof(*rc) + RC_BUFFER_SIZE); + + rc->fd = fd; ++ /* rc->buffer_size = buffer_size; */ ++ rc->buffer_end = RC_BUFFER + RC_BUFFER_SIZE; + rc->ptr = rc->buffer_end; + ++ rc->code = 0; ++ rc->range = 0xFFFFFFFF; + for (i = 0; i < 5; i++) { +-#if ENABLE_FEATURE_LZMA_FAST + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->code = (rc->code << 8) | *rc->ptr++; +-#else +- rc_do_normalize(rc); +-#endif + } +- rc->range = 0xFFFFFFFF; + return rc; + } + +@@ -94,6 +83,14 @@ static ALWAYS_INLINE void rc_free(rc_t * + free(rc); + } + ++/* Called twice, but one callsite is in speed_inline'd rc_is_bit_0_helper() */ ++static void rc_do_normalize(rc_t *rc) ++{ ++ if (rc->ptr >= rc->buffer_end) ++ rc_read(rc); ++ rc->range <<= 8; ++ rc->code = (rc->code << 8) | *rc->ptr++; ++} + static ALWAYS_INLINE void rc_normalize(rc_t *rc) + { + if (rc->range < (1 << RC_TOP_BITS)) { +@@ -101,28 +98,49 @@ static ALWAYS_INLINE void rc_normalize(r + } + } + +-/* rc_is_bit_1 is called 9 times */ +-static speed_inline int rc_is_bit_1(rc_t *rc, uint16_t *p) ++/* rc_is_bit_0 is called 9 times */ ++/* Why rc_is_bit_0_helper exists? ++ * Because we want to always expose (rc->code < rc->bound) to optimizer. ++ * Thus rc_is_bit_0 is always inlined, and rc_is_bit_0_helper is inlined ++ * only if we compile for speed. ++ */ ++static speed_inline uint32_t rc_is_bit_0_helper(rc_t *rc, uint16_t *p) + { + rc_normalize(rc); + rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); +- if (rc->code < rc->bound) { +- rc->range = rc->bound; +- *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; +- return 0; +- } ++ return rc->bound; ++} ++static ALWAYS_INLINE int rc_is_bit_0(rc_t *rc, uint16_t *p) ++{ ++ uint32_t t = rc_is_bit_0_helper(rc, p); ++ return rc->code < t; ++} ++ ++/* Called ~10 times, but very small, thus inlined */ ++static speed_inline void rc_update_bit_0(rc_t *rc, uint16_t *p) ++{ ++ rc->range = rc->bound; ++ *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; ++} ++static speed_inline void rc_update_bit_1(rc_t *rc, uint16_t *p) ++{ + rc->range -= rc->bound; + rc->code -= rc->bound; + *p -= *p >> RC_MOVE_BITS; +- return 1; + } + + /* Called 4 times in unlzma loop */ +-static speed_inline int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) ++static int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) + { +- int ret = rc_is_bit_1(rc, p); +- *symbol = *symbol * 2 + ret; +- return ret; ++ if (rc_is_bit_0(rc, p)) { ++ rc_update_bit_0(rc, p); ++ *symbol *= 2; ++ return 0; ++ } else { ++ rc_update_bit_1(rc, p); ++ *symbol = *symbol * 2 + 1; ++ return 1; ++ } + } + + /* Called once */ +@@ -248,13 +266,13 @@ unpack_lzma_stream(int src_fd, int dst_f + header.dst_size = SWAP_LE64(header.dst_size); + + if (header.dict_size == 0) +- header.dict_size++; ++ header.dict_size = 1; + + buffer = xmalloc(MIN(header.dst_size, header.dict_size)); + + num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); + p = xmalloc(num_probs * sizeof(*p)); +- num_probs += LZMA_LITERAL - LZMA_BASE_SIZE; ++ num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); + for (i = 0; i < num_probs; i++) + p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; + +@@ -264,8 +282,9 @@ unpack_lzma_stream(int src_fd, int dst_f + int pos_state = (buffer_pos + global_pos) & pos_state_mask; + + prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; +- if (!rc_is_bit_1(rc, prob)) { ++ if (rc_is_bit_0(rc, prob)) { + mi = 1; ++ rc_update_bit_0(rc, prob); + prob = (p + LZMA_LITERAL + + (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) + + (previous_byte >> (8 - lc)) +@@ -321,21 +340,27 @@ unpack_lzma_stream(int src_fd, int dst_f + int offset; + uint16_t *prob_len; + ++ rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP + state; +- if (!rc_is_bit_1(rc, prob)) { ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + state = state < LZMA_NUM_LIT_STATES ? 0 : 3; + prob = p + LZMA_LEN_CODER; + } else { +- prob += LZMA_IS_REP_G0 - LZMA_IS_REP; +- if (!rc_is_bit_1(rc, prob)) { ++ rc_update_bit_1(rc, prob); ++ prob = p + LZMA_IS_REP_G0 + state; ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); + prob = (p + LZMA_IS_REP_0_LONG + + (state << LZMA_NUM_POS_BITS_MAX) + + pos_state + ); +- if (!rc_is_bit_1(rc, prob)) { ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); ++ + state = state < LZMA_NUM_LIT_STATES ? 9 : 11; + #if ENABLE_FEATURE_LZMA_FAST + pos = buffer_pos - rep0; +@@ -347,16 +372,25 @@ unpack_lzma_stream(int src_fd, int dst_f + len = 1; + goto string; + #endif ++ } else { ++ rc_update_bit_1(rc, prob); + } + } else { + uint32_t distance; + +- prob += LZMA_IS_REP_G1 - LZMA_IS_REP_G0; +- distance = rep1; +- if (rc_is_bit_1(rc, prob)) { +- prob += LZMA_IS_REP_G2 - LZMA_IS_REP_G1; +- distance = rep2; +- if (rc_is_bit_1(rc, prob)) { ++ rc_update_bit_1(rc, prob); ++ prob = p + LZMA_IS_REP_G1 + state; ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); ++ distance = rep1; ++ } else { ++ rc_update_bit_1(rc, prob); ++ prob = p + LZMA_IS_REP_G2 + state; ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); ++ distance = rep2; ++ } else { ++ rc_update_bit_1(rc, prob); + distance = rep3; + rep3 = rep2; + } +@@ -370,20 +404,24 @@ unpack_lzma_stream(int src_fd, int dst_f + } + + prob_len = prob + LZMA_LEN_CHOICE; +- if (!rc_is_bit_1(rc, prob_len)) { +- prob_len += LZMA_LEN_LOW - LZMA_LEN_CHOICE +- + (pos_state << LZMA_LEN_NUM_LOW_BITS); ++ if (rc_is_bit_0(rc, prob_len)) { ++ rc_update_bit_0(rc, prob_len); ++ prob_len = (prob + LZMA_LEN_LOW ++ + (pos_state << LZMA_LEN_NUM_LOW_BITS)); + offset = 0; + num_bits = LZMA_LEN_NUM_LOW_BITS; + } else { +- prob_len += LZMA_LEN_CHOICE_2 - LZMA_LEN_CHOICE; +- if (!rc_is_bit_1(rc, prob_len)) { +- prob_len += LZMA_LEN_MID - LZMA_LEN_CHOICE_2 +- + (pos_state << LZMA_LEN_NUM_MID_BITS); ++ rc_update_bit_1(rc, prob_len); ++ prob_len = prob + LZMA_LEN_CHOICE_2; ++ if (rc_is_bit_0(rc, prob_len)) { ++ rc_update_bit_0(rc, prob_len); ++ prob_len = (prob + LZMA_LEN_MID ++ + (pos_state << LZMA_LEN_NUM_MID_BITS)); + offset = 1 << LZMA_LEN_NUM_LOW_BITS; + num_bits = LZMA_LEN_NUM_MID_BITS; + } else { +- prob_len += LZMA_LEN_HIGH - LZMA_LEN_CHOICE_2; ++ rc_update_bit_1(rc, prob_len); ++ prob_len = prob + LZMA_LEN_HIGH; + offset = ((1 << LZMA_LEN_NUM_LOW_BITS) + + (1 << LZMA_LEN_NUM_MID_BITS)); + num_bits = LZMA_LEN_NUM_HIGH_BITS; +@@ -400,20 +438,19 @@ unpack_lzma_stream(int src_fd, int dst_f + ((len < LZMA_NUM_LEN_TO_POS_STATES ? len : + LZMA_NUM_LEN_TO_POS_STATES - 1) + << LZMA_NUM_POS_SLOT_BITS); +- rc_bit_tree_decode(rc, prob, +- LZMA_NUM_POS_SLOT_BITS, &pos_slot); +- rep0 = pos_slot; ++ rc_bit_tree_decode(rc, prob, LZMA_NUM_POS_SLOT_BITS, ++ &pos_slot); + if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { + num_bits = (pos_slot >> 1) - 1; + rep0 = 2 | (pos_slot & 1); +- prob = p + LZMA_ALIGN; + if (pos_slot < LZMA_END_POS_MODEL_INDEX) { + rep0 <<= num_bits; +- prob += LZMA_SPEC_POS - LZMA_ALIGN - 1 + rep0 - pos_slot; ++ prob = p + LZMA_SPEC_POS + rep0 - pos_slot - 1; + } else { + num_bits -= LZMA_NUM_ALIGN_BITS; + while (num_bits--) + rep0 = (rep0 << 1) | rc_direct_bit(rc); ++ prob = p + LZMA_ALIGN; + rep0 <<= LZMA_NUM_ALIGN_BITS; + num_bits = LZMA_NUM_ALIGN_BITS; + } +@@ -424,7 +461,8 @@ unpack_lzma_stream(int src_fd, int dst_f + rep0 |= i; + i <<= 1; + } +- } ++ } else ++ rep0 = pos_slot; + if (++rep0 == 0) + break; + } |