From a3a3c62b81ef41fc3edbc991c165ff0acd3a29be Mon Sep 17 00:00:00 2001 From: Bimba Shrestha <bimbashrestha@fb.com> Date: Tue, 26 Nov 2019 12:24:19 -0800 Subject: [PATCH] [fuzz] Only set HUF_repeat_valid if loaded table has all non-zero weights (#1898) Fixes a fuzz issue where dictionary_round_trip failed because the compressor was generating corrupt files thanks to zero weights in the table. * Only setting loaded dict huf table to valid on non-zero * Adding hasNoZeroWeights test to fse tables * Forbiding nbBits != 0 when weight == 0 * Reverting the last commit * Setting table log to 0 when weight == 0 * Small (invalid) zero weight dict test * Small (valid) zero weight dict test * Initializing repeatMode vars to check before zero check * Removing FSE changes to seperate pr * Reverting accidentally changed file * Negating bool, using unsigned, optimization nit --- lib/common/huf.h | 2 +- lib/compress/huf_compress.c | 6 ++++-- lib/compress/zstd_compress.c | 14 +++++++++++--- tests/dict-files/zero-weight-dict | Bin 0 -> 153 bytes tests/playTests.sh | 17 ++++++++++++++++- 5 files changed, 32 insertions(+), 7 deletions(-) create mode 100644 tests/dict-files/zero-weight-dict diff --git a/lib/common/huf.h b/lib/common/huf.h index 3026c43ea..4a87db5c1 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -247,7 +247,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, /** HUF_readCTable() : * Loading a CTable saved with HUF_writeCTable() */ -size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); /** HUF_getNbBits() : * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index 0cbba2c99..b8e6fb386 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -169,7 +169,7 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, } -size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize) +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) { BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ @@ -192,9 +192,11 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void } } /* fill nbBits */ + *hasZeroWeights = 0; { U32 n; for (n=0; n<nbSymbols; n++) { const U32 w = huffWeight[n]; - CTable[n].nbBits = (BYTE)(tableLog + 1 - w); + *hasZeroWeights |= (w == 0); + CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0); } } /* fill val */ diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 5dacba8cf..682c9c047 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2853,14 +2853,23 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, short* offcodeNCount, unsigned* offcodeMaxValue, - const void* const dict, size_t dictSize) + const void* const dict, size_t dictSize) { const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ const BYTE* const dictEnd = dictPtr + dictSize; dictPtr += 8; { unsigned maxSymbolValue = 255; - size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); + unsigned hasZeroWeights; + size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, + dictEnd-dictPtr, &hasZeroWeights); + + /* We only set the loaded table as valid if it contains all non-zero + * weights. Otherwise, we set it to check */ + if (!hasZeroWeights) + bs->entropy.huf.repeatMode = HUF_repeat_valid; + else bs->entropy.huf.repeatMode = HUF_repeat_check; + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted); RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted); dictPtr += hufHeaderSize; @@ -2967,7 +2976,6 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted); } } - bs->entropy.huf.repeatMode = HUF_repeat_valid; bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; diff --git a/tests/dict-files/zero-weight-dict b/tests/dict-files/zero-weight-dict new file mode 100644 index 0000000000000000000000000000000000000000..c40412052137bc1963005832261c0d7bba3cf2bc GIT binary patch literal 153 zcmXqCV({j_!hZ%c0cl1?y}iLNME7hsam>Tyr|cHr`<`*TmPqDB));7UIf*WrRDV@X zQ10Nn8|O49L4XnKWhMn4F7~6%ix?a2cC)f599p@cPwmRn$VrDaxD-=*3|j;g7#JKF o98z2ufIy^RZLkQC_EBu<aXiLz_5=@*!wAGIK+FNe3JP%D0QZtKM*si- literal 0 HcmV?d00001 diff --git a/tests/playTests.sh b/tests/playTests.sh index 295525114..df9568eb5 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -502,6 +502,22 @@ cmp tmp tmp_decompress || die "difference between original and decompressed file println "test : incorrect stream size" cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size" +println "\n===> zstd zero weight dict test " +rm -f tmp* +cp "$TESTDIR/dict-files/zero-weight-dict" tmp_input +$ZSTD -D "$TESTDIR/dict-files/zero-weight-dict" tmp_input +$ZSTD -D "$TESTDIR/dict-files/zero-weight-dict" -d tmp_input.zst -o tmp_decomp +$DIFF tmp_decomp tmp_input +rm -rf tmp* + +println "\n===> zstd (valid) zero weight dict test " +rm -f tmp* +# 0 has a non-zero weight in the dictionary +echo "0000000000000000000000000" > tmp_input +$ZSTD -D "$TESTDIR/dict-files/zero-weight-dict" tmp_input +$ZSTD -D "$TESTDIR/dict-files/zero-weight-dict" -d tmp_input.zst -o tmp_decomp +$DIFF tmp_decomp tmp_input +rm -rf tmp* println "\n===> size-hint mode" @@ -1189,7 +1205,6 @@ $ZSTD --train-cover "$TESTDIR"/*.c "$PRGDIR"/*.c test -f dictionary rm -f tmp* dictionary - if [ "$isWindows" = false ] ; then println "\n===> zstd fifo named pipe test " -- GitLab