From 7255c65caa08f5069a4c36dc76327cae513a9099 Mon Sep 17 00:00:00 2001 From: Robert Glonek Date: Fri, 22 Nov 2024 10:32:47 -0800 Subject: [PATCH] add vendored dependency --- src/go.mod | 1 + src/go.sum | 2 + src/vendor/github.com/xi2/xz/AUTHORS | 8 + src/vendor/github.com/xi2/xz/LICENSE | 18 + src/vendor/github.com/xi2/xz/README.md | 10 + src/vendor/github.com/xi2/xz/dec_bcj.go | 461 ++++++++ src/vendor/github.com/xi2/xz/dec_delta.go | 55 + src/vendor/github.com/xi2/xz/dec_lzma2.go | 1235 ++++++++++++++++++++ src/vendor/github.com/xi2/xz/dec_stream.go | 932 +++++++++++++++ src/vendor/github.com/xi2/xz/dec_util.go | 52 + src/vendor/github.com/xi2/xz/dec_xz.go | 124 ++ src/vendor/github.com/xi2/xz/doc.go | 35 + src/vendor/github.com/xi2/xz/reader.go | 256 ++++ src/vendor/modules.txt | 3 + 14 files changed, 3192 insertions(+) create mode 100644 src/vendor/github.com/xi2/xz/AUTHORS create mode 100644 src/vendor/github.com/xi2/xz/LICENSE create mode 100644 src/vendor/github.com/xi2/xz/README.md create mode 100644 src/vendor/github.com/xi2/xz/dec_bcj.go create mode 100644 src/vendor/github.com/xi2/xz/dec_delta.go create mode 100644 src/vendor/github.com/xi2/xz/dec_lzma2.go create mode 100644 src/vendor/github.com/xi2/xz/dec_stream.go create mode 100644 src/vendor/github.com/xi2/xz/dec_util.go create mode 100644 src/vendor/github.com/xi2/xz/dec_xz.go create mode 100644 src/vendor/github.com/xi2/xz/doc.go create mode 100644 src/vendor/github.com/xi2/xz/reader.go diff --git a/src/go.mod b/src/go.mod index e4c44057..9f1c804a 100644 --- a/src/go.mod +++ b/src/go.mod @@ -30,6 +30,7 @@ require ( github.com/rglonek/envconfig v0.0.0-20230911195903-c4c689bf1744 github.com/rglonek/jeddevdk-goflags v2.0.0+incompatible github.com/rglonek/sbs v1.0.1 + github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 golang.org/x/crypto v0.26.0 golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 golang.org/x/sys v0.24.0 diff --git a/src/go.sum b/src/go.sum index 619e14ea..518ea8e5 100644 --- 
a/src/go.sum +++ b/src/go.sum @@ -181,6 +181,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofmx9yWTog9BfvIu0q41lo= +github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= diff --git a/src/vendor/github.com/xi2/xz/AUTHORS b/src/vendor/github.com/xi2/xz/AUTHORS new file mode 100644 index 00000000..657330e1 --- /dev/null +++ b/src/vendor/github.com/xi2/xz/AUTHORS @@ -0,0 +1,8 @@ +# Package xz authors + +Michael Cross + +# XZ Embedded authors + +Lasse Collin +Igor Pavlov diff --git a/src/vendor/github.com/xi2/xz/LICENSE b/src/vendor/github.com/xi2/xz/LICENSE new file mode 100644 index 00000000..b56f2e6a --- /dev/null +++ b/src/vendor/github.com/xi2/xz/LICENSE @@ -0,0 +1,18 @@ +Licensing of github.com/xi2/xz +============================== + + This Go package is a modified version of + + XZ Embedded + + The contents of the testdata directory are modified versions of + the test files from + + XZ Utils + + All the files in this package have been written by Michael Cross, + Lasse Collin and/or Igor PavLov. All these files have been put + into the public domain. You can do whatever you want with these + files. + + This software is provided "as is", without any warranty. 
diff --git a/src/vendor/github.com/xi2/xz/README.md b/src/vendor/github.com/xi2/xz/README.md new file mode 100644 index 00000000..2190af55 --- /dev/null +++ b/src/vendor/github.com/xi2/xz/README.md @@ -0,0 +1,10 @@ +# Xz + +Package xz implements XZ decompression natively in Go. + +Documentation at <https://godoc.org/github.com/xi2/xz>. + +Download and install with `go get github.com/xi2/xz`. + +If you need compression as well as decompression, you might want to +look at <https://github.com/ulikunitz/xz>. diff --git a/src/vendor/github.com/xi2/xz/dec_bcj.go b/src/vendor/github.com/xi2/xz/dec_bcj.go new file mode 100644 index 00000000..a8a3df92 --- /dev/null +++ b/src/vendor/github.com/xi2/xz/dec_bcj.go @@ -0,0 +1,461 @@ +/* + * Branch/Call/Jump (BCJ) filter decoders + * + * Authors: Lasse Collin + * Igor Pavlov + * + * Translation to Go: Michael Cross + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package xz + +/* from linux/lib/xz/xz_dec_bcj.c *************************************/ + +type xzDecBCJ struct { + /* Type of the BCJ filter being used */ + typ xzFilterID + /* + * Return value of the next filter in the chain. We need to preserve + * this information across calls, because we must not call the next + * filter anymore once it has returned xzStreamEnd + */ + ret xzRet + /* + * Absolute position relative to the beginning of the uncompressed + * data (in a single .xz Block). + */ + pos int + /* x86 filter state */ + x86PrevMask uint32 + /* Temporary space to hold the variables from xzBuf */ + out []byte + outPos int + temp struct { + /* Amount of already filtered data in the beginning of buf */ + filtered int + /* + * Buffer to hold a mix of filtered and unfiltered data. 
This + * needs to be big enough to hold Alignment + 2 * Look-ahead: + * + * Type Alignment Look-ahead + * x86 1 4 + * PowerPC 4 0 + * IA-64 16 0 + * ARM 4 0 + * ARM-Thumb 2 2 + * SPARC 4 0 + */ + buf []byte // slice buf will be backed by bufArray + bufArray [16]byte + } +} + +/* + * This is used to test the most significant byte of a memory address + * in an x86 instruction. + */ +func bcjX86TestMSByte(b byte) bool { + return b == 0x00 || b == 0xff +} + +func bcjX86Filter(s *xzDecBCJ, buf []byte) int { + var maskToAllowedStatus = []bool{ + true, true, true, false, true, false, false, false, + } + var maskToBitNum = []byte{0, 1, 2, 2, 3, 3, 3, 3} + var i int + var prevPos int = -1 + var prevMask uint32 = s.x86PrevMask + var src uint32 + var dest uint32 + var j uint32 + var b byte + if len(buf) <= 4 { + return 0 + } + for i = 0; i < len(buf)-4; i++ { + if buf[i]&0xfe != 0xe8 { + continue + } + prevPos = i - prevPos + if prevPos > 3 { + prevMask = 0 + } else { + prevMask = (prevMask << (uint(prevPos) - 1)) & 7 + if prevMask != 0 { + b = buf[i+4-int(maskToBitNum[prevMask])] + if !maskToAllowedStatus[prevMask] || bcjX86TestMSByte(b) { + prevPos = i + prevMask = prevMask<<1 | 1 + continue + } + } + } + prevPos = i + if bcjX86TestMSByte(buf[i+4]) { + src = getLE32(buf[i+1:]) + for { + dest = src - uint32(s.pos+i+5) + if prevMask == 0 { + break + } + j = uint32(maskToBitNum[prevMask]) * 8 + b = byte(dest >> (24 - j)) + if !bcjX86TestMSByte(b) { + break + } + src = dest ^ (1<<(32-j) - 1) + } + dest &= 0x01FFFFFF + dest |= 0 - dest&0x01000000 + putLE32(dest, buf[i+1:]) + i += 4 + } else { + prevMask = prevMask<<1 | 1 + } + } + prevPos = i - prevPos + if prevPos > 3 { + s.x86PrevMask = 0 + } else { + s.x86PrevMask = prevMask << (uint(prevPos) - 1) + } + return i +} + +func bcjPowerPCFilter(s *xzDecBCJ, buf []byte) int { + var i int + var instr uint32 + for i = 0; i+4 <= len(buf); i += 4 { + instr = getBE32(buf[i:]) + if instr&0xFC000003 == 0x48000001 { + instr &= 0x03FFFFFC + 
instr -= uint32(s.pos + i) + instr &= 0x03FFFFFC + instr |= 0x48000001 + putBE32(instr, buf[i:]) + } + } + return i +} + +var bcjIA64BranchTable = [...]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0, +} + +func bcjIA64Filter(s *xzDecBCJ, buf []byte) int { + var branchTable = bcjIA64BranchTable[:] + /* + * The local variables take a little bit stack space, but it's less + * than what LZMA2 decoder takes, so it doesn't make sense to reduce + * stack usage here without doing that for the LZMA2 decoder too. + */ + /* Loop counters */ + var i int + var j int + /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */ + var slot uint32 + /* Bitwise offset of the instruction indicated by slot */ + var bitPos uint32 + /* bit_pos split into byte and bit parts */ + var bytePos uint32 + var bitRes uint32 + /* Address part of an instruction */ + var addr uint32 + /* Mask used to detect which instructions to convert */ + var mask uint32 + /* 41-bit instruction stored somewhere in the lowest 48 bits */ + var instr uint64 + /* Instruction normalized with bit_res for easier manipulation */ + var norm uint64 + for i = 0; i+16 <= len(buf); i += 16 { + mask = uint32(branchTable[buf[i]&0x1f]) + for slot, bitPos = 0, 5; slot < 3; slot, bitPos = slot+1, bitPos+41 { + if (mask>>slot)&1 == 0 { + continue + } + bytePos = bitPos >> 3 + bitRes = bitPos & 7 + instr = 0 + for j = 0; j < 6; j++ { + instr |= uint64(buf[i+j+int(bytePos)]) << (8 * uint(j)) + } + norm = instr >> bitRes + if (norm>>37)&0x0f == 0x05 && (norm>>9)&0x07 == 0 { + addr = uint32((norm >> 13) & 0x0fffff) + addr |= (uint32(norm>>36) & 1) << 20 + addr <<= 4 + addr -= uint32(s.pos + i) + addr >>= 4 + norm &= ^(uint64(0x8fffff) << 13) + norm |= uint64(addr&0x0fffff) << 13 + norm |= uint64(addr&0x100000) << (36 - 20) + instr &= 1<<bitRes - 1 + instr |= norm << bitRes + for j = 0; j < 6; j++ { + buf[i+j+int(bytePos)] = byte(instr >> (8 * uint(j))) + } + } + } + } + return i +} + +func bcjARMFilter(s *xzDecBCJ, buf []byte) int { + var i int + var addr uint32 + 
for i = 0; i+4 <= len(buf); i += 4 { + if buf[i+3] == 0xeb { + addr = uint32(buf[i]) | uint32(buf[i+1])<<8 | + uint32(buf[i+2])<<16 + addr <<= 2 + addr -= uint32(s.pos + i + 8) + addr >>= 2 + buf[i] = byte(addr) + buf[i+1] = byte(addr >> 8) + buf[i+2] = byte(addr >> 16) + } + } + return i +} + +func bcjARMThumbFilter(s *xzDecBCJ, buf []byte) int { + var i int + var addr uint32 + for i = 0; i+4 <= len(buf); i += 2 { + if buf[i+1]&0xf8 == 0xf0 && buf[i+3]&0xf8 == 0xf8 { + addr = uint32(buf[i+1]&0x07)<<19 | + uint32(buf[i])<<11 | + uint32(buf[i+3]&0x07)<<8 | + uint32(buf[i+2]) + addr <<= 1 + addr -= uint32(s.pos + i + 4) + addr >>= 1 + buf[i+1] = byte(0xf0 | (addr>>19)&0x07) + buf[i] = byte(addr >> 11) + buf[i+3] = byte(0xf8 | (addr>>8)&0x07) + buf[i+2] = byte(addr) + i += 2 + } + } + return i +} + +func bcjSPARCFilter(s *xzDecBCJ, buf []byte) int { + var i int + var instr uint32 + for i = 0; i+4 <= len(buf); i += 4 { + instr = getBE32(buf[i:]) + if instr>>22 == 0x100 || instr>>22 == 0x1ff { + instr <<= 2 + instr -= uint32(s.pos + i) + instr >>= 2 + instr = (0x40000000 - instr&0x400000) | + 0x40000000 | (instr & 0x3FFFFF) + putBE32(instr, buf[i:]) + } + } + return i +} + +/* + * Apply the selected BCJ filter. Update *pos and s.pos to match the amount + * of data that got filtered. + */ +func bcjApply(s *xzDecBCJ, buf []byte, pos *int) { + var filtered int + buf = buf[*pos:] + switch s.typ { + case idBCJX86: + filtered = bcjX86Filter(s, buf) + case idBCJPowerPC: + filtered = bcjPowerPCFilter(s, buf) + case idBCJIA64: + filtered = bcjIA64Filter(s, buf) + case idBCJARM: + filtered = bcjARMFilter(s, buf) + case idBCJARMThumb: + filtered = bcjARMThumbFilter(s, buf) + case idBCJSPARC: + filtered = bcjSPARCFilter(s, buf) + default: + /* Never reached */ + } + *pos += filtered + s.pos += filtered +} + +/* + * Flush pending filtered data from temp to the output buffer. + * Move the remaining mixture of possibly filtered and unfiltered + * data to the beginning of temp. 
+ */ +func bcjFlush(s *xzDecBCJ, b *xzBuf) { + var copySize int + copySize = len(b.out) - b.outPos + if copySize > s.temp.filtered { + copySize = s.temp.filtered + } + copy(b.out[b.outPos:], s.temp.buf[:copySize]) + b.outPos += copySize + s.temp.filtered -= copySize + copy(s.temp.buf, s.temp.buf[copySize:]) + s.temp.buf = s.temp.buf[:len(s.temp.buf)-copySize] +} + +/* + * Decode raw stream which has a BCJ filter as the first filter. + * + * The BCJ filter functions are primitive in sense that they process the + * data in chunks of 1-16 bytes. To hide this issue, this function does + * some buffering. + */ +func xzDecBCJRun(s *xzDecBCJ, b *xzBuf, chain func(*xzBuf) xzRet) xzRet { + var outStart int + /* + * Flush pending already filtered data to the output buffer. Return + * immediately if we couldn't flush everything, or if the next + * filter in the chain had already returned xzStreamEnd. + */ + if s.temp.filtered > 0 { + bcjFlush(s, b) + if s.temp.filtered > 0 { + return xzOK + } + if s.ret == xzStreamEnd { + return xzStreamEnd + } + } + /* + * If we have more output space than what is currently pending in + * temp, copy the unfiltered data from temp to the output buffer + * and try to fill the output buffer by decoding more data from the + * next filter in the chain. Apply the BCJ filter on the new data + * in the output buffer. If everything cannot be filtered, copy it + * to temp and rewind the output buffer position accordingly. + * + * This needs to be always run when len(temp.buf) == 0 to handle a special + * case where the output buffer is full and the next filter has no + * more output coming but hasn't returned xzStreamEnd yet. 
+ */ + if len(s.temp.buf) < len(b.out)-b.outPos || len(s.temp.buf) == 0 { + outStart = b.outPos + copy(b.out[b.outPos:], s.temp.buf) + b.outPos += len(s.temp.buf) + s.ret = chain(b) + if s.ret != xzStreamEnd && s.ret != xzOK { + return s.ret + } + bcjApply(s, b.out[:b.outPos], &outStart) + /* + * As an exception, if the next filter returned xzStreamEnd, + * we can do that too, since the last few bytes that remain + * unfiltered are meant to remain unfiltered. + */ + if s.ret == xzStreamEnd { + return xzStreamEnd + } + s.temp.buf = s.temp.bufArray[:b.outPos-outStart] + b.outPos -= len(s.temp.buf) + copy(s.temp.buf, b.out[b.outPos:]) + /* + * If there wasn't enough input to the next filter to fill + * the output buffer with unfiltered data, there's no point + * to try decoding more data to temp. + */ + if b.outPos+len(s.temp.buf) < len(b.out) { + return xzOK + } + } + /* + * We have unfiltered data in temp. If the output buffer isn't full + * yet, try to fill the temp buffer by decoding more data from the + * next filter. Apply the BCJ filter on temp. Then we hopefully can + * fill the actual output buffer by copying filtered data from temp. + * A mix of filtered and unfiltered data may be left in temp; it will + * be taken care on the next call to this function. + */ + if b.outPos < len(b.out) { + /* Make b.out temporarily point to s.temp. */ + s.out = b.out + s.outPos = b.outPos + b.out = s.temp.bufArray[:] + b.outPos = len(s.temp.buf) + s.ret = chain(b) + s.temp.buf = s.temp.bufArray[:b.outPos] + b.out = s.out + b.outPos = s.outPos + if s.ret != xzOK && s.ret != xzStreamEnd { + return s.ret + } + bcjApply(s, s.temp.buf, &s.temp.filtered) + /* + * If the next filter returned xzStreamEnd, we mark that + * everything is filtered, since the last unfiltered bytes + * of the stream are meant to be left as is. 
+ */ + if s.ret == xzStreamEnd { + s.temp.filtered = len(s.temp.buf) + } + bcjFlush(s, b) + if s.temp.filtered > 0 { + return xzOK + } + } + return s.ret +} + +/* + * Allocate memory for BCJ decoders. xzDecBCJReset must be used before + * calling xzDecBCJRun. + */ +func xzDecBCJCreate() *xzDecBCJ { + return new(xzDecBCJ) +} + +/* + * Decode the Filter ID of a BCJ filter and check the start offset is + * valid. Returns xzOK if the given Filter ID and offset is + * supported. Otherwise xzOptionsError is returned. + */ +func xzDecBCJReset(s *xzDecBCJ, id xzFilterID, offset int) xzRet { + switch id { + case idBCJX86: + case idBCJPowerPC: + case idBCJIA64: + case idBCJARM: + case idBCJARMThumb: + case idBCJSPARC: + default: + /* Unsupported Filter ID */ + return xzOptionsError + } + // check offset is a multiple of alignment + switch id { + case idBCJPowerPC, idBCJARM, idBCJSPARC: + if offset%4 != 0 { + return xzOptionsError + } + case idBCJIA64: + if offset%16 != 0 { + return xzOptionsError + } + case idBCJARMThumb: + if offset%2 != 0 { + return xzOptionsError + } + } + s.typ = id + s.ret = xzOK + s.pos = offset + s.x86PrevMask = 0 + s.temp.filtered = 0 + s.temp.buf = nil + return xzOK +} diff --git a/src/vendor/github.com/xi2/xz/dec_delta.go b/src/vendor/github.com/xi2/xz/dec_delta.go new file mode 100644 index 00000000..19df5908 --- /dev/null +++ b/src/vendor/github.com/xi2/xz/dec_delta.go @@ -0,0 +1,55 @@ +/* + * Delta decoder + * + * Author: Lasse Collin + * + * Translation to Go: Michael Cross + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package xz + +type xzDecDelta struct { + delta [256]byte + pos byte + distance int // in range [1, 256] +} + +/* + * Decode raw stream which has a delta filter as the first filter. 
+ */ +func xzDecDeltaRun(s *xzDecDelta, b *xzBuf, chain func(*xzBuf) xzRet) xzRet { + outStart := b.outPos + ret := chain(b) + for i := outStart; i < b.outPos; i++ { + tmp := b.out[i] + s.delta[byte(s.distance+int(s.pos))] + s.delta[s.pos] = tmp + b.out[i] = tmp + s.pos-- + } + return ret +} + +/* + * Allocate memory for a delta decoder. xzDecDeltaReset must be used + * before calling xzDecDeltaRun. + */ +func xzDecDeltaCreate() *xzDecDelta { + return new(xzDecDelta) +} + +/* + * Returns xzOK if the given distance is valid. Otherwise + * xzOptionsError is returned. + */ +func xzDecDeltaReset(s *xzDecDelta, distance int) xzRet { + if distance < 1 || distance > 256 { + return xzOptionsError + } + s.delta = [256]byte{} + s.pos = 0 + s.distance = distance + return xzOK +} diff --git a/src/vendor/github.com/xi2/xz/dec_lzma2.go b/src/vendor/github.com/xi2/xz/dec_lzma2.go new file mode 100644 index 00000000..fa42e471 --- /dev/null +++ b/src/vendor/github.com/xi2/xz/dec_lzma2.go @@ -0,0 +1,1235 @@ +/* + * LZMA2 decoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * Translation to Go: Michael Cross + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package xz + +/* from linux/lib/xz/xz_lzma2.h ***************************************/ + +/* Range coder constants */ +const ( + rcShiftBits = 8 + rcTopBits = 24 + rcTopValue = 1 << rcTopBits + rcBitModelTotalBits = 11 + rcBitModelTotal = 1 << rcBitModelTotalBits + rcMoveBits = 5 +) + +/* + * Maximum number of position states. A position state is the lowest pb + * number of bits of the current uncompressed offset. In some places there + * are different sets of probabilities for different position states. + */ +const posStatesMax = 1 << 4 + +/* + * lzmaState is used to track which LZMA symbols have occurred most recently + * and in which order. This information is used to predict the next symbol. 
+ * + * Symbols: + * - Literal: One 8-bit byte + * - Match: Repeat a chunk of data at some distance + * - Long repeat: Multi-byte match at a recently seen distance + * - Short repeat: One-byte repeat at a recently seen distance + * + * The symbol names are in from STATE-oldest-older-previous. REP means + * either short or long repeated match, and NONLIT means any non-literal. + */ +type lzmaState int + +const ( + stateLitLit lzmaState = iota + stateMatchLitLit + stateRepLitLit + stateShortrepLitLit + stateMatchLit + stateRepList + stateShortrepLit + stateLitMatch + stateLitLongrep + stateLitShortrep + stateNonlitMatch + stateNonlitRep +) + +/* Total number of states */ +const states = 12 + +/* The lowest 7 states indicate that the previous state was a literal. */ +const litStates = 7 + +/* Indicate that the latest symbol was a literal. */ +func lzmaStateLiteral(state *lzmaState) { + switch { + case *state <= stateShortrepLitLit: + *state = stateLitLit + case *state <= stateLitShortrep: + *state -= 3 + default: + *state -= 6 + } +} + +/* Indicate that the latest symbol was a match. */ +func lzmaStateMatch(state *lzmaState) { + if *state < litStates { + *state = stateLitMatch + } else { + *state = stateNonlitMatch + } +} + +/* Indicate that the latest state was a long repeated match. */ +func lzmaStateLongRep(state *lzmaState) { + if *state < litStates { + *state = stateLitLongrep + } else { + *state = stateNonlitRep + } +} + +/* Indicate that the latest symbol was a short match. */ +func lzmaStateShortRep(state *lzmaState) { + if *state < litStates { + *state = stateLitShortrep + } else { + *state = stateNonlitRep + } +} + +/* Test if the previous symbol was a literal. 
*/ +func lzmaStateIsLiteral(state lzmaState) bool { + return state < litStates +} + +/* Each literal coder is divided in three sections: + * - 0x001-0x0FF: Without match byte + * - 0x101-0x1FF: With match byte; match bit is 0 + * - 0x201-0x2FF: With match byte; match bit is 1 + * + * Match byte is used when the previous LZMA symbol was something else than + * a literal (that is, it was some kind of match). + */ +const literalCoderSize = 0x300 + +/* Maximum number of literal coders */ +const literalCodersMax = 1 << 4 + +/* Minimum length of a match is two bytes. */ +const matchLenMin = 2 + +/* Match length is encoded with 4, 5, or 10 bits. + * + * Length Bits + * 2-9 4 = Choice=0 + 3 bits + * 10-17 5 = Choice=1 + Choice2=0 + 3 bits + * 18-273 10 = Choice=1 + Choice2=1 + 8 bits + */ +const ( + lenLowBits = 3 + lenLowSymbols = 1 << lenLowBits + lenMidBits = 3 + lenMidSymbols = 1 << lenMidBits + lenHighBits = 8 + lenHighSymbols = 1 << lenHighBits +) + +/* + * Different sets of probabilities are used for match distances that have + * very short match length: Lengths of 2, 3, and 4 bytes have a separate + * set of probabilities for each length. The matches with longer length + * use a shared set of probabilities. + */ +const distStates = 4 + +/* + * Get the index of the appropriate probability array for decoding + * the distance slot. + */ +func lzmaGetDistState(len uint32) uint32 { + if len < distStates+matchLenMin { + return len - matchLenMin + } else { + return distStates - 1 + } +} + +/* + * The highest two bits of a 32-bit match distance are encoded using six bits. + * This six-bit value is called a distance slot. This way encoding a 32-bit + * value takes 6-36 bits, larger values taking more bits. + */ +const ( + distSlotBits = 6 + distSlots = 1 << distSlotBits +) + +/* Match distances up to 127 are fully encoded using probabilities. 
Since + * the highest two bits (distance slot) are always encoded using six bits, + * the distances 0-3 don't need any additional bits to encode, since the + * distance slot itself is the same as the actual distance. distModelStart + * indicates the first distance slot where at least one additional bit is + * needed. + */ +const distModelStart = 4 + +/* + * Match distances greater than 127 are encoded in three pieces: + * - distance slot: the highest two bits + * - direct bits: 2-26 bits below the highest two bits + * - alignment bits: four lowest bits + * + * Direct bits don't use any probabilities. + * + * The distance slot value of 14 is for distances 128-191. + */ +const distModelEnd = 14 + +/* Distance slots that indicate a distance <= 127. */ +const ( + fullDistancesBits = distModelEnd / 2 + fullDistances = 1 << fullDistancesBits +) + +/* + * For match distances greater than 127, only the highest two bits and the + * lowest four bits (alignment) is encoded using probabilities. + */ +const ( + alignBits = 4 + alignSize = 1 << alignBits +) + +/* from linux/lib/xz/xz_dec_lzma2.c ***********************************/ + +/* + * Range decoder initialization eats the first five bytes of each LZMA chunk. + */ +const rcInitBytes = 5 + +/* + * Minimum number of usable input buffer to safely decode one LZMA symbol. + * The worst case is that we decode 22 bits using probabilities and 26 + * direct bits. This may decode at maximum of 20 bytes of input. However, + * lzmaMain does an extra normalization before returning, thus we + * need to put 21 here. + */ +const lzmaInRequired = 21 + +/* + * Dictionary (history buffer) + * + * These are always true: + * start <= pos <= full <= end + * pos <= limit <= end + * end == size + * size <= sizeMax + * len(buf) <= size + */ +type dictionary struct { + /* The history buffer */ + buf []byte + /* Old position in buf (before decoding more data) */ + start uint32 + /* Position in buf */ + pos uint32 + /* + * How full dictionary is. 
This is used to detect corrupt input that + * would read beyond the beginning of the uncompressed stream. + */ + full uint32 + /* Write limit; we don't write to buf[limit] or later bytes. */ + limit uint32 + /* + * End of the dictionary buffer. This is the same as the + * dictionary size. + */ + end uint32 + /* + * Size of the dictionary as specified in Block Header. This is used + * together with "full" to detect corrupt input that would make us + * read beyond the beginning of the uncompressed stream. + */ + size uint32 + /* Maximum allowed dictionary size. */ + sizeMax uint32 +} + +/* Range decoder */ +type rcDec struct { + rnge uint32 + code uint32 + /* + * Number of initializing bytes remaining to be read + * by rcReadInit. + */ + initBytesLeft uint32 + /* + * Buffer from which we read our input. It can be either + * temp.buf or the caller-provided input buffer. + */ + in []byte + inPos int + inLimit int +} + +/* Probabilities for a length decoder. */ +type lzmaLenDec struct { + /* Probability of match length being at least 10 */ + choice uint16 + /* Probability of match length being at least 18 */ + choice2 uint16 + /* Probabilities for match lengths 2-9 */ + low [posStatesMax][lenLowSymbols]uint16 + /* Probabilities for match lengths 10-17 */ + mid [posStatesMax][lenMidSymbols]uint16 + /* Probabilities for match lengths 18-273 */ + high [lenHighSymbols]uint16 +} + +type lzmaDec struct { + /* Distances of latest four matches */ + rep0 uint32 + rep1 uint32 + rep2 uint32 + rep3 uint32 + /* Types of the most recently seen LZMA symbols */ + state lzmaState + /* + * Length of a match. This is updated so that dictRepeat can + * be called again to finish repeating the whole match. 
+ */ + len uint32 + /* + * LZMA properties or related bit masks (number of literal + * context bits, a mask derived from the number of literal + * position bits, and a mask derived from the number + * position bits) + */ + lc uint32 + literalPosMask uint32 + posMask uint32 + /* If 1, it's a match. Otherwise it's a single 8-bit literal. */ + isMatch [states][posStatesMax]uint16 + /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */ + isRep [states]uint16 + /* + * If 0, distance of a repeated match is rep0. + * Otherwise check is_rep1. + */ + isRep0 [states]uint16 + /* + * If 0, distance of a repeated match is rep1. + * Otherwise check is_rep2. + */ + isRep1 [states]uint16 + /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */ + isRep2 [states]uint16 + /* + * If 1, the repeated match has length of one byte. Otherwise + * the length is decoded from rep_len_decoder. + */ + isRep0Long [states][posStatesMax]uint16 + /* + * Probability tree for the highest two bits of the match + * distance. There is a separate probability tree for match + * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. + */ + distSlot [distStates][distSlots]uint16 + /* + * Probility trees for additional bits for match distance + * when the distance is in the range [4, 127]. + */ + distSpecial [fullDistances - distModelEnd]uint16 + /* + * Probability tree for the lowest four bits of a match + * distance that is equal to or greater than 128. + */ + distAlign [alignSize]uint16 + /* Length of a normal match */ + matchLenDec lzmaLenDec + /* Length of a repeated match */ + repLenDec lzmaLenDec + /* Probabilities of literals */ + literal [literalCodersMax][literalCoderSize]uint16 +} + +// type of lzma2Dec.sequence +type lzma2Seq int + +const ( + seqControl lzma2Seq = iota + seqUncompressed1 + seqUncompressed2 + seqCompressed0 + seqCompressed1 + seqProperties + seqLZMAPrepare + seqLZMARun + seqCopy +) + +type lzma2Dec struct { + /* Position in xzDecLZMA2Run. 
*/ + sequence lzma2Seq + /* Next position after decoding the compressed size of the chunk. */ + nextSequence lzma2Seq + /* Uncompressed size of LZMA chunk (2 MiB at maximum) */ + uncompressed int + /* + * Compressed size of LZMA chunk or compressed/uncompressed + * size of uncompressed chunk (64 KiB at maximum) + */ + compressed int + /* + * True if dictionary reset is needed. This is false before + * the first chunk (LZMA or uncompressed). + */ + needDictReset bool + /* + * True if new LZMA properties are needed. This is false + * before the first LZMA chunk. + */ + needProps bool +} + +type xzDecLZMA2 struct { + /* + * The order below is important on x86 to reduce code size and + * it shouldn't hurt on other platforms. Everything up to and + * including lzma.pos_mask are in the first 128 bytes on x86-32, + * which allows using smaller instructions to access those + * variables. On x86-64, fewer variables fit into the first 128 + * bytes, but this is still the best order without sacrificing + * the readability by splitting the structures. + */ + rc rcDec + dict dictionary + lzma2 lzma2Dec + lzma lzmaDec + /* + * Temporary buffer which holds small number of input bytes between + * decoder calls. See lzma2LZMA for details. + */ + temp struct { + buf []byte // slice buf will be backed by bufArray + bufArray [3 * lzmaInRequired]byte + } +} + +/************** + * Dictionary * + **************/ + +/* + * Reset the dictionary state. When in single-call mode, set up the beginning + * of the dictionary to point to the actual output buffer. + */ +func dictReset(dict *dictionary, b *xzBuf) { + dict.start = 0 + dict.pos = 0 + dict.limit = 0 + dict.full = 0 +} + +/* Set dictionary write limit */ +func dictLimit(dict *dictionary, outMax int) { + if dict.end-dict.pos <= uint32(outMax) { + dict.limit = dict.end + } else { + dict.limit = dict.pos + uint32(outMax) + } +} + +/* Return true if at least one byte can be written into the dictionary. 
*/ +func dictHasSpace(dict *dictionary) bool { + return dict.pos < dict.limit +} + +/* + * Get a byte from the dictionary at the given distance. The distance is + * assumed to valid, or as a special case, zero when the dictionary is + * still empty. This special case is needed for single-call decoding to + * avoid writing a '\x00' to the end of the destination buffer. + */ +func dictGet(dict *dictionary, dist uint32) uint32 { + var offset uint32 = dict.pos - dist - 1 + if dist >= dict.pos { + offset += dict.end + } + if dict.full > 0 { + return uint32(dict.buf[offset]) + } + return 0 +} + +/* + * Put one byte into the dictionary. It is assumed that there is space for it. + */ +func dictPut(dict *dictionary, byte byte) { + dict.buf[dict.pos] = byte + dict.pos++ + if dict.full < dict.pos { + dict.full = dict.pos + } +} + +/* + * Repeat given number of bytes from the given distance. If the distance is + * invalid, false is returned. On success, true is returned and *len is + * updated to indicate how many bytes were left to be repeated. + */ +func dictRepeat(dict *dictionary, len *uint32, dist uint32) bool { + var back uint32 + var left uint32 + if dist >= dict.full || dist >= dict.size { + return false + } + left = dict.limit - dict.pos + if left > *len { + left = *len + } + *len -= left + back = dict.pos - dist - 1 + if dist >= dict.pos { + back += dict.end + } + for { + dict.buf[dict.pos] = dict.buf[back] + dict.pos++ + back++ + if back == dict.end { + back = 0 + } + left-- + if !(left > 0) { + break + } + } + if dict.full < dict.pos { + dict.full = dict.pos + } + return true +} + +/* Copy uncompressed data as is from input to dictionary and output buffers. 
*/ +func dictUncompressed(dict *dictionary, b *xzBuf, left *int) { + var copySize int + for *left > 0 && b.inPos < len(b.in) && b.outPos < len(b.out) { + copySize = len(b.in) - b.inPos + if copySize > len(b.out)-b.outPos { + copySize = len(b.out) - b.outPos + } + if copySize > int(dict.end-dict.pos) { + copySize = int(dict.end - dict.pos) + } + if copySize > *left { + copySize = *left + } + *left -= copySize + copy(dict.buf[dict.pos:], b.in[b.inPos:b.inPos+copySize]) + dict.pos += uint32(copySize) + if dict.full < dict.pos { + dict.full = dict.pos + } + if dict.pos == dict.end { + dict.pos = 0 + } + copy(b.out[b.outPos:], b.in[b.inPos:b.inPos+copySize]) + dict.start = dict.pos + b.outPos += copySize + b.inPos += copySize + } +} + +/* + * Flush pending data from dictionary to b.out. It is assumed that there is + * enough space in b.out. This is guaranteed because caller uses dictLimit + * before decoding data into the dictionary. + */ +func dictFlush(dict *dictionary, b *xzBuf) int { + var copySize int = int(dict.pos - dict.start) + if dict.pos == dict.end { + dict.pos = 0 + } + copy(b.out[b.outPos:], dict.buf[dict.start:dict.start+uint32(copySize)]) + dict.start = dict.pos + b.outPos += copySize + return copySize +} + +/***************** + * Range decoder * + *****************/ + +/* Reset the range decoder. */ +func rcReset(rc *rcDec) { + rc.rnge = ^uint32(0) + rc.code = 0 + rc.initBytesLeft = rcInitBytes +} + +/* + * Read the first five initial bytes into rc->code if they haven't been + * read already. (Yes, the first byte gets completely ignored.) + */ +func rcReadInit(rc *rcDec, b *xzBuf) bool { + for rc.initBytesLeft > 0 { + if b.inPos == len(b.in) { + return false + } + rc.code = rc.code<<8 + uint32(b.in[b.inPos]) + b.inPos++ + rc.initBytesLeft-- + } + return true +} + +/* Return true if there may not be enough input for the next decoding loop. 
*/ +func rcLimitExceeded(rc *rcDec) bool { + return rc.inPos > rc.inLimit +} + +/* + * Return true if it is possible (from point of view of range decoder) that + * we have reached the end of the LZMA chunk. + */ +func rcIsFinished(rc *rcDec) bool { + return rc.code == 0 +} + +/* Read the next input byte if needed. */ +func rcNormalize(rc *rcDec) { + if rc.rnge < rcTopValue { + rc.rnge <<= rcShiftBits + rc.code = rc.code<> rcBitModelTotalBits) * uint32(*prob) + if rc.code < bound { + rc.rnge = bound + *prob += (rcBitModelTotal - *prob) >> rcMoveBits + bit = false + } else { + rc.rnge -= bound + rc.code -= bound + *prob -= *prob >> rcMoveBits + bit = true + } + return bit +} + +/* Decode a bittree starting from the most significant bit. */ +func rcBittree(rc *rcDec, probs []uint16, limit uint32) uint32 { + var symbol uint32 = 1 + for { + if rcBit(rc, &probs[symbol-1]) { + symbol = symbol<<1 + 1 + } else { + symbol <<= 1 + } + if !(symbol < limit) { + break + } + } + return symbol +} + +/* Decode a bittree starting from the least significant bit. */ +func rcBittreeReverse(rc *rcDec, probs []uint16, dest *uint32, limit uint32) { + var symbol uint32 = 1 + var i uint32 = 0 + for { + if rcBit(rc, &probs[symbol-1]) { + symbol = symbol<<1 + 1 + *dest += 1 << i + } else { + symbol <<= 1 + } + i++ + if !(i < limit) { + break + } + } +} + +/* Decode direct bits (fixed fifty-fifty probability) */ +func rcDirect(rc *rcDec, dest *uint32, limit uint32) { + var mask uint32 + for { + rcNormalize(rc) + rc.rnge >>= 1 + rc.code -= rc.rnge + mask = 0 - rc.code>>31 + rc.code += rc.rnge & mask + *dest = *dest<<1 + mask + 1 + limit-- + if !(limit > 0) { + break + } + } +} + +/******** + * LZMA * + ********/ + +/* Get pointer to literal coder probability array. 
*/ +func lzmaLiteralProbs(s *xzDecLZMA2) []uint16 { + var prevByte uint32 = dictGet(&s.dict, 0) + var low uint32 = prevByte >> (8 - s.lzma.lc) + var high uint32 = (s.dict.pos & s.lzma.literalPosMask) << s.lzma.lc + return s.lzma.literal[low+high][:] +} + +/* Decode a literal (one 8-bit byte) */ +func lzmaLiteral(s *xzDecLZMA2) { + var probs []uint16 + var symbol uint32 + var matchByte uint32 + var matchBit uint32 + var offset uint32 + var i uint32 + probs = lzmaLiteralProbs(s) + if lzmaStateIsLiteral(s.lzma.state) { + symbol = rcBittree(&s.rc, probs[1:], 0x100) + } else { + symbol = 1 + matchByte = dictGet(&s.dict, s.lzma.rep0) << 1 + offset = 0x100 + for { + matchBit = matchByte & offset + matchByte <<= 1 + i = offset + matchBit + symbol + if rcBit(&s.rc, &probs[i]) { + symbol = symbol<<1 + 1 + offset &= matchBit + } else { + symbol <<= 1 + offset &= ^matchBit + } + if !(symbol < 0x100) { + break + } + } + } + dictPut(&s.dict, byte(symbol)) + lzmaStateLiteral(&s.lzma.state) +} + +/* Decode the length of the match into s.lzma.len. */ +func lzmaLen(s *xzDecLZMA2, l *lzmaLenDec, posState uint32) { + var probs []uint16 + var limit uint32 + switch { + case !rcBit(&s.rc, &l.choice): + probs = l.low[posState][:] + limit = lenLowSymbols + s.lzma.len = matchLenMin + case !rcBit(&s.rc, &l.choice2): + probs = l.mid[posState][:] + limit = lenMidSymbols + s.lzma.len = matchLenMin + lenLowSymbols + default: + probs = l.high[:] + limit = lenHighSymbols + s.lzma.len = matchLenMin + lenLowSymbols + lenMidSymbols + } + s.lzma.len += rcBittree(&s.rc, probs[1:], limit) - limit +} + +/* Decode a match. The distance will be stored in s.lzma.rep0. 
*/ +func lzmaMatch(s *xzDecLZMA2, posState uint32) { + var probs []uint16 + var distSlot uint32 + var limit uint32 + lzmaStateMatch(&s.lzma.state) + s.lzma.rep3 = s.lzma.rep2 + s.lzma.rep2 = s.lzma.rep1 + s.lzma.rep1 = s.lzma.rep0 + lzmaLen(s, &s.lzma.matchLenDec, posState) + probs = s.lzma.distSlot[lzmaGetDistState(s.lzma.len)][:] + distSlot = rcBittree(&s.rc, probs[1:], distSlots) - distSlots + if distSlot < distModelStart { + s.lzma.rep0 = distSlot + } else { + limit = distSlot>>1 - 1 + s.lzma.rep0 = 2 + distSlot&1 + if distSlot < distModelEnd { + s.lzma.rep0 <<= limit + probs = s.lzma.distSpecial[s.lzma.rep0-distSlot:] + rcBittreeReverse(&s.rc, probs, &s.lzma.rep0, limit) + } else { + rcDirect(&s.rc, &s.lzma.rep0, limit-alignBits) + s.lzma.rep0 <<= alignBits + rcBittreeReverse( + &s.rc, s.lzma.distAlign[1:], &s.lzma.rep0, alignBits) + } + } +} + +/* + * Decode a repeated match. The distance is one of the four most recently + * seen matches. The distance will be stored in s.lzma.rep0. + */ +func lzmaRepMatch(s *xzDecLZMA2, posState uint32) { + var tmp uint32 + if !rcBit(&s.rc, &s.lzma.isRep0[s.lzma.state]) { + if !rcBit(&s.rc, &s.lzma.isRep0Long[s.lzma.state][posState]) { + lzmaStateShortRep(&s.lzma.state) + s.lzma.len = 1 + return + } + } else { + if !rcBit(&s.rc, &s.lzma.isRep1[s.lzma.state]) { + tmp = s.lzma.rep1 + } else { + if !rcBit(&s.rc, &s.lzma.isRep2[s.lzma.state]) { + tmp = s.lzma.rep2 + } else { + tmp = s.lzma.rep3 + s.lzma.rep3 = s.lzma.rep2 + } + s.lzma.rep2 = s.lzma.rep1 + } + s.lzma.rep1 = s.lzma.rep0 + s.lzma.rep0 = tmp + } + lzmaStateLongRep(&s.lzma.state) + lzmaLen(s, &s.lzma.repLenDec, posState) +} + +/* LZMA decoder core */ +func lzmaMain(s *xzDecLZMA2) bool { + var posState uint32 + /* + * If the dictionary was reached during the previous call, try to + * finish the possibly pending repeat in the dictionary. 
+ */ + if dictHasSpace(&s.dict) && s.lzma.len > 0 { + dictRepeat(&s.dict, &s.lzma.len, s.lzma.rep0) + } + /* + * Decode more LZMA symbols. One iteration may consume up to + * lzmaInRequired - 1 bytes. + */ + for dictHasSpace(&s.dict) && !rcLimitExceeded(&s.rc) { + posState = s.dict.pos & s.lzma.posMask + if !rcBit(&s.rc, &s.lzma.isMatch[s.lzma.state][posState]) { + lzmaLiteral(s) + } else { + if rcBit(&s.rc, &s.lzma.isRep[s.lzma.state]) { + lzmaRepMatch(s, posState) + } else { + lzmaMatch(s, posState) + } + if !dictRepeat(&s.dict, &s.lzma.len, s.lzma.rep0) { + return false + } + } + } + /* + * Having the range decoder always normalized when we are outside + * this function makes it easier to correctly handle end of the chunk. + */ + rcNormalize(&s.rc) + return true +} + +/* + * Reset the LZMA decoder and range decoder state. Dictionary is not reset + * here, because LZMA state may be reset without resetting the dictionary. + */ +func lzmaReset(s *xzDecLZMA2) { + s.lzma.state = stateLitLit + s.lzma.rep0 = 0 + s.lzma.rep1 = 0 + s.lzma.rep2 = 0 + s.lzma.rep3 = 0 + /* All probabilities are initialized to the same value, v */ + v := uint16(rcBitModelTotal / 2) + s.lzma.matchLenDec.choice = v + s.lzma.matchLenDec.choice2 = v + s.lzma.repLenDec.choice = v + s.lzma.repLenDec.choice2 = v + for _, m := range [][]uint16{ + s.lzma.isRep[:], s.lzma.isRep0[:], s.lzma.isRep1[:], + s.lzma.isRep2[:], s.lzma.distSpecial[:], s.lzma.distAlign[:], + s.lzma.matchLenDec.high[:], s.lzma.repLenDec.high[:], + } { + for j := range m { + m[j] = v + } + } + for i := range s.lzma.isMatch { + for j := range s.lzma.isMatch[i] { + s.lzma.isMatch[i][j] = v + } + } + for i := range s.lzma.isRep0Long { + for j := range s.lzma.isRep0Long[i] { + s.lzma.isRep0Long[i][j] = v + } + } + for i := range s.lzma.distSlot { + for j := range s.lzma.distSlot[i] { + s.lzma.distSlot[i][j] = v + } + } + for i := range s.lzma.literal { + for j := range s.lzma.literal[i] { + s.lzma.literal[i][j] = v + } + } + for i := 
range s.lzma.matchLenDec.low { + for j := range s.lzma.matchLenDec.low[i] { + s.lzma.matchLenDec.low[i][j] = v + } + } + for i := range s.lzma.matchLenDec.mid { + for j := range s.lzma.matchLenDec.mid[i] { + s.lzma.matchLenDec.mid[i][j] = v + } + } + for i := range s.lzma.repLenDec.low { + for j := range s.lzma.repLenDec.low[i] { + s.lzma.repLenDec.low[i][j] = v + } + } + for i := range s.lzma.repLenDec.mid { + for j := range s.lzma.repLenDec.mid[i] { + s.lzma.repLenDec.mid[i][j] = v + } + } + rcReset(&s.rc) +} + +/* + * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks + * from the decoded lp and pb values. On success, the LZMA decoder state is + * reset and true is returned. + */ +func lzmaProps(s *xzDecLZMA2, props byte) bool { + if props > (4*5+4)*9+8 { + return false + } + s.lzma.posMask = 0 + for props >= 9*5 { + props -= 9 * 5 + s.lzma.posMask++ + } + s.lzma.posMask = 1<= 9 { + props -= 9 + s.lzma.literalPosMask++ + } + s.lzma.lc = uint32(props) + if s.lzma.lc+s.lzma.literalPosMask > 4 { + return false + } + s.lzma.literalPosMask = 1< 0 || s.lzma2.compressed == 0 { + tmp = 2*lzmaInRequired - len(s.temp.buf) + if tmp > s.lzma2.compressed-len(s.temp.buf) { + tmp = s.lzma2.compressed - len(s.temp.buf) + } + if tmp > inAvail { + tmp = inAvail + } + copy(s.temp.bufArray[len(s.temp.buf):], b.in[b.inPos:b.inPos+tmp]) + switch { + case len(s.temp.buf)+tmp == s.lzma2.compressed: + for i := len(s.temp.buf) + tmp; i < len(s.temp.bufArray); i++ { + s.temp.bufArray[i] = 0 + } + s.rc.inLimit = len(s.temp.buf) + tmp + case len(s.temp.buf)+tmp < lzmaInRequired: + s.temp.buf = s.temp.bufArray[:len(s.temp.buf)+tmp] + b.inPos += tmp + return true + default: + s.rc.inLimit = len(s.temp.buf) + tmp - lzmaInRequired + } + s.rc.in = s.temp.bufArray[:] + s.rc.inPos = 0 + if !lzmaMain(s) || s.rc.inPos > len(s.temp.buf)+tmp { + return false + } + s.lzma2.compressed -= s.rc.inPos + if s.rc.inPos < len(s.temp.buf) { + copy(s.temp.buf, s.temp.buf[s.rc.inPos:]) + 
s.temp.buf = s.temp.buf[:len(s.temp.buf)-s.rc.inPos] + return true + } + b.inPos += s.rc.inPos - len(s.temp.buf) + s.temp.buf = nil + } + inAvail = len(b.in) - b.inPos + if inAvail >= lzmaInRequired { + s.rc.in = b.in + s.rc.inPos = b.inPos + if inAvail >= s.lzma2.compressed+lzmaInRequired { + s.rc.inLimit = b.inPos + s.lzma2.compressed + } else { + s.rc.inLimit = len(b.in) - lzmaInRequired + } + if !lzmaMain(s) { + return false + } + inAvail = s.rc.inPos - b.inPos + if inAvail > s.lzma2.compressed { + return false + } + s.lzma2.compressed -= inAvail + b.inPos = s.rc.inPos + } + inAvail = len(b.in) - b.inPos + if inAvail < lzmaInRequired { + if inAvail > s.lzma2.compressed { + inAvail = s.lzma2.compressed + } + s.temp.buf = s.temp.bufArray[:inAvail] + copy(s.temp.buf, b.in[b.inPos:]) + b.inPos += inAvail + } + return true +} + +/* + * Take care of the LZMA2 control layer, and forward the job of actual LZMA + * decoding or copying of uncompressed chunks to other functions. + */ +func xzDecLZMA2Run(s *xzDecLZMA2, b *xzBuf) xzRet { + var tmp int + for b.inPos < len(b.in) || s.lzma2.sequence == seqLZMARun { + switch s.lzma2.sequence { + case seqControl: + /* + * LZMA2 control byte + * + * Exact values: + * 0x00 End marker + * 0x01 Dictionary reset followed by + * an uncompressed chunk + * 0x02 Uncompressed chunk (no dictionary reset) + * + * Highest three bits (s.control & 0xE0): + * 0xE0 Dictionary reset, new properties and state + * reset, followed by LZMA compressed chunk + * 0xC0 New properties and state reset, followed + * by LZMA compressed chunk (no dictionary + * reset) + * 0xA0 State reset using old properties, + * followed by LZMA compressed chunk (no + * dictionary reset) + * 0x80 LZMA chunk (no dictionary or state reset) + * + * For LZMA compressed chunks, the lowest five bits + * (s.control & 1F) are the highest bits of the + * uncompressed size (bits 16-20). + * + * A new LZMA2 stream must begin with a dictionary + * reset. 
The first LZMA chunk must set new + * properties and reset the LZMA state. + * + * Values that don't match anything described above + * are invalid and we return xzDataError. + */ + tmp = int(b.in[b.inPos]) + b.inPos++ + if tmp == 0x00 { + return xzStreamEnd + } + switch { + case tmp >= 0xe0 || tmp == 0x01: + s.lzma2.needProps = true + s.lzma2.needDictReset = false + dictReset(&s.dict, b) + case s.lzma2.needDictReset: + return xzDataError + } + if tmp >= 0x80 { + s.lzma2.uncompressed = (tmp & 0x1f) << 16 + s.lzma2.sequence = seqUncompressed1 + switch { + case tmp >= 0xc0: + /* + * When there are new properties, + * state reset is done at + * seqProperties. + */ + s.lzma2.needProps = false + s.lzma2.nextSequence = seqProperties + case s.lzma2.needProps: + return xzDataError + default: + s.lzma2.nextSequence = seqLZMAPrepare + if tmp >= 0xa0 { + lzmaReset(s) + } + } + } else { + if tmp > 0x02 { + return xzDataError + } + s.lzma2.sequence = seqCompressed0 + s.lzma2.nextSequence = seqCopy + } + case seqUncompressed1: + s.lzma2.uncompressed += int(b.in[b.inPos]) << 8 + b.inPos++ + s.lzma2.sequence = seqUncompressed2 + case seqUncompressed2: + s.lzma2.uncompressed += int(b.in[b.inPos]) + 1 + b.inPos++ + s.lzma2.sequence = seqCompressed0 + case seqCompressed0: + s.lzma2.compressed += int(b.in[b.inPos]) << 8 + b.inPos++ + s.lzma2.sequence = seqCompressed1 + case seqCompressed1: + s.lzma2.compressed += int(b.in[b.inPos]) + 1 + b.inPos++ + s.lzma2.sequence = s.lzma2.nextSequence + case seqProperties: + if !lzmaProps(s, b.in[b.inPos]) { + return xzDataError + } + b.inPos++ + s.lzma2.sequence = seqLZMAPrepare + fallthrough + case seqLZMAPrepare: + if s.lzma2.compressed < rcInitBytes { + return xzDataError + } + if !rcReadInit(&s.rc, b) { + return xzOK + } + s.lzma2.compressed -= rcInitBytes + s.lzma2.sequence = seqLZMARun + fallthrough + case seqLZMARun: + /* + * Set dictionary limit to indicate how much we want + * to be encoded at maximum. 
Decode new data into the + * dictionary. Flush the new data from dictionary to + * b.out. Check if we finished decoding this chunk. + * In case the dictionary got full but we didn't fill + * the output buffer yet, we may run this loop + * multiple times without changing s.lzma2.sequence. + */ + outMax := len(b.out) - b.outPos + if outMax > s.lzma2.uncompressed { + outMax = s.lzma2.uncompressed + } + dictLimit(&s.dict, outMax) + if !lzma2LZMA(s, b) { + return xzDataError + } + s.lzma2.uncompressed -= dictFlush(&s.dict, b) + switch { + case s.lzma2.uncompressed == 0: + if s.lzma2.compressed > 0 || s.lzma.len > 0 || + !rcIsFinished(&s.rc) { + return xzDataError + } + rcReset(&s.rc) + s.lzma2.sequence = seqControl + case b.outPos == len(b.out) || + b.inPos == len(b.in) && + len(s.temp.buf) < s.lzma2.compressed: + return xzOK + } + case seqCopy: + dictUncompressed(&s.dict, b, &s.lzma2.compressed) + if s.lzma2.compressed > 0 { + return xzOK + } + s.lzma2.sequence = seqControl + } + } + return xzOK +} + +/* + * Allocate memory for LZMA2 decoder. xzDecLZMA2Reset must be used + * before calling xzDecLZMA2Run. + */ +func xzDecLZMA2Create(dictMax uint32) *xzDecLZMA2 { + s := new(xzDecLZMA2) + s.dict.sizeMax = dictMax + return s +} + +/* + * Decode the LZMA2 properties (one byte) and reset the decoder. Return + * xzOK on success, xzMemlimitError if the preallocated dictionary is not + * big enough, and xzOptionsError if props indicates something that this + * decoder doesn't support. 
+ */ +func xzDecLZMA2Reset(s *xzDecLZMA2, props byte) xzRet { + if props > 40 { + return xzOptionsError // Bigger than 4 GiB + } + if props == 40 { + s.dict.size = ^uint32(0) + } else { + s.dict.size = uint32(2 + props&1) + s.dict.size <<= props>>1 + 11 + } + if s.dict.size > s.dict.sizeMax { + return xzMemlimitError + } + s.dict.end = s.dict.size + if len(s.dict.buf) < int(s.dict.size) { + s.dict.buf = make([]byte, s.dict.size) + } + s.lzma.len = 0 + s.lzma2.sequence = seqControl + s.lzma2.compressed = 0 + s.lzma2.uncompressed = 0 + s.lzma2.needDictReset = true + s.temp.buf = nil + return xzOK +} diff --git a/src/vendor/github.com/xi2/xz/dec_stream.go b/src/vendor/github.com/xi2/xz/dec_stream.go new file mode 100644 index 00000000..9381a3c8 --- /dev/null +++ b/src/vendor/github.com/xi2/xz/dec_stream.go @@ -0,0 +1,932 @@ +/* + * .xz Stream decoder + * + * Author: Lasse Collin + * + * Translation to Go: Michael Cross + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package xz + +import ( + "bytes" + "crypto/sha256" + "hash" + "hash/crc32" + "hash/crc64" +) + +/* from linux/lib/xz/xz_stream.h **************************************/ + +/* + * See the .xz file format specification at + * http://tukaani.org/xz/xz-file-format.txt + * to understand the container format. + */ +const ( + streamHeaderSize = 12 + headerMagic = "\xfd7zXZ\x00" + footerMagic = "YZ" +) + +/* + * Variable-length integer can hold a 63-bit unsigned integer or a special + * value indicating that the value is unknown. 
+ */ +type vliType uint64 + +const ( + vliUnknown vliType = ^vliType(0) + /* Maximum encoded size of a VLI */ + vliBytesMax = 8 * 8 / 7 // (Sizeof(vliType) * 8 / 7) +) + +/* from linux/lib/xz/xz_dec_stream.c **********************************/ + +/* Hash used to validate the Index field */ +type xzDecHash struct { + unpadded vliType + uncompressed vliType + sha256 hash.Hash +} + +// type of xzDec.sequence +type xzDecSeq int + +const ( + seqStreamHeader xzDecSeq = iota + seqBlockStart + seqBlockHeader + seqBlockUncompress + seqBlockPadding + seqBlockCheck + seqIndex + seqIndexPadding + seqIndexCRC32 + seqStreamFooter +) + +// type of xzDec.index.sequence +type xzDecIndexSeq int + +const ( + seqIndexCount xzDecIndexSeq = iota + seqIndexUnpadded + seqIndexUncompressed +) + +/** + * xzDec - Opaque type to hold the XZ decoder state + */ +type xzDec struct { + /* Position in decMain */ + sequence xzDecSeq + /* Position in variable-length integers and Check fields */ + pos int + /* Variable-length integer decoded by decVLI */ + vli vliType + /* Saved inPos and outPos */ + inStart int + outStart int + /* CRC32 checksum hash used in Index */ + crc32 hash.Hash + /* Hashes used in Blocks */ + checkCRC32 hash.Hash + checkCRC64 hash.Hash + checkSHA256 hash.Hash + /* for checkTypes CRC32/CRC64/SHA256, check is one of the above 3 hashes */ + check hash.Hash + /* Embedded stream header struct containing CheckType */ + *Header + /* + * True if the next call to xzDecRun is allowed to return + * xzBufError. + */ + allowBufError bool + /* Information stored in Block Header */ + blockHeader struct { + /* + * Value stored in the Compressed Size field, or + * vliUnknown if Compressed Size is not present. + */ + compressed vliType + /* + * Value stored in the Uncompressed Size field, or + * vliUnknown if Uncompressed Size is not present. 
+ */ + uncompressed vliType + /* Size of the Block Header field */ + size int + } + /* Information collected when decoding Blocks */ + block struct { + /* Observed compressed size of the current Block */ + compressed vliType + /* Observed uncompressed size of the current Block */ + uncompressed vliType + /* Number of Blocks decoded so far */ + count vliType + /* + * Hash calculated from the Block sizes. This is used to + * validate the Index field. + */ + hash xzDecHash + } + /* Variables needed when verifying the Index field */ + index struct { + /* Position in decIndex */ + sequence xzDecIndexSeq + /* Size of the Index in bytes */ + size vliType + /* Number of Records (matches block.count in valid files) */ + count vliType + /* + * Hash calculated from the Records (matches block.hash in + * valid files). + */ + hash xzDecHash + } + /* + * Temporary buffer needed to hold Stream Header, Block Header, + * and Stream Footer. The Block Header is the biggest (1 KiB) + * so we reserve space according to that. bufArray has to be aligned + * to a multiple of four bytes; the variables before it + * should guarantee this. + */ + temp struct { + pos int + buf []byte // slice buf will be backed by bufArray + bufArray [1024]byte + } + // chain is the function (or to be more precise, closure) which + // does the decompression and will call into the lzma2 and other + // filter code as needed. It is constructed by decBlockHeader + chain func(b *xzBuf) xzRet + // lzma2 holds the state of the last filter (which must be LZMA2) + lzma2 *xzDecLZMA2 + // pointers to allocated BCJ/Delta filters + bcjs []*xzDecBCJ + deltas []*xzDecDelta + // number of currently in use BCJ/Delta filters from the above + bcjsUsed int + deltasUsed int +} + +/* Sizes of the Check field with different Check IDs */ +var checkSizes = [...]byte{ + 0, + 4, 4, 4, + 8, 8, 8, + 16, 16, 16, + 32, 32, 32, + 64, 64, 64, +} + +/* + * Fill s.temp by copying data starting from b.in[b.inPos]. 
Caller + * must have set s.temp.pos to indicate how much data we are supposed + * to copy into s.temp.buf. Return true once s.temp.pos has reached + * len(s.temp.buf). + */ +func fillTemp(s *xzDec, b *xzBuf) bool { + copySize := len(b.in) - b.inPos + tempRemaining := len(s.temp.buf) - s.temp.pos + if copySize > tempRemaining { + copySize = tempRemaining + } + copy(s.temp.buf[s.temp.pos:], b.in[b.inPos:]) + b.inPos += copySize + s.temp.pos += copySize + if s.temp.pos == len(s.temp.buf) { + s.temp.pos = 0 + return true + } + return false +} + +/* Decode a variable-length integer (little-endian base-128 encoding) */ +func decVLI(s *xzDec, in []byte, inPos *int) xzRet { + var byte byte + if s.pos == 0 { + s.vli = 0 + } + for *inPos < len(in) { + byte = in[*inPos] + *inPos++ + s.vli |= vliType(byte&0x7f) << uint(s.pos) + if byte&0x80 == 0 { + /* Don't allow non-minimal encodings. */ + if byte == 0 && s.pos != 0 { + return xzDataError + } + s.pos = 0 + return xzStreamEnd + } + s.pos += 7 + if s.pos == 7*vliBytesMax { + return xzDataError + } + } + return xzOK +} + +/* + * Decode the Compressed Data field from a Block. Update and validate + * the observed compressed and uncompressed sizes of the Block so that + * they don't exceed the values possibly stored in the Block Header + * (validation assumes that no integer overflow occurs, since vliType + * is uint64). Update s.check if presence of the CRC32/CRC64/SHA256 + * field was indicated in Stream Header. + * + * Once the decoding is finished, validate that the observed sizes match + * the sizes possibly stored in the Block Header. Update the hash and + * Block count, which are later used to validate the Index field. 
+ */ +func decBlock(s *xzDec, b *xzBuf) xzRet { + var ret xzRet + s.inStart = b.inPos + s.outStart = b.outPos + ret = s.chain(b) + s.block.compressed += vliType(b.inPos - s.inStart) + s.block.uncompressed += vliType(b.outPos - s.outStart) + /* + * There is no need to separately check for vliUnknown since + * the observed sizes are always smaller than vliUnknown. + */ + if s.block.compressed > s.blockHeader.compressed || + s.block.uncompressed > s.blockHeader.uncompressed { + return xzDataError + } + switch s.CheckType { + case CheckCRC32, CheckCRC64, CheckSHA256: + _, _ = s.check.Write(b.out[s.outStart:b.outPos]) + } + if ret == xzStreamEnd { + if s.blockHeader.compressed != vliUnknown && + s.blockHeader.compressed != s.block.compressed { + return xzDataError + } + if s.blockHeader.uncompressed != vliUnknown && + s.blockHeader.uncompressed != s.block.uncompressed { + return xzDataError + } + s.block.hash.unpadded += + vliType(s.blockHeader.size) + s.block.compressed + s.block.hash.unpadded += vliType(checkSizes[s.CheckType]) + s.block.hash.uncompressed += s.block.uncompressed + var buf [2 * 8]byte // 2*Sizeof(vliType) + putLE64(uint64(s.block.hash.unpadded), buf[:]) + putLE64(uint64(s.block.hash.uncompressed), buf[8:]) + _, _ = s.block.hash.sha256.Write(buf[:]) + s.block.count++ + } + return ret +} + +/* Update the Index size and the CRC32 hash. */ +func indexUpdate(s *xzDec, b *xzBuf) { + inUsed := b.inPos - s.inStart + s.index.size += vliType(inUsed) + _, _ = s.crc32.Write(b.in[s.inStart : s.inStart+inUsed]) +} + +/* + * Decode the Number of Records, Unpadded Size, and Uncompressed Size + * fields from the Index field. That is, Index Padding and CRC32 are not + * decoded by this function. + * + * This can return xzOK (more input needed), xzStreamEnd (everything + * successfully decoded), or xzDataError (input is corrupt). 
+ */ +func decIndex(s *xzDec, b *xzBuf) xzRet { + var ret xzRet + for { + ret = decVLI(s, b.in, &b.inPos) + if ret != xzStreamEnd { + indexUpdate(s, b) + return ret + } + switch s.index.sequence { + case seqIndexCount: + s.index.count = s.vli + /* + * Validate that the Number of Records field + * indicates the same number of Records as + * there were Blocks in the Stream. + */ + if s.index.count != s.block.count { + return xzDataError + } + s.index.sequence = seqIndexUnpadded + case seqIndexUnpadded: + s.index.hash.unpadded += s.vli + s.index.sequence = seqIndexUncompressed + case seqIndexUncompressed: + s.index.hash.uncompressed += s.vli + var buf [2 * 8]byte // 2*Sizeof(vliType) + putLE64(uint64(s.index.hash.unpadded), buf[:]) + putLE64(uint64(s.index.hash.uncompressed), buf[8:]) + _, _ = s.index.hash.sha256.Write(buf[:]) + s.index.count-- + s.index.sequence = seqIndexUnpadded + } + if !(s.index.count > 0) { + break + } + } + return xzStreamEnd +} + +/* + * Validate that the next 4 bytes match s.crc32.Sum(nil). s.pos must + * be zero when starting to validate the first byte. + */ +func crcValidate(s *xzDec, b *xzBuf) xzRet { + sum := s.crc32.Sum(nil) + // CRC32 - reverse slice + sum[0], sum[1], sum[2], sum[3] = sum[3], sum[2], sum[1], sum[0] + for { + if b.inPos == len(b.in) { + return xzOK + } + if sum[s.pos] != b.in[b.inPos] { + return xzDataError + } + b.inPos++ + s.pos++ + if !(s.pos < 4) { + break + } + } + s.crc32.Reset() + s.pos = 0 + return xzStreamEnd +} + +/* + * Validate that the next 4/8/32 bytes match s.check.Sum(nil). s.pos + * must be zero when starting to validate the first byte. 
+ */ +func checkValidate(s *xzDec, b *xzBuf) xzRet { + sum := s.check.Sum(nil) + if s.CheckType == CheckCRC32 || s.CheckType == CheckCRC64 { + // CRC32/64 - reverse slice + for i, j := 0, len(sum)-1; i < j; i, j = i+1, j-1 { + sum[i], sum[j] = sum[j], sum[i] + } + } + for { + if b.inPos == len(b.in) { + return xzOK + } + if sum[s.pos] != b.in[b.inPos] { + return xzDataError + } + b.inPos++ + s.pos++ + if !(s.pos < len(sum)) { + break + } + } + s.check.Reset() + s.pos = 0 + return xzStreamEnd +} + +/* + * Skip over the Check field when the Check ID is not supported. + * Returns true once the whole Check field has been skipped over. + */ +func checkSkip(s *xzDec, b *xzBuf) bool { + for s.pos < int(checkSizes[s.CheckType]) { + if b.inPos == len(b.in) { + return false + } + b.inPos++ + s.pos++ + } + s.pos = 0 + return true +} + +/* polynomial table used in decStreamHeader below */ +var xzCRC64Table = crc64.MakeTable(crc64.ECMA) + +/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */ +func decStreamHeader(s *xzDec) xzRet { + if string(s.temp.buf[:len(headerMagic)]) != headerMagic { + return xzFormatError + } + if crc32.ChecksumIEEE(s.temp.buf[len(headerMagic):len(headerMagic)+2]) != + getLE32(s.temp.buf[len(headerMagic)+2:]) { + return xzDataError + } + if s.temp.buf[len(headerMagic)] != 0 { + return xzOptionsError + } + /* + * Of integrity checks, we support none (Check ID = 0), + * CRC32 (Check ID = 1), CRC64 (Check ID = 4) and SHA256 (Check ID = 10) + * However, we will accept other check types too, but then the check + * won't be verified and a warning (xzUnsupportedCheck) will be given. 
+ */ + s.CheckType = CheckID(s.temp.buf[len(headerMagic)+1]) + if s.CheckType > checkMax { + return xzOptionsError + } + switch s.CheckType { + case CheckNone: + // CheckNone: no action needed + case CheckCRC32: + if s.checkCRC32 == nil { + s.checkCRC32 = crc32.NewIEEE() + } else { + s.checkCRC32.Reset() + } + s.check = s.checkCRC32 + case CheckCRC64: + if s.checkCRC64 == nil { + s.checkCRC64 = crc64.New(xzCRC64Table) + } else { + s.checkCRC64.Reset() + } + s.check = s.checkCRC64 + case CheckSHA256: + if s.checkSHA256 == nil { + s.checkSHA256 = sha256.New() + } else { + s.checkSHA256.Reset() + } + s.check = s.checkSHA256 + default: + return xzUnsupportedCheck + } + return xzOK +} + +/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */ +func decStreamFooter(s *xzDec) xzRet { + if string(s.temp.buf[10:10+len(footerMagic)]) != footerMagic { + return xzDataError + } + if crc32.ChecksumIEEE(s.temp.buf[4:10]) != getLE32(s.temp.buf) { + return xzDataError + } + /* + * Validate Backward Size. Note that we never added the size of the + * Index CRC32 field to s->index.size, thus we use s->index.size / 4 + * instead of s->index.size / 4 - 1. + */ + if s.index.size>>2 != vliType(getLE32(s.temp.buf[4:])) { + return xzDataError + } + if s.temp.buf[8] != 0 || CheckID(s.temp.buf[9]) != s.CheckType { + return xzDataError + } + /* + * Use xzStreamEnd instead of xzOK to be more convenient + * for the caller. + */ + return xzStreamEnd +} + +/* Decode the Block Header and initialize the filter chain. */ +func decBlockHeader(s *xzDec) xzRet { + var ret xzRet + /* + * Validate the CRC32. We know that the temp buffer is at least + * eight bytes so this is safe. + */ + crc := getLE32(s.temp.buf[len(s.temp.buf)-4:]) + s.temp.buf = s.temp.buf[:len(s.temp.buf)-4] + if crc32.ChecksumIEEE(s.temp.buf) != crc { + return xzDataError + } + s.temp.pos = 2 + /* + * Catch unsupported Block Flags. 
+ */ + if s.temp.buf[1]&0x3C != 0 { + return xzOptionsError + } + /* Compressed Size */ + if s.temp.buf[1]&0x40 != 0 { + if decVLI(s, s.temp.buf, &s.temp.pos) != xzStreamEnd { + return xzDataError + } + if s.vli >= 1<<63-8 { + // the whole block must stay smaller than 2^63 bytes + // the block header cannot be smaller than 8 bytes + return xzDataError + } + if s.vli == 0 { + // compressed size must be non-zero + return xzDataError + } + s.blockHeader.compressed = s.vli + } else { + s.blockHeader.compressed = vliUnknown + } + /* Uncompressed Size */ + if s.temp.buf[1]&0x80 != 0 { + if decVLI(s, s.temp.buf, &s.temp.pos) != xzStreamEnd { + return xzDataError + } + s.blockHeader.uncompressed = s.vli + } else { + s.blockHeader.uncompressed = vliUnknown + } + // get total number of filters (1-4) + filterTotal := int(s.temp.buf[1]&0x03) + 1 + // slice to hold decoded filters + filterList := make([]struct { + id xzFilterID + props uint32 + }, filterTotal) + // decode the non-last filters which cannot be LZMA2 + for i := 0; i < filterTotal-1; i++ { + /* Valid Filter Flags always take at least two bytes. 
*/ + if len(s.temp.buf)-s.temp.pos < 2 { + return xzDataError + } + s.temp.pos += 2 + switch id := xzFilterID(s.temp.buf[s.temp.pos-2]); id { + case idDelta: + // delta filter + if s.temp.buf[s.temp.pos-1] != 0x01 { + return xzOptionsError + } + /* Filter Properties contains distance - 1 */ + if len(s.temp.buf)-s.temp.pos < 1 { + return xzDataError + } + props := uint32(s.temp.buf[s.temp.pos]) + s.temp.pos++ + filterList[i] = struct { + id xzFilterID + props uint32 + }{id: id, props: props} + case idBCJX86, idBCJPowerPC, idBCJIA64, + idBCJARM, idBCJARMThumb, idBCJSPARC: + // bcj filter + var props uint32 + switch s.temp.buf[s.temp.pos-1] { + case 0x00: + props = 0 + case 0x04: + if len(s.temp.buf)-s.temp.pos < 4 { + return xzDataError + } + props = getLE32(s.temp.buf[s.temp.pos:]) + s.temp.pos += 4 + default: + return xzOptionsError + } + filterList[i] = struct { + id xzFilterID + props uint32 + }{id: id, props: props} + default: + return xzOptionsError + } + } + /* + * decode the last filter which must be LZMA2 + */ + if len(s.temp.buf)-s.temp.pos < 2 { + return xzDataError + } + /* Filter ID = LZMA2 */ + if xzFilterID(s.temp.buf[s.temp.pos]) != idLZMA2 { + return xzOptionsError + } + s.temp.pos++ + /* Size of Properties = 1-byte Filter Properties */ + if s.temp.buf[s.temp.pos] != 0x01 { + return xzOptionsError + } + s.temp.pos++ + /* Filter Properties contains LZMA2 dictionary size. */ + if len(s.temp.buf)-s.temp.pos < 1 { + return xzDataError + } + props := uint32(s.temp.buf[s.temp.pos]) + s.temp.pos++ + filterList[filterTotal-1] = struct { + id xzFilterID + props uint32 + }{id: idLZMA2, props: props} + /* + * Process the filter list and create s.chain, going from last + * filter (LZMA2) to first filter + * + * First, LZMA2. 
+ */ + ret = xzDecLZMA2Reset(s.lzma2, byte(filterList[filterTotal-1].props)) + if ret != xzOK { + return ret + } + s.chain = func(b *xzBuf) xzRet { + return xzDecLZMA2Run(s.lzma2, b) + } + /* + * Now the non-last filters + */ + for i := filterTotal - 2; i >= 0; i-- { + switch id := filterList[i].id; id { + case idDelta: + // delta filter + var delta *xzDecDelta + if s.deltasUsed < len(s.deltas) { + delta = s.deltas[s.deltasUsed] + } else { + delta = xzDecDeltaCreate() + s.deltas = append(s.deltas, delta) + } + s.deltasUsed++ + ret = xzDecDeltaReset(delta, int(filterList[i].props)+1) + if ret != xzOK { + return ret + } + chain := s.chain + s.chain = func(b *xzBuf) xzRet { + return xzDecDeltaRun(delta, b, chain) + } + case idBCJX86, idBCJPowerPC, idBCJIA64, + idBCJARM, idBCJARMThumb, idBCJSPARC: + // bcj filter + var bcj *xzDecBCJ + if s.bcjsUsed < len(s.bcjs) { + bcj = s.bcjs[s.bcjsUsed] + } else { + bcj = xzDecBCJCreate() + s.bcjs = append(s.bcjs, bcj) + } + s.bcjsUsed++ + ret = xzDecBCJReset(bcj, id, int(filterList[i].props)) + if ret != xzOK { + return ret + } + chain := s.chain + s.chain = func(b *xzBuf) xzRet { + return xzDecBCJRun(bcj, b, chain) + } + } + } + /* The rest must be Header Padding. */ + for s.temp.pos < len(s.temp.buf) { + if s.temp.buf[s.temp.pos] != 0x00 { + return xzOptionsError + } + s.temp.pos++ + } + s.temp.pos = 0 + s.block.compressed = 0 + s.block.uncompressed = 0 + return xzOK +} + +func decMain(s *xzDec, b *xzBuf) xzRet { + var ret xzRet + /* + * Store the start position for the case when we are in the middle + * of the Index field. + */ + s.inStart = b.inPos + for { + switch s.sequence { + case seqStreamHeader: + /* + * Stream Header is copied to s.temp, and then + * decoded from there. This way if the caller + * gives us only little input at a time, we can + * still keep the Stream Header decoding code + * simple. Similar approach is used in many places + * in this file. 
+ */ + if !fillTemp(s, b) { + return xzOK + } + /* + * If decStreamHeader returns + * xzUnsupportedCheck, it is still possible + * to continue decoding. Thus, update s.sequence + * before calling decStreamHeader. + */ + s.sequence = seqBlockStart + ret = decStreamHeader(s) + if ret != xzOK { + return ret + } + fallthrough + case seqBlockStart: + /* We need one byte of input to continue. */ + if b.inPos == len(b.in) { + return xzOK + } + /* See if this is the beginning of the Index field. */ + if b.in[b.inPos] == 0 { + s.inStart = b.inPos + b.inPos++ + s.sequence = seqIndex + break + } + /* + * Calculate the size of the Block Header and + * prepare to decode it. + */ + s.blockHeader.size = (int(b.in[b.inPos]) + 1) * 4 + s.temp.buf = s.temp.bufArray[:s.blockHeader.size] + s.temp.pos = 0 + s.sequence = seqBlockHeader + fallthrough + case seqBlockHeader: + if !fillTemp(s, b) { + return xzOK + } + ret = decBlockHeader(s) + if ret != xzOK { + return ret + } + s.sequence = seqBlockUncompress + fallthrough + case seqBlockUncompress: + ret = decBlock(s, b) + if ret != xzStreamEnd { + return ret + } + s.sequence = seqBlockPadding + fallthrough + case seqBlockPadding: + /* + * Size of Compressed Data + Block Padding + * must be a multiple of four. We don't need + * s.block.compressed for anything else + * anymore, so we use it here to test the size + * of the Block Padding field.
+ */ + for s.block.compressed&3 != 0 { + if b.inPos == len(b.in) { + return xzOK + } + if b.in[b.inPos] != 0 { + return xzDataError + } + b.inPos++ + s.block.compressed++ + } + s.sequence = seqBlockCheck + fallthrough + case seqBlockCheck: + switch s.CheckType { + case CheckCRC32, CheckCRC64, CheckSHA256: + ret = checkValidate(s, b) + if ret != xzStreamEnd { + return ret + } + default: + if !checkSkip(s, b) { + return xzOK + } + } + s.sequence = seqBlockStart + case seqIndex: + ret = decIndex(s, b) + if ret != xzStreamEnd { + return ret + } + s.sequence = seqIndexPadding + fallthrough + case seqIndexPadding: + for (s.index.size+vliType(b.inPos-s.inStart))&3 != 0 { + if b.inPos == len(b.in) { + indexUpdate(s, b) + return xzOK + } + if b.in[b.inPos] != 0 { + return xzDataError + } + b.inPos++ + } + /* Finish the CRC32 value and Index size. */ + indexUpdate(s, b) + /* Compare the hashes to validate the Index field. */ + if !bytes.Equal( + s.block.hash.sha256.Sum(nil), s.index.hash.sha256.Sum(nil)) { + return xzDataError + } + s.sequence = seqIndexCRC32 + fallthrough + case seqIndexCRC32: + ret = crcValidate(s, b) + if ret != xzStreamEnd { + return ret + } + s.temp.buf = s.temp.bufArray[:streamHeaderSize] + s.sequence = seqStreamFooter + fallthrough + case seqStreamFooter: + if !fillTemp(s, b) { + return xzOK + } + return decStreamFooter(s) + } + } + /* Never reached */ +} + +/** + * xzDecRun - Run the XZ decoder + * @s: Decoder state allocated using xzDecInit + * @b: Input and output buffers + * + * See xzRet for details of return values. + * + * xzDecRun is a wrapper for decMain to handle some special cases. + * + * We must return xzBufError when it seems clear that we are not + * going to make any progress anymore. This is to prevent the caller + * from calling us infinitely when the input file is truncated or + * otherwise corrupt. 
Since zlib-style API allows that the caller + * fills the input buffer only when the decoder doesn't produce any + * new output, we have to be careful to avoid returning xzBufError + * too easily: xzBufError is returned only after the second + * consecutive call to xzDecRun that makes no progress. + */ +func xzDecRun(s *xzDec, b *xzBuf) xzRet { + inStart := b.inPos + outStart := b.outPos + ret := decMain(s, b) + if ret == xzOK && inStart == b.inPos && outStart == b.outPos { + if s.allowBufError { + ret = xzBufError + } + s.allowBufError = true + } else { + s.allowBufError = false + } + return ret +} + +/** + * xzDecInit - Allocate and initialize an XZ decoder state + * @dictMax: Maximum size of the LZMA2 dictionary (history buffer) for + * decoding. LZMA2 dictionary is always 2^n bytes + * or 2^n + 2^(n-1) bytes (the latter sizes are less common + * in practice), so other values for dictMax don't make sense. + * + * dictMax specifies the maximum allowed dictionary size that xzDecRun + * may allocate once it has parsed the dictionary size from the stream + * headers. This way excessive allocations can be avoided while still + * limiting the maximum memory usage to a sane value to prevent running the + * system out of memory when decompressing streams from untrusted sources. + * + * xzDecInit returns a pointer to an xzDec, which is ready to be used with + * xzDecRun. + */ +func xzDecInit(dictMax uint32, header *Header) *xzDec { + s := new(xzDec) + s.crc32 = crc32.NewIEEE() + s.Header = header + s.block.hash.sha256 = sha256.New() + s.index.hash.sha256 = sha256.New() + s.lzma2 = xzDecLZMA2Create(dictMax) + xzDecReset(s) + return s +} + +/** + * xzDecReset - Reset an already allocated decoder state + * @s: Decoder state allocated using xzDecInit + * + * This function can be used to reset the decoder state without + * reallocating memory with xzDecInit.
+ */ +func xzDecReset(s *xzDec) { + s.sequence = seqStreamHeader + s.allowBufError = false + s.pos = 0 + s.crc32.Reset() + s.check = nil + s.CheckType = checkUnset + s.block.compressed = 0 + s.block.uncompressed = 0 + s.block.count = 0 + s.block.hash.unpadded = 0 + s.block.hash.uncompressed = 0 + s.block.hash.sha256.Reset() + s.index.sequence = seqIndexCount + s.index.size = 0 + s.index.count = 0 + s.index.hash.unpadded = 0 + s.index.hash.uncompressed = 0 + s.index.hash.sha256.Reset() + s.temp.pos = 0 + s.temp.buf = s.temp.bufArray[:streamHeaderSize] + s.chain = nil + s.bcjsUsed = 0 + s.deltasUsed = 0 +} diff --git a/src/vendor/github.com/xi2/xz/dec_util.go b/src/vendor/github.com/xi2/xz/dec_util.go new file mode 100644 index 00000000..c4227522 --- /dev/null +++ b/src/vendor/github.com/xi2/xz/dec_util.go @@ -0,0 +1,52 @@ +/* + * XZ decompressor utility functions + * + * Author: Michael Cross + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package xz + +func getLE32(buf []byte) uint32 { + return uint32(buf[0]) | + uint32(buf[1])<<8 | + uint32(buf[2])<<16 | + uint32(buf[3])<<24 +} + +func getBE32(buf []byte) uint32 { + return uint32(buf[0])<<24 | + uint32(buf[1])<<16 | + uint32(buf[2])<<8 | + uint32(buf[3]) +} + +func putLE32(val uint32, buf []byte) { + buf[0] = byte(val) + buf[1] = byte(val >> 8) + buf[2] = byte(val >> 16) + buf[3] = byte(val >> 24) + return +} + +func putBE32(val uint32, buf []byte) { + buf[0] = byte(val >> 24) + buf[1] = byte(val >> 16) + buf[2] = byte(val >> 8) + buf[3] = byte(val) + return +} + +func putLE64(val uint64, buf []byte) { + buf[0] = byte(val) + buf[1] = byte(val >> 8) + buf[2] = byte(val >> 16) + buf[3] = byte(val >> 24) + buf[4] = byte(val >> 32) + buf[5] = byte(val >> 40) + buf[6] = byte(val >> 48) + buf[7] = byte(val >> 56) + return +} diff --git a/src/vendor/github.com/xi2/xz/dec_xz.go b/src/vendor/github.com/xi2/xz/dec_xz.go new file mode 100644 index 00000000..1b18a838 --- /dev/null +++ b/src/vendor/github.com/xi2/xz/dec_xz.go @@ -0,0 +1,124 @@ +/* + * XZ decompressor + * + * Authors: Lasse Collin + * Igor Pavlov + * + * Translation to Go: Michael Cross + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package xz + +/* from linux/include/linux/xz.h **************************************/ + +/** + * xzRet - Return codes + * @xzOK: Everything is OK so far. More input or more + * output space is required to continue. + * @xzStreamEnd: Operation finished successfully. + * @xzUnsupportedCheck: Integrity check type is not supported. Decoding + * is still possible by simply calling xzDecRun + * again. + * @xzMemlimitError: A bigger LZMA2 dictionary would be needed than + * allowed by the dictMax argument given to + * xzDecInit. + * @xzFormatError: File format was not recognized (wrong magic + * bytes). + * @xzOptionsError: This implementation doesn't support the requested + * compression options.
In the decoder this means + * that the header CRC32 matches, but the header + * itself specifies something that we don't support. + * @xzDataError: Compressed data is corrupt. + * @xzBufError: Cannot make any progress. + * + * xzBufError is returned when two consecutive calls to XZ code cannot + * consume any input and cannot produce any new output. This happens + * when there is no new input available, or the output buffer is full + * while at least one output byte is still pending. Assuming your code + * is not buggy, you can get this error only when decoding a + * compressed stream that is truncated or otherwise corrupt. + */ +type xzRet int + +const ( + xzOK xzRet = iota + xzStreamEnd + xzUnsupportedCheck + xzMemlimitError + xzFormatError + xzOptionsError + xzDataError + xzBufError +) + +/** + * xzBuf - Passing input and output buffers to XZ code + * @in: Input buffer. + * @inPos: Current position in the input buffer. This must not exceed + * input buffer size. + * @out: Output buffer. + * @outPos: Current position in the output buffer. This must not exceed + * output buffer size. + * + * Only the contents of the output buffer from out[outPos] onward, and + * the variables inPos and outPos are modified by the XZ code. + */ +type xzBuf struct { + in []byte + inPos int + out []byte + outPos int +} + +/* All XZ filter IDs */ +type xzFilterID int64 + +const ( + idDelta xzFilterID = 0x03 + idBCJX86 xzFilterID = 0x04 + idBCJPowerPC xzFilterID = 0x05 + idBCJIA64 xzFilterID = 0x06 + idBCJARM xzFilterID = 0x07 + idBCJARMThumb xzFilterID = 0x08 + idBCJSPARC xzFilterID = 0x09 + idLZMA2 xzFilterID = 0x21 +) + +// CheckID is the type of the data integrity check in an XZ stream +// calculated from the uncompressed data. 
+type CheckID int + +func (id CheckID) String() string { + switch id { + case CheckNone: + return "None" + case CheckCRC32: + return "CRC32" + case CheckCRC64: + return "CRC64" + case CheckSHA256: + return "SHA256" + default: + return "Unknown" + } +} + +const ( + CheckNone CheckID = 0x00 + CheckCRC32 CheckID = 0x01 + CheckCRC64 CheckID = 0x04 + CheckSHA256 CheckID = 0x0A + checkMax CheckID = 0x0F + checkUnset CheckID = -1 +) + +// An XZ stream contains a stream header which holds information about +// the stream. That information is exposed as fields of the +// Reader. Currently it contains only the stream's data integrity +// check type. +type Header struct { + CheckType CheckID // type of the stream's data integrity check +} diff --git a/src/vendor/github.com/xi2/xz/doc.go b/src/vendor/github.com/xi2/xz/doc.go new file mode 100644 index 00000000..f8c62e62 --- /dev/null +++ b/src/vendor/github.com/xi2/xz/doc.go @@ -0,0 +1,35 @@ +// Package xz implements XZ decompression natively in Go. +// +// Usage +// +// For ease of use, this package is designed to have a similar API to +// compress/gzip. See the examples for further details. +// +// Implementation +// +// This package is a translation from C to Go of XZ Embedded +// (http://tukaani.org/xz/embedded.html) with enhancements made so as +// to implement all mandatory and optional parts of the XZ file format +// specification v1.0.4. It supports all filters and block check +// types, supports multiple streams, and performs index verification +// using SHA-256 as recommended by the specification. +// +// Speed +// +// On the author's Intel Ivybridge i5, decompression speed is about +// half that of the standard XZ Utils (tested with a recent linux +// kernel tarball). +// +// Thanks +// +// Thanks are due to Lasse Collin and Igor Pavlov, the authors of XZ +// Embedded, on whose code package xz is based. It would not exist +// without their decision to allow others to modify and reuse their +// code. 
+// +// Bug reports +// +// For bug reports relating to this package please contact the author +// through https://github.com/xi2/xz/issues, and not the authors of XZ +// Embedded. +package xz diff --git a/src/vendor/github.com/xi2/xz/reader.go b/src/vendor/github.com/xi2/xz/reader.go new file mode 100644 index 00000000..e321d755 --- /dev/null +++ b/src/vendor/github.com/xi2/xz/reader.go @@ -0,0 +1,256 @@ +/* + * Package xz Go Reader API + * + * Author: Michael Cross + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package xz + +import ( + "errors" + "io" +) + +// Package specific errors. +var ( + ErrUnsupportedCheck = errors.New("xz: integrity check type not supported") + ErrMemlimit = errors.New("xz: LZMA2 dictionary size exceeds max") + ErrFormat = errors.New("xz: file format not recognized") + ErrOptions = errors.New("xz: compression options not supported") + ErrData = errors.New("xz: data is corrupt") + ErrBuf = errors.New("xz: data is truncated or corrupt") +) + +// DefaultDictMax is the default maximum dictionary size in bytes used +// by the decoder. This value is sufficient to decompress files +// created with XZ Utils "xz -9". +const DefaultDictMax = 1 << 26 // 64 MiB + +// inBufSize is the input buffer size used by the decoder. +const inBufSize = 1 << 13 // 8 KiB + +// A Reader is an io.Reader that can be used to retrieve uncompressed +// data from an XZ file. +// +// In general, an XZ file can be a concatenation of other XZ +// files. Reads from the Reader return the concatenation of the +// uncompressed data of each. 
+type Reader struct { + Header + r io.Reader // the wrapped io.Reader + multistream bool // true if reader is in multistream mode + rEOF bool // true after io.EOF received on r + dEOF bool // true after decoder has completed + padding int // bytes of stream padding read (or -1) + in [inBufSize]byte // backing array for buf.in + buf *xzBuf // decoder input/output buffers + dec *xzDec // decoder state + err error // the result of the last decoder call +} + +// NewReader creates a new Reader reading from r. The decompressor +// will use an LZMA2 dictionary size up to dictMax bytes in +// size. Passing a value of zero sets dictMax to DefaultDictMax. If +// an individual XZ stream requires a dictionary size greater than +// dictMax in order to decompress, Read will return ErrMemlimit. +// +// If NewReader is passed a value of nil for r then a Reader is +// created such that all read attempts will return io.EOF. This is +// useful if you just want to allocate memory for a Reader which will +// later be initialized with Reset. +// +// Due to internal buffering, the Reader may read more data than +// necessary from r. +func NewReader(r io.Reader, dictMax uint32) (*Reader, error) { + if dictMax == 0 { + dictMax = DefaultDictMax + } + z := &Reader{ + r: r, + multistream: true, + padding: -1, + buf: &xzBuf{}, + } + if r == nil { + z.rEOF, z.dEOF = true, true + } + z.dec = xzDecInit(dictMax, &z.Header) + var err error + if r != nil { + _, err = z.Read(nil) // read stream header + } + return z, err +} + +// decode is a wrapper around xzDecRun that additionally handles +// stream padding. It treats the padding as a kind of stream that +// decodes to nothing. 
+// +// When decoding padding, z.padding >= 0 +// When decoding a real stream, z.padding == -1 +func (z *Reader) decode() (ret xzRet) { + if z.padding >= 0 { + // read all padding in input buffer + for z.buf.inPos < len(z.buf.in) && + z.buf.in[z.buf.inPos] == 0 { + z.buf.inPos++ + z.padding++ + } + switch { + case z.buf.inPos == len(z.buf.in) && z.rEOF: + // case: out of padding. no more input data available + if z.padding%4 != 0 { + ret = xzDataError + } else { + ret = xzStreamEnd + } + case z.buf.inPos == len(z.buf.in): + // case: read more padding next loop iteration + ret = xzOK + default: + // case: out of padding. more input data available + if z.padding%4 != 0 { + ret = xzDataError + } else { + xzDecReset(z.dec) + ret = xzStreamEnd + } + } + } else { + ret = xzDecRun(z.dec, z.buf) + } + return +} + +func (z *Reader) Read(p []byte) (n int, err error) { + // restore err + err = z.err + // set decoder output buffer to p + z.buf.out = p + z.buf.outPos = 0 + for { + // update n + n = z.buf.outPos + // if last call to decoder ended with an error, return that error + if err != nil { + break + } + // if decoder has finished, return with err == io.EOF + if z.dEOF { + err = io.EOF + break + } + // if p full, return with err == nil, unless we have not yet + // read the stream header with Read(nil) + if n == len(p) && z.CheckType != checkUnset { + break + } + // if needed, read more data from z.r + if z.buf.inPos == len(z.buf.in) && !z.rEOF { + rn, e := z.r.Read(z.in[:]) + if e != nil && e != io.EOF { + // read error + err = e + break + } + if e == io.EOF { + z.rEOF = true + } + // set new input buffer in z.buf + z.buf.in = z.in[:rn] + z.buf.inPos = 0 + } + // decode more data + ret := z.decode() + switch ret { + case xzOK: + // no action needed + case xzStreamEnd: + if z.padding >= 0 { + z.padding = -1 + if !z.multistream || z.rEOF { + z.dEOF = true + } + } else { + z.padding = 0 + } + case xzUnsupportedCheck: + err = ErrUnsupportedCheck + case xzMemlimitError: + err = 
ErrMemlimit + case xzFormatError: + err = ErrFormat + case xzOptionsError: + err = ErrOptions + case xzDataError: + err = ErrData + case xzBufError: + err = ErrBuf + } + // save err + z.err = err + } + return +} + +// Multistream controls whether the reader is operating in multistream +// mode. +// +// If enabled (the default), the Reader expects the input to be a +// sequence of XZ streams, possibly interspersed with stream padding, +// which it reads one after another. The effect is that the +// concatenation of a sequence of XZ streams or XZ files is +// treated as equivalent to the compressed result of the concatenation +// of the sequence. This is standard behaviour for XZ readers. +// +// Calling Multistream(false) disables this behaviour; disabling the +// behaviour can be useful when reading file formats that distinguish +// individual XZ streams. In this mode, when the Reader reaches the +// end of the stream, Read returns io.EOF. To start the next stream, +// call z.Reset(nil) followed by z.Multistream(false). If there is no +// next stream, z.Reset(nil) will return io.EOF. +func (z *Reader) Multistream(ok bool) { + z.multistream = ok +} + +// Reset, for non-nil values of io.Reader r, discards the Reader z's +// state and makes it equivalent to the result of its original state +// from NewReader, but reading from r instead. This permits reusing a +// Reader rather than allocating a new one. +// +// If you wish to leave r unchanged use z.Reset(nil). This keeps r +// unchanged and ensures internal buffering is preserved. If the +// Reader was at the end of a stream it is then ready to read any +// follow on streams. If there are no follow on streams z.Reset(nil) +// returns io.EOF. If the Reader was not at the end of a stream then +// z.Reset(nil) does nothing. 
+func (z *Reader) Reset(r io.Reader) error { + switch { + case r == nil: + z.multistream = true + if !z.dEOF { + return nil + } + if z.rEOF { + return io.EOF + } + z.dEOF = false + _, err := z.Read(nil) // read stream header + return err + default: + z.r = r + z.multistream = true + z.rEOF = false + z.dEOF = false + z.padding = -1 + z.buf.in = nil + z.buf.inPos = 0 + xzDecReset(z.dec) + z.err = nil + _, err := z.Read(nil) // read stream header + return err + } +} diff --git a/src/vendor/modules.txt b/src/vendor/modules.txt index 7be3b548..63f2ad61 100644 --- a/src/vendor/modules.txt +++ b/src/vendor/modules.txt @@ -278,6 +278,9 @@ github.com/rglonek/sbs # github.com/rivo/uniseg v0.4.7 ## explicit; go 1.18 github.com/rivo/uniseg +# github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 +## explicit +github.com/xi2/xz # github.com/yuin/gopher-lua v1.1.1 ## explicit; go 1.17 github.com/yuin/gopher-lua