1 // Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Package xz supports the compression and decompression of xz files. It
6 // supports version 1.0.4 of the specification without the non-LZMA2
7 // filters. See http://tukaani.org/xz/xz-file-format-1.0.4.txt
17 "github.com/ulikunitz/xz/internal/xlog"
18 "github.com/ulikunitz/xz/lzma"
21 // ReaderConfig defines the parameters for the xz reader. The
22 // SingleStream parameter requests the reader to assume that the
23 // underlying data contains only a single xz stream.
// The DictCap parameter is passed on to lzma.Reader2Config when the
// configuration is verified; fill assigns it a default value.
24 type ReaderConfig struct {
29 // fill replaces all zero values in c with their default values. It
// modifies the configuration in place.
30 func (c *ReaderConfig) fill() {
// Default for DictCap: 8 * 1024 * 1024. Presumably a byte count (8 MiB);
// confirm against lzma.Reader2Config.
32 c.DictCap = 8 * 1024 * 1024
36 // Verify checks the reader parameters for validity. Zero values will be
37 // replaced by default values.
//
// It returns an error when the reader parameters are nil, and otherwise
// passes DictCap to lzma.Reader2Config and runs that type's Verify.
38 func (c *ReaderConfig) Verify() error {
40 return errors.New("xz: reader parameters are nil")
// Delegate validation of the dictionary capacity to the lzma package.
42 lc := lzma.Reader2Config{DictCap: c.DictCap}
43 if err := lc.Verify(); err != nil {
49 // Reader supports the reading of one or multiple xz streams.
57 // streamReader decodes a single xz stream.
58 type streamReader struct {
// newHash is the hash constructor that newStreamReader obtains from
// newHashFunc for the flags of the stream header.
63 newHash func() hash.Hash
68 // NewReader creates a new xz reader using the default parameters.
69 // The function reads and checks the header of the first xz stream. The
70 // reader will process multiple streams including padding.
//
// It is shorthand for calling NewReader on a zero-valued ReaderConfig.
71 func NewReader(xz io.Reader) (r *Reader, err error) {
72 return ReaderConfig{}.NewReader(xz)
75 // NewReader creates an xz stream reader. The created reader will be
76 // able to process multiple streams and padding unless SingleStream
77 // has been set in the reader configuration c.
//
// The configuration is verified first, and the first stream reader is
// created from xz before NewReader returns.
78 func (c ReaderConfig) NewReader(xz io.Reader) (r *Reader, err error) {
// c is a value receiver, so Verify operates on this method's copy of
// the configuration, not on the caller's value.
79 if err = c.Verify(); err != nil {
86 if r.sr, err = c.newStreamReader(xz); err != nil {
// NOTE(review): the condition guarding this assignment is not shown
// here; presumably a plain io.EOF from the header read is reported
// as an unexpected EOF — confirm.
88 err = io.ErrUnexpectedEOF
// errUnexpectedData is returned by Reader.Read; its message indicates
// that data was found after the end of a stream.
95 var errUnexpectedData = errors.New("xz: unexpected data after stream")
97 // Read reads uncompressed data from the stream into p. Its signature
// matches that of io.Reader.
98 func (r *Reader) Read(p []byte) (n int, err error) {
// Probe the underlying reader for one more byte.
102 data := make([]byte, 1)
103 _, err = io.ReadFull(r.xz, data)
// NOTE(review): presumably reached when the probe found something other
// than a clean end of input — confirm the guarding condition.
105 return n, errUnexpectedData
// Open the next stream. newStreamReader reports errPadding when it
// reads four zero bytes where a stream header would start.
110 r.sr, err = r.ReaderConfig.newStreamReader(r.xz)
111 if err != errPadding {
// Let the current stream reader fill the part of p not yet written.
119 k, err := r.sr.Read(p[n:])
// errPadding is returned by newStreamReader when the four bytes read at
// the position of a stream header are all zero.
132 var errPadding = errors.New("xz: padding (4 zero bytes) encountered")
134 // newStreamReader creates a new xz stream reader using the given configuration
135 // parameters. newStreamReader reads and checks the header of the xz stream.
// It returns errPadding if the first four bytes read from xz are zero.
136 func (c ReaderConfig) newStreamReader(xz io.Reader) (r *streamReader, err error) {
137 if err = c.Verify(); err != nil {
140 data := make([]byte, HeaderLen)
// Read only the first four bytes so that padding can be recognized
// before the remainder of the header is requested.
141 if _, err := io.ReadFull(xz, data[:4]); err != nil {
144 if bytes.Equal(data[:4], []byte{0, 0, 0, 0}) {
145 return nil, errPadding
// Read the rest of the header.
147 if _, err = io.ReadFull(xz, data[4:]); err != nil {
149 err = io.ErrUnexpectedEOF
// Start with room for four index records.
156 index: make([]record, 0, 4),
// Decode the header bytes into r.h.
158 if err = r.h.UnmarshalBinary(data); err != nil {
161 xlog.Debugf("xz header %s", r.h)
// Choose the hash constructor according to the header flags.
162 if r.newHash, err = newHashFunc(r.h.flags); err != nil {
168 // errIndex indicates an error with the xz file index.
// NOTE(review): readTail reports index mismatches with its own formatted
// errors rather than errIndex; confirm where errIndex is still used.
169 var errIndex = errors.New("xz: error in xz file index")
171 // readTail reads the index body and the xz footer. It checks the index
// against the records collected while reading the blocks, and the footer
// against the stream header flags and the index size.
172 func (r *streamReader) readTail() error {
173 index, n, err := readIndexBody(r.xz)
176 err = io.ErrUnexpectedEOF
// The index must list as many records as were collected in r.index.
180 if len(index) != len(r.index) {
181 return fmt.Errorf("xz: index length is %d; want %d",
182 len(index), len(r.index))
184 for i, rec := range r.index {
186 return fmt.Errorf("xz: record %d is %v; want %v",
// p receives the raw footer bytes.
191 p := make([]byte, footerLen)
192 if _, err = io.ReadFull(r.xz, p); err != nil {
194 err = io.ErrUnexpectedEOF
199 if err = f.UnmarshalBinary(p); err != nil {
202 xlog.Debugf("xz footer %s", f)
// Header and footer must carry the same flags.
203 if f.flags != r.h.flags {
204 return errors.New("xz: footer flags incorrect")
// NOTE(review): the +1 presumably accounts for the index indicator
// byte that is consumed before readTail runs — confirm.
206 if f.indexSize != int64(n)+1 {
207 return errors.New("xz: index size in footer wrong")
212 // Read reads actual data from the xz stream into p. It reads block
// headers, creates a block reader for each block, and reads the index
// and footer once the index indicator is found.
213 func (r *streamReader) Read(p []byte) (n int, err error) {
216 bh, hlen, err := readBlockHeader(r.xz)
// errIndexIndicator from readBlockHeader signals that the index follows
// instead of another block, so the index and footer are read next.
218 if err == errIndexIndicator {
219 if err = r.readTail(); err != nil {
226 xlog.Debugf("block %v", *bh)
227 r.br, err = r.ReaderConfig.newBlockReader(r.xz, bh,
233 k, err := r.br.Read(p[n:])
// Remember the block's record so that readTail can compare it with the
// index stored in the stream.
237 r.index = append(r.index, r.br.record())
247 // countingReader is a reader that counts the bytes read. It wraps the
// reader in its r field and keeps the count in its n field.
248 type countingReader struct {
253 // Read reads data from the wrapped reader and adds the number of bytes
// read to the n field.
254 func (lr *countingReader) Read(p []byte) (n int, err error) {
255 n, err = lr.r.Read(p)
260 // blockReader supports the reading of a block. Its Read method checks
// the block sizes against the block header and verifies the block
// padding and checksum.
261 type blockReader struct {
271 // newBlockReader creates a new block reader that reads the block
// described by header h from xz and feeds the decoded data into hash.
// NOTE(review): hlen is presumably the encoded length of the block
// header, as used by unpaddedSize — confirm.
272 func (c *ReaderConfig) newBlockReader(xz io.Reader, h *blockHeader,
273 hlen int, hash hash.Hash) (br *blockReader, err error) {
// Wrap xz so that the bytes read from it are counted.
276 lxz: countingReader{r: xz},
// The filter chain reads through the counting reader.
282 fr, err := c.newFilterReader(&br.lxz, h.filters)
// Everything read from the filter reader is also written to the hash.
286 br.r = io.TeeReader(fr, br.hash)
291 // uncompressedSize returns the uncompressed size of the block. It is
// used for the block's index record and by the size checks in Read.
292 func (br *blockReader) uncompressedSize() int64 {
296 // compressedSize returns the compressed size of the block. It is used
// by unpaddedSize and by the size checks in Read.
297 func (br *blockReader) compressedSize() int64 {
301 // unpaddedSize computes the unpadded size for the block: the header
// length plus the compressed size plus the size of the hash value.
302 func (br *blockReader) unpaddedSize() int64 {
303 n := int64(br.headerLen)
304 n += br.compressedSize()
305 n += int64(br.hash.Size())
309 // record returns the index record for the current block, built from
// the block's unpadded size and its uncompressed size.
310 func (br *blockReader) record() record {
311 return record{br.unpaddedSize(), br.uncompressedSize()}
314 // errBlockSize indicates that the size of the block in the block header
// NOTE(review): the sentence above is incomplete as it stands. Also,
// blockReader.Read builds a new error with the same message text
// instead of returning errBlockSize — confirm whether this variable is
// still used.
316 var errBlockSize = errors.New("xz: wrong uncompressed size for block")
318 // Read reads data from the block into p. It checks the amount of data
// against the sizes given in the block header and verifies the block
// padding and the checksum.
319 func (br *blockReader) Read(p []byte) (n int, err error) {
320 n, err = br.r.Read(p)
// The size comparisons below are skipped for negative header values;
// presumably a negative value marks a size that is absent from the
// block header — confirm.
323 u := br.header.uncompressedSize
324 if u >= 0 && br.uncompressedSize() > u {
325 return n, errors.New("xz: wrong uncompressed size for block")
327 c := br.header.compressedSize
328 if c >= 0 && br.compressedSize() > c {
329 return n, errors.New("xz: wrong compressed size for block")
// Fewer bytes than the header sizes announce are reported as an
// unexpected EOF.
334 if br.uncompressedSize() < u || br.compressedSize() < c {
335 return n, io.ErrUnexpectedEOF
// k is the padding length for the bytes counted so far. q has length
// k+s and spare capacity for s more bytes.
// NOTE(review): s is presumably the hash size, with the spare capacity
// reserved for the computed sum — confirm.
339 k := padLen(br.lxz.n)
340 q := make([]byte, k+s, k+2*s)
// Read from the wrapped reader directly, bypassing the byte counter.
341 if _, err = io.ReadFull(br.lxz.r, q); err != nil {
343 err = io.ErrUnexpectedEOF
// The first k bytes are padding and must all be zero.
347 if !allZeros(q[:k]) {
348 return n, errors.New("xz: non-zero block padding")
// Sum appends the computed hash to the slice it is given; the result is
// compared with the checksum read from the stream.
351 computedSum := br.hash.Sum(checkSum[s:])
352 if !bytes.Equal(checkSum, computedSum) {
353 return n, errors.New("xz: checksum error for block")
358 func (c *ReaderConfig) newFilterReader(r io.Reader, f []filter) (fr io.Reader,
361 if err = verifyFilters(f); err != nil {
366 for i := len(f) - 1; i >= 0; i-- {
367 fr, err = f[i].reader(fr, c)