]>
Commit | Line | Data |
---|---|---|
15c0b25d AP |
1 | // Copyright 2014-2017 Ulrich Kunitz. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style | |
3 | // license that can be found in the LICENSE file. | |
4 | ||
5 | // Package xz supports the compression and decompression of xz files. It | |
6 | // supports version 1.0.4 of the specification without the non-LZMA2 | |
7 | // filters. See http://tukaani.org/xz/xz-file-format-1.0.4.txt | |
8 | package xz | |
9 | ||
10 | import ( | |
11 | "bytes" | |
12 | "errors" | |
13 | "fmt" | |
14 | "hash" | |
15 | "io" | |
16 | ||
17 | "github.com/ulikunitz/xz/internal/xlog" | |
18 | "github.com/ulikunitz/xz/lzma" | |
19 | ) | |
20 | ||
// ReaderConfig collects the settings used to construct an xz reader.
type ReaderConfig struct {
	// DictCap is the dictionary capacity handed to the LZMA2 reader
	// configuration when the settings are verified. fill replaces a
	// zero value with 8 MiB.
	DictCap int
	// SingleStream asks the reader to expect exactly one xz stream in
	// the underlying data; Reader.Read reports an error if any byte
	// follows that stream.
	SingleStream bool
}
28 | ||
29 | // fill replaces all zero values with their default values. | |
30 | func (c *ReaderConfig) fill() { | |
31 | if c.DictCap == 0 { | |
32 | c.DictCap = 8 * 1024 * 1024 | |
33 | } | |
34 | } | |
35 | ||
36 | // Verify checks the reader parameters for Validity. Zero values will be | |
37 | // replaced by default values. | |
38 | func (c *ReaderConfig) Verify() error { | |
39 | if c == nil { | |
40 | return errors.New("xz: reader parameters are nil") | |
41 | } | |
42 | lc := lzma.Reader2Config{DictCap: c.DictCap} | |
43 | if err := lc.Verify(); err != nil { | |
44 | return err | |
45 | } | |
46 | return nil | |
47 | } | |
48 | ||
49 | // Reader supports the reading of one or multiple xz streams. | |
50 | type Reader struct { | |
51 | ReaderConfig | |
52 | ||
53 | xz io.Reader | |
54 | sr *streamReader | |
55 | } | |
56 | ||
57 | // streamReader decodes a single xz stream | |
58 | type streamReader struct { | |
59 | ReaderConfig | |
60 | ||
61 | xz io.Reader | |
62 | br *blockReader | |
63 | newHash func() hash.Hash | |
64 | h header | |
65 | index []record | |
66 | } | |
67 | ||
68 | // NewReader creates a new xz reader using the default parameters. | |
69 | // The function reads and checks the header of the first XZ stream. The | |
70 | // reader will process multiple streams including padding. | |
71 | func NewReader(xz io.Reader) (r *Reader, err error) { | |
72 | return ReaderConfig{}.NewReader(xz) | |
73 | } | |
74 | ||
75 | // NewReader creates an xz stream reader. The created reader will be | |
76 | // able to process multiple streams and padding unless a SingleStream | |
77 | // has been set in the reader configuration c. | |
78 | func (c ReaderConfig) NewReader(xz io.Reader) (r *Reader, err error) { | |
79 | if err = c.Verify(); err != nil { | |
80 | return nil, err | |
81 | } | |
82 | r = &Reader{ | |
83 | ReaderConfig: c, | |
84 | xz: xz, | |
85 | } | |
86 | if r.sr, err = c.newStreamReader(xz); err != nil { | |
87 | if err == io.EOF { | |
88 | err = io.ErrUnexpectedEOF | |
89 | } | |
90 | return nil, err | |
91 | } | |
92 | return r, nil | |
93 | } | |
94 | ||
95 | var errUnexpectedData = errors.New("xz: unexpected data after stream") | |
96 | ||
97 | // Read reads uncompressed data from the stream. | |
98 | func (r *Reader) Read(p []byte) (n int, err error) { | |
99 | for n < len(p) { | |
100 | if r.sr == nil { | |
101 | if r.SingleStream { | |
102 | data := make([]byte, 1) | |
103 | _, err = io.ReadFull(r.xz, data) | |
104 | if err != io.EOF { | |
105 | return n, errUnexpectedData | |
106 | } | |
107 | return n, io.EOF | |
108 | } | |
109 | for { | |
110 | r.sr, err = r.ReaderConfig.newStreamReader(r.xz) | |
111 | if err != errPadding { | |
112 | break | |
113 | } | |
114 | } | |
115 | if err != nil { | |
116 | return n, err | |
117 | } | |
118 | } | |
119 | k, err := r.sr.Read(p[n:]) | |
120 | n += k | |
121 | if err != nil { | |
122 | if err == io.EOF { | |
123 | r.sr = nil | |
124 | continue | |
125 | } | |
126 | return n, err | |
127 | } | |
128 | } | |
129 | return n, nil | |
130 | } | |
131 | ||
132 | var errPadding = errors.New("xz: padding (4 zero bytes) encountered") | |
133 | ||
134 | // newStreamReader creates a new xz stream reader using the given configuration | |
135 | // parameters. NewReader reads and checks the header of the xz stream. | |
136 | func (c ReaderConfig) newStreamReader(xz io.Reader) (r *streamReader, err error) { | |
137 | if err = c.Verify(); err != nil { | |
138 | return nil, err | |
139 | } | |
140 | data := make([]byte, HeaderLen) | |
141 | if _, err := io.ReadFull(xz, data[:4]); err != nil { | |
142 | return nil, err | |
143 | } | |
144 | if bytes.Equal(data[:4], []byte{0, 0, 0, 0}) { | |
145 | return nil, errPadding | |
146 | } | |
147 | if _, err = io.ReadFull(xz, data[4:]); err != nil { | |
148 | if err == io.EOF { | |
149 | err = io.ErrUnexpectedEOF | |
150 | } | |
151 | return nil, err | |
152 | } | |
153 | r = &streamReader{ | |
154 | ReaderConfig: c, | |
155 | xz: xz, | |
156 | index: make([]record, 0, 4), | |
157 | } | |
158 | if err = r.h.UnmarshalBinary(data); err != nil { | |
159 | return nil, err | |
160 | } | |
161 | xlog.Debugf("xz header %s", r.h) | |
162 | if r.newHash, err = newHashFunc(r.h.flags); err != nil { | |
163 | return nil, err | |
164 | } | |
165 | return r, nil | |
166 | } | |
167 | ||
168 | // errIndex indicates an error with the xz file index. | |
169 | var errIndex = errors.New("xz: error in xz file index") | |
170 | ||
171 | // readTail reads the index body and the xz footer. | |
172 | func (r *streamReader) readTail() error { | |
173 | index, n, err := readIndexBody(r.xz) | |
174 | if err != nil { | |
175 | if err == io.EOF { | |
176 | err = io.ErrUnexpectedEOF | |
177 | } | |
178 | return err | |
179 | } | |
180 | if len(index) != len(r.index) { | |
181 | return fmt.Errorf("xz: index length is %d; want %d", | |
182 | len(index), len(r.index)) | |
183 | } | |
184 | for i, rec := range r.index { | |
185 | if rec != index[i] { | |
186 | return fmt.Errorf("xz: record %d is %v; want %v", | |
187 | i, rec, index[i]) | |
188 | } | |
189 | } | |
190 | ||
191 | p := make([]byte, footerLen) | |
192 | if _, err = io.ReadFull(r.xz, p); err != nil { | |
193 | if err == io.EOF { | |
194 | err = io.ErrUnexpectedEOF | |
195 | } | |
196 | return err | |
197 | } | |
198 | var f footer | |
199 | if err = f.UnmarshalBinary(p); err != nil { | |
200 | return err | |
201 | } | |
202 | xlog.Debugf("xz footer %s", f) | |
203 | if f.flags != r.h.flags { | |
204 | return errors.New("xz: footer flags incorrect") | |
205 | } | |
206 | if f.indexSize != int64(n)+1 { | |
207 | return errors.New("xz: index size in footer wrong") | |
208 | } | |
209 | return nil | |
210 | } | |
211 | ||
212 | // Read reads actual data from the xz stream. | |
213 | func (r *streamReader) Read(p []byte) (n int, err error) { | |
214 | for n < len(p) { | |
215 | if r.br == nil { | |
216 | bh, hlen, err := readBlockHeader(r.xz) | |
217 | if err != nil { | |
218 | if err == errIndexIndicator { | |
219 | if err = r.readTail(); err != nil { | |
220 | return n, err | |
221 | } | |
222 | return n, io.EOF | |
223 | } | |
224 | return n, err | |
225 | } | |
226 | xlog.Debugf("block %v", *bh) | |
227 | r.br, err = r.ReaderConfig.newBlockReader(r.xz, bh, | |
228 | hlen, r.newHash()) | |
229 | if err != nil { | |
230 | return n, err | |
231 | } | |
232 | } | |
233 | k, err := r.br.Read(p[n:]) | |
234 | n += k | |
235 | if err != nil { | |
236 | if err == io.EOF { | |
237 | r.index = append(r.index, r.br.record()) | |
238 | r.br = nil | |
239 | } else { | |
240 | return n, err | |
241 | } | |
242 | } | |
243 | } | |
244 | return n, nil | |
245 | } | |
246 | ||
// countingReader wraps a reader and keeps a running total of the bytes
// read through it.
type countingReader struct {
	r io.Reader // wrapped reader
	n int64     // bytes read so far
}

// Read forwards the call to the wrapped reader and adds the number of
// bytes it returned to the n field. The result of the wrapped reader is
// passed on unchanged.
func (cr *countingReader) Read(p []byte) (n int, err error) {
	got, rerr := cr.r.Read(p)
	cr.n += int64(got)
	return got, rerr
}
259 | ||
260 | // blockReader supports the reading of a block. | |
261 | type blockReader struct { | |
262 | lxz countingReader | |
263 | header *blockHeader | |
264 | headerLen int | |
265 | n int64 | |
266 | hash hash.Hash | |
267 | r io.Reader | |
268 | err error | |
269 | } | |
270 | ||
271 | // newBlockReader creates a new block reader. | |
272 | func (c *ReaderConfig) newBlockReader(xz io.Reader, h *blockHeader, | |
273 | hlen int, hash hash.Hash) (br *blockReader, err error) { | |
274 | ||
275 | br = &blockReader{ | |
276 | lxz: countingReader{r: xz}, | |
277 | header: h, | |
278 | headerLen: hlen, | |
279 | hash: hash, | |
280 | } | |
281 | ||
282 | fr, err := c.newFilterReader(&br.lxz, h.filters) | |
283 | if err != nil { | |
284 | return nil, err | |
285 | } | |
286 | br.r = io.TeeReader(fr, br.hash) | |
287 | ||
288 | return br, nil | |
289 | } | |
290 | ||
291 | // uncompressedSize returns the uncompressed size of the block. | |
292 | func (br *blockReader) uncompressedSize() int64 { | |
293 | return br.n | |
294 | } | |
295 | ||
296 | // compressedSize returns the compressed size of the block. | |
297 | func (br *blockReader) compressedSize() int64 { | |
298 | return br.lxz.n | |
299 | } | |
300 | ||
301 | // unpaddedSize computes the unpadded size for the block. | |
302 | func (br *blockReader) unpaddedSize() int64 { | |
303 | n := int64(br.headerLen) | |
304 | n += br.compressedSize() | |
305 | n += int64(br.hash.Size()) | |
306 | return n | |
307 | } | |
308 | ||
309 | // record returns the index record for the current block. | |
310 | func (br *blockReader) record() record { | |
311 | return record{br.unpaddedSize(), br.uncompressedSize()} | |
312 | } | |
313 | ||
314 | // errBlockSize indicates that the size of the block in the block header | |
315 | // is wrong. | |
316 | var errBlockSize = errors.New("xz: wrong uncompressed size for block") | |
317 | ||
318 | // Read reads data from the block. | |
319 | func (br *blockReader) Read(p []byte) (n int, err error) { | |
320 | n, err = br.r.Read(p) | |
321 | br.n += int64(n) | |
322 | ||
323 | u := br.header.uncompressedSize | |
324 | if u >= 0 && br.uncompressedSize() > u { | |
325 | return n, errors.New("xz: wrong uncompressed size for block") | |
326 | } | |
327 | c := br.header.compressedSize | |
328 | if c >= 0 && br.compressedSize() > c { | |
329 | return n, errors.New("xz: wrong compressed size for block") | |
330 | } | |
331 | if err != io.EOF { | |
332 | return n, err | |
333 | } | |
334 | if br.uncompressedSize() < u || br.compressedSize() < c { | |
335 | return n, io.ErrUnexpectedEOF | |
336 | } | |
337 | ||
338 | s := br.hash.Size() | |
339 | k := padLen(br.lxz.n) | |
340 | q := make([]byte, k+s, k+2*s) | |
341 | if _, err = io.ReadFull(br.lxz.r, q); err != nil { | |
342 | if err == io.EOF { | |
343 | err = io.ErrUnexpectedEOF | |
344 | } | |
345 | return n, err | |
346 | } | |
347 | if !allZeros(q[:k]) { | |
348 | return n, errors.New("xz: non-zero block padding") | |
349 | } | |
350 | checkSum := q[k:] | |
351 | computedSum := br.hash.Sum(checkSum[s:]) | |
352 | if !bytes.Equal(checkSum, computedSum) { | |
353 | return n, errors.New("xz: checksum error for block") | |
354 | } | |
355 | return n, io.EOF | |
356 | } | |
357 | ||
358 | func (c *ReaderConfig) newFilterReader(r io.Reader, f []filter) (fr io.Reader, | |
359 | err error) { | |
360 | ||
361 | if err = verifyFilters(f); err != nil { | |
362 | return nil, err | |
363 | } | |
364 | ||
365 | fr = r | |
366 | for i := len(f) - 1; i >= 0; i-- { | |
367 | fr, err = f[i].reader(fr, c) | |
368 | if err != nil { | |
369 | return nil, err | |
370 | } | |
371 | } | |
372 | return fr, nil | |
373 | } |