1 // Copyright 2014-2017 Ulrich Kunitz. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
16 "github.com/ulikunitz/xz/lzma"
19 // allZeros checks whether a given byte slice has only zeros.
20 func allZeros(p []byte) bool {
29 // padLen returns the length of the padding required for the given
31 func padLen(n int64) int {
41 // headerMagic stores the magic bytes for the header
42 var headerMagic = []byte{0xfd, '7', 'z', 'X', 'Z', 0x00}
44 // HeaderLen provides the length of the xz file header.
47 // Constants for the checksum methods supported by xz.
54 // errInvalidFlags indicates that flags are invalid.
55 var errInvalidFlags = errors.New("xz: invalid flags")
57 // verifyFlags returns the error errInvalidFlags if the value is
59 func verifyFlags(flags byte) error {
61 case CRC32, CRC64, SHA256:
64 return errInvalidFlags
68 // flagstrings maps flag values to strings.
69 var flagstrings = map[byte]string{
75 // flagString returns the string representation for the given flags.
76 func flagString(flags byte) string {
77 s, ok := flagstrings[flags]
84 // newHashFunc returns a function that creates hash instances for the
85 // hash method encoded in flags.
86 func newHashFunc(flags byte) (newHash func() hash.Hash, err error) {
100 // header provides the actual content of the xz file header: the flags.
105 // Errors returned by readHeader.
106 var errHeaderMagic = errors.New("xz: invalid header magic bytes")
108 // ValidHeader checks whether data is a correct xz file header. The
109 // length of data must be HeaderLen.
110 func ValidHeader(data []byte) bool {
112 err := h.UnmarshalBinary(data)
116 // String returns a string representation of the flags.
117 func (h header) String() string {
118 return flagString(h.flags)
121 // UnmarshalBinary reads header from the provided data slice.
122 func (h *header) UnmarshalBinary(data []byte) error {
124 if len(data) != HeaderLen {
125 return errors.New("xz: wrong file header length")
129 if !bytes.Equal(headerMagic, data[:6]) {
130 return errHeaderMagic
134 crc := crc32.NewIEEE()
136 if uint32LE(data[8:]) != crc.Sum32() {
137 return errors.New("xz: invalid checksum for file header")
142 return errInvalidFlags
145 if err := verifyFlags(flags); err != nil {
153 // MarshalBinary generates the xz file header.
154 func (h *header) MarshalBinary() (data []byte, err error) {
155 if err = verifyFlags(h.flags); err != nil {
159 data = make([]byte, 12)
160 copy(data, headerMagic)
163 crc := crc32.NewIEEE()
165 putUint32LE(data[8:], crc.Sum32())
172 // footerLen defines the length of the footer.
175 // footerMagic contains the footer magic bytes.
176 var footerMagic = []byte{'Y', 'Z'}
178 // footer represents the content of the xz file footer.
184 // String returns a string representation of the footer structure.
185 func (f footer) String() string {
186 return fmt.Sprintf("%s index size %d", flagString(f.flags), f.indexSize)
189 // Minimum and maximum for the size of the index (backward size).
192 maxIndexSize = (1 << 32) * 4
195 // MarshalBinary converts footer values into an xz file footer. Note
196 // that the footer value is checked for correctness.
197 func (f *footer) MarshalBinary() (data []byte, err error) {
198 if err = verifyFlags(f.flags); err != nil {
201 if !(minIndexSize <= f.indexSize && f.indexSize <= maxIndexSize) {
202 return nil, errors.New("xz: index size out of range")
204 if f.indexSize%4 != 0 {
205 return nil, errors.New(
206 "xz: index size not aligned to four bytes")
209 data = make([]byte, footerLen)
211 // backward size (index size)
212 s := (f.indexSize / 4) - 1
213 putUint32LE(data[4:], uint32(s))
217 copy(data[10:], footerMagic)
220 crc := crc32.NewIEEE()
221 crc.Write(data[4:10])
222 putUint32LE(data, crc.Sum32())
227 // UnmarshalBinary sets the footer value by unmarshalling an xz file
229 func (f *footer) UnmarshalBinary(data []byte) error {
230 if len(data) != footerLen {
231 return errors.New("xz: wrong footer length")
235 if !bytes.Equal(data[10:], footerMagic) {
236 return errors.New("xz: footer magic invalid")
240 crc := crc32.NewIEEE()
241 crc.Write(data[4:10])
242 if uint32LE(data) != crc.Sum32() {
243 return errors.New("xz: footer checksum error")
247 // backward size (index size)
248 g.indexSize = (int64(uint32LE(data[4:])) + 1) * 4
252 return errInvalidFlags
255 if err := verifyFlags(g.flags); err != nil {
263 /*** Block Header ***/
265 // blockHeader represents the content of an xz block header.
266 type blockHeader struct {
268 uncompressedSize int64
272 // String converts the block header into a string.
273 func (h blockHeader) String() string {
276 if h.compressedSize >= 0 {
277 fmt.Fprintf(&buf, "compressed size %d", h.compressedSize)
280 if h.uncompressedSize >= 0 {
284 fmt.Fprintf(&buf, "uncompressed size %d", h.uncompressedSize)
287 for _, f := range h.filters {
291 fmt.Fprintf(&buf, "filter %s", f)
297 // Masks for the block flags.
299 filterCountMask = 0x03
300 compressedSizePresent = 0x40
301 uncompressedSizePresent = 0x80
302 reservedBlockFlags = 0x3C
305 // errIndexIndicator signals that an index indicator (0x00) has been found
306 // instead of an expected block header indicator.
307 var errIndexIndicator = errors.New("xz: found index indicator")
309 // readBlockHeader reads the block header.
310 func readBlockHeader(r io.Reader) (h *blockHeader, n int, err error) {
315 z, err := io.CopyN(&buf, r, 1)
322 return nil, n, errIndexIndicator
325 // read complete header
326 headerLen := (int(s) + 1) * 4
327 buf.Grow(headerLen - 1)
328 z, err = io.CopyN(&buf, r, int64(headerLen-1))
334 // unmarshal block header
336 if err = h.UnmarshalBinary(buf.Bytes()); err != nil {
343 // readSizeInBlockHeader reads the uncompressed or compressed size
344 // fields in the block header. The present value informs the function
345 // whether the respective field is actually present in the header.
346 func readSizeInBlockHeader(r io.ByteReader, present bool) (n int64, err error) {
350 x, _, err := readUvarint(r)
355 return 0, errors.New("xz: size overflow in block header")
360 // UnmarshalBinary unmarshals the block header.
361 func (h *blockHeader) UnmarshalBinary(data []byte) error {
362 // Check header length
365 return errIndexIndicator
367 headerLen := (int(s) + 1) * 4
368 if len(data) != headerLen {
369 return fmt.Errorf("xz: data length %d; want %d", len(data),
375 crc := crc32.NewIEEE()
377 if crc.Sum32() != uint32LE(data[n:]) {
378 return errors.New("xz: checksum error for block header")
381 // Block header flags
383 if flags&reservedBlockFlags != 0 {
384 return errors.New("xz: reserved block header flags set")
387 r := bytes.NewReader(data[2:n])
391 h.compressedSize, err = readSizeInBlockHeader(
392 r, flags&compressedSizePresent != 0)
398 h.uncompressedSize, err = readSizeInBlockHeader(
399 r, flags&uncompressedSizePresent != 0)
404 h.filters, err = readFilters(r, int(flags&filterCountMask)+1)
410 // Since headerLen is a multiple of 4 we don't need to check
413 // The standard spec says that the padding should have no more
414 // than 3 bytes. However, we found paddings of 4 or 5 bytes in the
415 // wild. See https://github.com/ulikunitz/xz/pull/11 and
416 // https://github.com/ulikunitz/xz/issues/15
418 // The only reasonable approach seems to be to ignore the
419 // padding size. We still check that all padding bytes are zero.
420 if !allZeros(data[n-k : n]) {
426 // MarshalBinary marshals the block header.
427 func (h *blockHeader) MarshalBinary() (data []byte, err error) {
428 if !(minFilters <= len(h.filters) && len(h.filters) <= maxFilters) {
429 return nil, errors.New("xz: filter count wrong")
431 for i, f := range h.filters {
432 if i < len(h.filters)-1 {
433 if f.id() == lzmaFilterID {
434 return nil, errors.New(
435 "xz: LZMA2 filter is not the last")
439 if f.id() != lzmaFilterID {
440 return nil, errors.New("xz: " +
441 "last filter must be the LZMA2 filter")
447 // header size must be set at the end
451 flags := byte(len(h.filters) - 1)
452 if h.compressedSize >= 0 {
453 flags |= compressedSizePresent
455 if h.uncompressedSize >= 0 {
456 flags |= uncompressedSizePresent
460 p := make([]byte, 10)
461 if h.compressedSize >= 0 {
462 k := putUvarint(p, uint64(h.compressedSize))
465 if h.uncompressedSize >= 0 {
466 k := putUvarint(p, uint64(h.uncompressedSize))
470 for _, f := range h.filters {
471 fp, err := f.MarshalBinary()
479 for i := padLen(int64(buf.Len())); i > 0; i-- {
487 if len(data)%4 != 0 {
488 panic("data length not aligned")
491 if !(1 < s && s <= 255) {
492 panic("wrong block header size")
496 crc := crc32.NewIEEE()
497 crc.Write(data[:len(data)-4])
498 putUint32LE(data[len(data)-4:], crc.Sum32())
503 // Constants used for marshalling and unmarshalling filters in the xz
508 minReservedID = 1 << 62
511 // filter represents a filter in the block header.
512 type filter interface {
514 UnmarshalBinary(data []byte) error
515 MarshalBinary() (data []byte, err error)
516 reader(r io.Reader, c *ReaderConfig) (fr io.Reader, err error)
517 writeCloser(w io.WriteCloser, c *WriterConfig) (fw io.WriteCloser, err error)
518 // filter must be last filter
522 // readFilter reads a block filter from the block header. At this point
523 // in time only the LZMA2 filter is supported.
524 func readFilter(r io.Reader) (f filter, err error) {
525 br := lzma.ByteReader(r)
528 id, _, err := readUvarint(br)
536 data = make([]byte, lzmaFilterLen)
537 data[0] = lzmaFilterID
538 if _, err = io.ReadFull(r, data[1:]); err != nil {
543 if id >= minReservedID {
544 return nil, errors.New(
545 "xz: reserved filter id in block stream header")
547 return nil, errors.New("xz: invalid filter id")
549 if err = f.UnmarshalBinary(data); err != nil {
555 // readFilters reads count filters. At this point in time only the count
557 func readFilters(r io.Reader, count int) (filters []filter, err error) {
559 return nil, errors.New("xz: unsupported filter count")
561 f, err := readFilter(r)
565 return []filter{f}, err
568 // writeFilters writes the filters.
569 func writeFilters(w io.Writer, filters []filter) (n int, err error) {
570 for _, f := range filters {
571 p, err := f.MarshalBinary()
586 // record describes a block in the xz file index.
589 uncompressedSize int64
592 // readRecord reads an index record.
593 func readRecord(r io.ByteReader) (rec record, n int, err error) {
594 u, k, err := readUvarint(r)
599 rec.unpaddedSize = int64(u)
600 if rec.unpaddedSize < 0 {
601 return rec, n, errors.New("xz: unpadded size negative")
604 u, k, err = readUvarint(r)
609 rec.uncompressedSize = int64(u)
610 if rec.uncompressedSize < 0 {
611 return rec, n, errors.New("xz: uncompressed size negative")
617 // MarshalBinary converts an index record into its binary encoding.
618 func (rec *record) MarshalBinary() (data []byte, err error) {
619 // maximum length of a uvarint is 10
620 p := make([]byte, 20)
621 n := putUvarint(p, uint64(rec.unpaddedSize))
622 n += putUvarint(p[n:], uint64(rec.uncompressedSize))
626 // writeIndex writes the index, a sequence of records.
627 func writeIndex(w io.Writer, index []record) (n int64, err error) {
628 crc := crc32.NewIEEE()
629 mw := io.MultiWriter(w, crc)
632 k, err := mw.Write([]byte{0})
639 p := make([]byte, 10)
640 k = putUvarint(p, uint64(len(index)))
641 k, err = mw.Write(p[:k])
648 for _, rec := range index {
649 p, err := rec.MarshalBinary()
661 k, err = mw.Write(make([]byte, padLen(int64(n))))
668 putUint32LE(p, crc.Sum32())
669 k, err = w.Write(p[:4])
675 // readIndexBody reads the index from the reader. It assumes that the
676 // index indicator has already been read.
677 func readIndexBody(r io.Reader) (records []record, n int64, err error) {
678 crc := crc32.NewIEEE()
682 br := lzma.ByteReader(io.TeeReader(r, crc))
685 u, k, err := readUvarint(br)
691 if recLen < 0 || uint64(recLen) != u {
692 return nil, n, errors.New("xz: record number overflow")
696 records = make([]record, recLen)
697 for i := range records {
698 records[i], k, err = readRecord(br)
705 p := make([]byte, padLen(int64(n+1)), 4)
706 k, err = io.ReadFull(br.(io.Reader), p)
712 return nil, n, errors.New("xz: non-zero byte in index padding")
718 k, err = io.ReadFull(br.(io.Reader), p)
721 return records, n, err
723 if uint32LE(p) != s {
724 return nil, n, errors.New("xz: wrong checksum for index")
727 return records, n, nil