]>
Commit | Line | Data |
---|---|---|
15c0b25d AP |
1 | // Copyright 2014-2017 Ulrich Kunitz. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style | |
3 | // license that can be found in the LICENSE file. | |
4 | ||
5 | package lzma | |
6 | ||
7 | import ( | |
8 | "fmt" | |
9 | "io" | |
10 | ) | |
11 | ||
12 | // opLenMargin provides the upper limit of the number of bytes required | |
13 | // to encode a single operation. | |
107c1cdb | 14 | const opLenMargin = 16 |
15c0b25d AP |
15 | |
16 | // compressFlags control the compression process. | |
17 | type compressFlags uint32 | |
18 | ||
19 | // Values for compressFlags. | |
20 | const ( | |
21 | // all data should be compressed, even if compression is not | |
22 | // optimal. | |
23 | all compressFlags = 1 << iota | |
24 | ) | |
25 | ||
26 | // encoderFlags provide the flags for an encoder. | |
27 | type encoderFlags uint32 | |
28 | ||
29 | // Flags for the encoder. | |
30 | const ( | |
31 | // eosMarker requests an EOS marker to be written. | |
32 | eosMarker encoderFlags = 1 << iota | |
33 | ) | |
34 | ||
35 | // Encoder compresses data buffered in the encoder dictionary and writes | |
36 | // it into a byte writer. | |
37 | type encoder struct { | |
38 | dict *encoderDict | |
39 | state *state | |
40 | re *rangeEncoder | |
41 | start int64 | |
42 | // generate eos marker | |
43 | marker bool | |
44 | limit bool | |
45 | margin int | |
46 | } | |
47 | ||
48 | // newEncoder creates a new encoder. If the byte writer must be | |
49 | // limited use LimitedByteWriter provided by this package. The flags | |
50 | // argument supports the eosMarker flag, controlling whether a | |
51 | // terminating end-of-stream marker must be written. | |
52 | func newEncoder(bw io.ByteWriter, state *state, dict *encoderDict, | |
53 | flags encoderFlags) (e *encoder, err error) { | |
54 | ||
55 | re, err := newRangeEncoder(bw) | |
56 | if err != nil { | |
57 | return nil, err | |
58 | } | |
59 | e = &encoder{ | |
60 | dict: dict, | |
61 | state: state, | |
62 | re: re, | |
63 | marker: flags&eosMarker != 0, | |
64 | start: dict.Pos(), | |
65 | margin: opLenMargin, | |
66 | } | |
67 | if e.marker { | |
68 | e.margin += 5 | |
69 | } | |
70 | return e, nil | |
71 | } | |
72 | ||
73 | // Write writes the bytes from p into the dictionary. If not enough | |
74 | // space is available the data in the dictionary buffer will be | |
75 | // compressed to make additional space available. If the limit of the | |
76 | // underlying writer has been reached ErrLimit will be returned. | |
77 | func (e *encoder) Write(p []byte) (n int, err error) { | |
78 | for { | |
79 | k, err := e.dict.Write(p[n:]) | |
80 | n += k | |
81 | if err == ErrNoSpace { | |
82 | if err = e.compress(0); err != nil { | |
83 | return n, err | |
84 | } | |
85 | continue | |
86 | } | |
87 | return n, err | |
88 | } | |
89 | } | |
90 | ||
91 | // Reopen reopens the encoder with a new byte writer. | |
92 | func (e *encoder) Reopen(bw io.ByteWriter) error { | |
93 | var err error | |
94 | if e.re, err = newRangeEncoder(bw); err != nil { | |
95 | return err | |
96 | } | |
97 | e.start = e.dict.Pos() | |
98 | e.limit = false | |
99 | return nil | |
100 | } | |
101 | ||
102 | // writeLiteral writes a literal into the LZMA stream | |
103 | func (e *encoder) writeLiteral(l lit) error { | |
104 | var err error | |
105 | state, state2, _ := e.state.states(e.dict.Pos()) | |
106 | if err = e.state.isMatch[state2].Encode(e.re, 0); err != nil { | |
107 | return err | |
108 | } | |
109 | litState := e.state.litState(e.dict.ByteAt(1), e.dict.Pos()) | |
110 | match := e.dict.ByteAt(int(e.state.rep[0]) + 1) | |
111 | err = e.state.litCodec.Encode(e.re, l.b, state, match, litState) | |
112 | if err != nil { | |
113 | return err | |
114 | } | |
115 | e.state.updateStateLiteral() | |
116 | return nil | |
117 | } | |
118 | ||
119 | // iverson implements the Iverson operator as proposed by Donald Knuth in his | |
120 | // book Concrete Mathematics. | |
121 | func iverson(ok bool) uint32 { | |
122 | if ok { | |
123 | return 1 | |
124 | } | |
125 | return 0 | |
126 | } | |
127 | ||
128 | // writeMatch writes a repetition operation into the operation stream | |
129 | func (e *encoder) writeMatch(m match) error { | |
130 | var err error | |
131 | if !(minDistance <= m.distance && m.distance <= maxDistance) { | |
132 | panic(fmt.Errorf("match distance %d out of range", m.distance)) | |
133 | } | |
134 | dist := uint32(m.distance - minDistance) | |
135 | if !(minMatchLen <= m.n && m.n <= maxMatchLen) && | |
136 | !(dist == e.state.rep[0] && m.n == 1) { | |
137 | panic(fmt.Errorf( | |
138 | "match length %d out of range; dist %d rep[0] %d", | |
139 | m.n, dist, e.state.rep[0])) | |
140 | } | |
141 | state, state2, posState := e.state.states(e.dict.Pos()) | |
142 | if err = e.state.isMatch[state2].Encode(e.re, 1); err != nil { | |
143 | return err | |
144 | } | |
145 | g := 0 | |
146 | for ; g < 4; g++ { | |
147 | if e.state.rep[g] == dist { | |
148 | break | |
149 | } | |
150 | } | |
151 | b := iverson(g < 4) | |
152 | if err = e.state.isRep[state].Encode(e.re, b); err != nil { | |
153 | return err | |
154 | } | |
155 | n := uint32(m.n - minMatchLen) | |
156 | if b == 0 { | |
157 | // simple match | |
158 | e.state.rep[3], e.state.rep[2], e.state.rep[1], e.state.rep[0] = | |
159 | e.state.rep[2], e.state.rep[1], e.state.rep[0], dist | |
160 | e.state.updateStateMatch() | |
161 | if err = e.state.lenCodec.Encode(e.re, n, posState); err != nil { | |
162 | return err | |
163 | } | |
164 | return e.state.distCodec.Encode(e.re, dist, n) | |
165 | } | |
166 | b = iverson(g != 0) | |
167 | if err = e.state.isRepG0[state].Encode(e.re, b); err != nil { | |
168 | return err | |
169 | } | |
170 | if b == 0 { | |
171 | // g == 0 | |
172 | b = iverson(m.n != 1) | |
173 | if err = e.state.isRepG0Long[state2].Encode(e.re, b); err != nil { | |
174 | return err | |
175 | } | |
176 | if b == 0 { | |
177 | e.state.updateStateShortRep() | |
178 | return nil | |
179 | } | |
180 | } else { | |
181 | // g in {1,2,3} | |
182 | b = iverson(g != 1) | |
183 | if err = e.state.isRepG1[state].Encode(e.re, b); err != nil { | |
184 | return err | |
185 | } | |
186 | if b == 1 { | |
187 | // g in {2,3} | |
188 | b = iverson(g != 2) | |
189 | err = e.state.isRepG2[state].Encode(e.re, b) | |
190 | if err != nil { | |
191 | return err | |
192 | } | |
193 | if b == 1 { | |
194 | e.state.rep[3] = e.state.rep[2] | |
195 | } | |
196 | e.state.rep[2] = e.state.rep[1] | |
197 | } | |
198 | e.state.rep[1] = e.state.rep[0] | |
199 | e.state.rep[0] = dist | |
200 | } | |
201 | e.state.updateStateRep() | |
202 | return e.state.repLenCodec.Encode(e.re, n, posState) | |
203 | } | |
204 | ||
205 | // writeOp writes a single operation to the range encoder. The function | |
206 | // checks whether there is enough space available to close the LZMA | |
207 | // stream. | |
208 | func (e *encoder) writeOp(op operation) error { | |
209 | if e.re.Available() < int64(e.margin) { | |
210 | return ErrLimit | |
211 | } | |
212 | switch x := op.(type) { | |
213 | case lit: | |
214 | return e.writeLiteral(x) | |
215 | case match: | |
216 | return e.writeMatch(x) | |
217 | default: | |
218 | panic("unexpected operation") | |
219 | } | |
220 | } | |
221 | ||
222 | // compress compressed data from the dictionary buffer. If the flag all | |
223 | // is set, all data in the dictionary buffer will be compressed. The | |
224 | // function returns ErrLimit if the underlying writer has reached its | |
225 | // limit. | |
226 | func (e *encoder) compress(flags compressFlags) error { | |
227 | n := 0 | |
228 | if flags&all == 0 { | |
229 | n = maxMatchLen - 1 | |
230 | } | |
231 | d := e.dict | |
232 | m := d.m | |
233 | for d.Buffered() > n { | |
234 | op := m.NextOp(e.state.rep) | |
235 | if err := e.writeOp(op); err != nil { | |
236 | return err | |
237 | } | |
238 | d.Discard(op.Len()) | |
239 | } | |
240 | return nil | |
241 | } | |
242 | ||
243 | // eosMatch is a pseudo operation that indicates the end of the stream. | |
244 | var eosMatch = match{distance: maxDistance, n: minMatchLen} | |
245 | ||
246 | // Close terminates the LZMA stream. If requested the end-of-stream | |
247 | // marker will be written. If the byte writer limit has been or will be | |
248 | // reached during compression of the remaining data in the buffer the | |
249 | // LZMA stream will be closed and data will remain in the buffer. | |
250 | func (e *encoder) Close() error { | |
251 | err := e.compress(all) | |
252 | if err != nil && err != ErrLimit { | |
253 | return err | |
254 | } | |
255 | if e.marker { | |
256 | if err := e.writeMatch(eosMatch); err != nil { | |
257 | return err | |
258 | } | |
259 | } | |
260 | err = e.re.Close() | |
261 | return err | |
262 | } | |
263 | ||
264 | // Compressed returns the number bytes of the input data that been | |
265 | // compressed. | |
266 | func (e *encoder) Compressed() int64 { | |
267 | return e.dict.Pos() - e.start | |
268 | } |