]>
Commit | Line | Data |
---|---|---|
107c1cdb ND |
1 | // Copyright 2016 Google LLC |
2 | // | |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | // you may not use this file except in compliance with the License. | |
5 | // You may obtain a copy of the License at | |
6 | // | |
7 | // http://www.apache.org/licenses/LICENSE-2.0 | |
8 | // | |
9 | // Unless required by applicable law or agreed to in writing, software | |
10 | // distributed under the License is distributed on an "AS IS" BASIS, | |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | // See the License for the specific language governing permissions and | |
13 | // limitations under the License. | |
14 | ||
15 | package storage | |
16 | ||
17 | import ( | |
18 | "context" | |
19 | "errors" | |
20 | "fmt" | |
21 | "hash/crc32" | |
22 | "io" | |
23 | "io/ioutil" | |
24 | "net/http" | |
25 | "net/url" | |
26 | "reflect" | |
27 | "strconv" | |
28 | "strings" | |
29 | "time" | |
30 | ||
31 | "cloud.google.com/go/internal/trace" | |
32 | "google.golang.org/api/googleapi" | |
33 | ) | |
34 | ||
// crc32cTable is the CRC-32 lookup table for the Castagnoli polynomial
// (CRC32C). It is built once at package initialization and used by
// Reader.Read to maintain the running checksum of downloaded content.
var crc32cTable = crc32.MakeTable(crc32.Castagnoli)
36 | ||
// ReaderObjectAttrs describes the object a Reader is reading. The values are
// filled in when the Reader is created and cover only a subset of the
// object's attributes; call ObjectHandle.Attrs for the complete set.
//
// All fields are read-only.
type ReaderObjectAttrs struct {
	// Size is the length, in bytes, of the object's content.
	Size int64

	// ContentType is the MIME type of the object's content.
	ContentType string

	// ContentEncoding is the encoding of the object's content.
	ContentEncoding string

	// CacheControl tells browsers and Internet caches whether, and for how
	// long, they may cache the object.
	CacheControl string

	// LastModified is the time at which the object was last modified.
	LastModified time.Time

	// Generation is the generation number of the object's content.
	Generation int64

	// Metageneration is the version of the object's metadata at this
	// generation. It is used for preconditions and for detecting metadata
	// changes, and is only meaningful relative to one particular
	// generation of one particular object.
	Metageneration int64
}
69 | ||
70 | // NewReader creates a new Reader to read the contents of the | |
71 | // object. | |
72 | // ErrObjectNotExist will be returned if the object is not found. | |
73 | // | |
74 | // The caller must call Close on the returned Reader when done reading. | |
75 | func (o *ObjectHandle) NewReader(ctx context.Context) (*Reader, error) { | |
76 | return o.NewRangeReader(ctx, 0, -1) | |
77 | } | |
78 | ||
79 | // NewRangeReader reads part of an object, reading at most length bytes | |
80 | // starting at the given offset. If length is negative, the object is read | |
81 | // until the end. | |
82 | func (o *ObjectHandle) NewRangeReader(ctx context.Context, offset, length int64) (r *Reader, err error) { | |
83 | ctx = trace.StartSpan(ctx, "cloud.google.com/go/storage.Object.NewRangeReader") | |
84 | defer func() { trace.EndSpan(ctx, err) }() | |
85 | ||
86 | if err := o.validate(); err != nil { | |
87 | return nil, err | |
88 | } | |
89 | if offset < 0 { | |
90 | return nil, fmt.Errorf("storage: invalid offset %d < 0", offset) | |
91 | } | |
92 | if o.conds != nil { | |
93 | if err := o.conds.validate("NewRangeReader"); err != nil { | |
94 | return nil, err | |
95 | } | |
96 | } | |
97 | u := &url.URL{ | |
98 | Scheme: "https", | |
99 | Host: "storage.googleapis.com", | |
100 | Path: fmt.Sprintf("/%s/%s", o.bucket, o.object), | |
101 | } | |
102 | verb := "GET" | |
103 | if length == 0 { | |
104 | verb = "HEAD" | |
105 | } | |
106 | req, err := http.NewRequest(verb, u.String(), nil) | |
107 | if err != nil { | |
108 | return nil, err | |
109 | } | |
110 | req = req.WithContext(ctx) | |
111 | if o.userProject != "" { | |
112 | req.Header.Set("X-Goog-User-Project", o.userProject) | |
113 | } | |
114 | if o.readCompressed { | |
115 | req.Header.Set("Accept-Encoding", "gzip") | |
116 | } | |
117 | if err := setEncryptionHeaders(req.Header, o.encryptionKey, false); err != nil { | |
118 | return nil, err | |
119 | } | |
120 | ||
121 | gen := o.gen | |
122 | ||
123 | // Define a function that initiates a Read with offset and length, assuming we | |
124 | // have already read seen bytes. | |
125 | reopen := func(seen int64) (*http.Response, error) { | |
126 | start := offset + seen | |
127 | if length < 0 && start > 0 { | |
128 | req.Header.Set("Range", fmt.Sprintf("bytes=%d-", start)) | |
129 | } else if length > 0 { | |
130 | // The end character isn't affected by how many bytes we've seen. | |
131 | req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", start, offset+length-1)) | |
132 | } | |
133 | // We wait to assign conditions here because the generation number can change in between reopen() runs. | |
134 | req.URL.RawQuery = conditionsQuery(gen, o.conds) | |
135 | var res *http.Response | |
136 | err = runWithRetry(ctx, func() error { | |
137 | res, err = o.c.hc.Do(req) | |
138 | if err != nil { | |
139 | return err | |
140 | } | |
141 | if res.StatusCode == http.StatusNotFound { | |
142 | res.Body.Close() | |
143 | return ErrObjectNotExist | |
144 | } | |
145 | if res.StatusCode < 200 || res.StatusCode > 299 { | |
146 | body, _ := ioutil.ReadAll(res.Body) | |
147 | res.Body.Close() | |
148 | return &googleapi.Error{ | |
149 | Code: res.StatusCode, | |
150 | Header: res.Header, | |
151 | Body: string(body), | |
152 | } | |
153 | } | |
154 | if start > 0 && length != 0 && res.StatusCode != http.StatusPartialContent { | |
155 | res.Body.Close() | |
156 | return errors.New("storage: partial request not satisfied") | |
157 | } | |
158 | // If a generation hasn't been specified, and this is the first response we get, let's record the | |
159 | // generation. In future requests we'll use this generation as a precondition to avoid data races. | |
160 | if gen < 0 && res.Header.Get("X-Goog-Generation") != "" { | |
161 | gen64, err := strconv.ParseInt(res.Header.Get("X-Goog-Generation"), 10, 64) | |
162 | if err != nil { | |
163 | return err | |
164 | } | |
165 | gen = gen64 | |
166 | } | |
167 | return nil | |
168 | }) | |
169 | if err != nil { | |
170 | return nil, err | |
171 | } | |
172 | return res, nil | |
173 | } | |
174 | ||
175 | res, err := reopen(0) | |
176 | if err != nil { | |
177 | return nil, err | |
178 | } | |
179 | var ( | |
180 | size int64 // total size of object, even if a range was requested. | |
181 | checkCRC bool | |
182 | crc uint32 | |
183 | ) | |
184 | if res.StatusCode == http.StatusPartialContent { | |
185 | cr := strings.TrimSpace(res.Header.Get("Content-Range")) | |
186 | if !strings.HasPrefix(cr, "bytes ") || !strings.Contains(cr, "/") { | |
187 | ||
188 | return nil, fmt.Errorf("storage: invalid Content-Range %q", cr) | |
189 | } | |
190 | size, err = strconv.ParseInt(cr[strings.LastIndex(cr, "/")+1:], 10, 64) | |
191 | if err != nil { | |
192 | return nil, fmt.Errorf("storage: invalid Content-Range %q", cr) | |
193 | } | |
194 | } else { | |
195 | size = res.ContentLength | |
196 | // Check the CRC iff all of the following hold: | |
197 | // - We asked for content (length != 0). | |
198 | // - We got all the content (status != PartialContent). | |
199 | // - The server sent a CRC header. | |
200 | // - The Go http stack did not uncompress the file. | |
201 | // - We were not served compressed data that was uncompressed on download. | |
202 | // The problem with the last two cases is that the CRC will not match -- GCS | |
203 | // computes it on the compressed contents, but we compute it on the | |
204 | // uncompressed contents. | |
205 | if length != 0 && !res.Uncompressed && !uncompressedByServer(res) { | |
206 | crc, checkCRC = parseCRC32c(res) | |
207 | } | |
208 | } | |
209 | ||
210 | remain := res.ContentLength | |
211 | body := res.Body | |
212 | if length == 0 { | |
213 | remain = 0 | |
214 | body.Close() | |
215 | body = emptyBody | |
216 | } | |
217 | var metaGen int64 | |
218 | if res.Header.Get("X-Goog-Generation") != "" { | |
219 | metaGen, err = strconv.ParseInt(res.Header.Get("X-Goog-Metageneration"), 10, 64) | |
220 | if err != nil { | |
221 | return nil, err | |
222 | } | |
223 | } | |
224 | ||
225 | var lm time.Time | |
226 | if res.Header.Get("Last-Modified") != "" { | |
227 | lm, err = http.ParseTime(res.Header.Get("Last-Modified")) | |
228 | if err != nil { | |
229 | return nil, err | |
230 | } | |
231 | } | |
232 | ||
233 | attrs := ReaderObjectAttrs{ | |
234 | Size: size, | |
235 | ContentType: res.Header.Get("Content-Type"), | |
236 | ContentEncoding: res.Header.Get("Content-Encoding"), | |
237 | CacheControl: res.Header.Get("Cache-Control"), | |
238 | LastModified: lm, | |
239 | Generation: gen, | |
240 | Metageneration: metaGen, | |
241 | } | |
242 | return &Reader{ | |
243 | Attrs: attrs, | |
244 | body: body, | |
245 | size: size, | |
246 | remain: remain, | |
247 | wantCRC: crc, | |
248 | checkCRC: checkCRC, | |
249 | reopen: reopen, | |
250 | }, nil | |
251 | } | |
252 | ||
// uncompressedByServer reports whether the server decompressed the object
// before sending it: the object is stored gzip-encoded
// (X-Goog-Stored-Content-Encoding is "gzip") but the response itself is not
// gzip-encoded (Content-Encoding is anything else).
func uncompressedByServer(res *http.Response) bool {
	storedEncoding := res.Header.Get("X-Goog-Stored-Content-Encoding")
	servedEncoding := res.Header.Get("Content-Encoding")
	if storedEncoding != "gzip" {
		return false
	}
	return servedEncoding != "gzip"
}
259 | ||
260 | func parseCRC32c(res *http.Response) (uint32, bool) { | |
261 | const prefix = "crc32c=" | |
262 | for _, spec := range res.Header["X-Goog-Hash"] { | |
263 | if strings.HasPrefix(spec, prefix) { | |
264 | c, err := decodeUint32(spec[len(prefix):]) | |
265 | if err == nil { | |
266 | return c, true | |
267 | } | |
268 | } | |
269 | } | |
270 | return 0, false | |
271 | } | |
272 | ||
// emptyBody is a body with no content whose Close is a no-op. It stands in
// for the HTTP response body when a zero-length read was requested.
var emptyBody io.ReadCloser = ioutil.NopCloser(strings.NewReader(""))
274 | ||
275 | // Reader reads a Cloud Storage object. | |
276 | // It implements io.Reader. | |
277 | // | |
278 | // Typically, a Reader computes the CRC of the downloaded content and compares it to | |
279 | // the stored CRC, returning an error from Read if there is a mismatch. This integrity check | |
280 | // is skipped if transcoding occurs. See https://cloud.google.com/storage/docs/transcoding. | |
281 | type Reader struct { | |
282 | Attrs ReaderObjectAttrs | |
283 | body io.ReadCloser | |
284 | seen, remain, size int64 | |
285 | checkCRC bool // should we check the CRC? | |
286 | wantCRC uint32 // the CRC32c value the server sent in the header | |
287 | gotCRC uint32 // running crc | |
288 | reopen func(seen int64) (*http.Response, error) | |
289 | } | |
290 | ||
291 | // Close closes the Reader. It must be called when done reading. | |
292 | func (r *Reader) Close() error { | |
293 | return r.body.Close() | |
294 | } | |
295 | ||
296 | func (r *Reader) Read(p []byte) (int, error) { | |
297 | n, err := r.readWithRetry(p) | |
298 | if r.remain != -1 { | |
299 | r.remain -= int64(n) | |
300 | } | |
301 | if r.checkCRC { | |
302 | r.gotCRC = crc32.Update(r.gotCRC, crc32cTable, p[:n]) | |
303 | // Check CRC here. It would be natural to check it in Close, but | |
304 | // everybody defers Close on the assumption that it doesn't return | |
305 | // anything worth looking at. | |
306 | if err == io.EOF { | |
307 | if r.gotCRC != r.wantCRC { | |
308 | return n, fmt.Errorf("storage: bad CRC on read: got %d, want %d", | |
309 | r.gotCRC, r.wantCRC) | |
310 | } | |
311 | } | |
312 | } | |
313 | return n, err | |
314 | } | |
315 | ||
316 | func (r *Reader) readWithRetry(p []byte) (int, error) { | |
317 | n := 0 | |
318 | for len(p[n:]) > 0 { | |
319 | m, err := r.body.Read(p[n:]) | |
320 | n += m | |
321 | r.seen += int64(m) | |
322 | if !shouldRetryRead(err) { | |
323 | return n, err | |
324 | } | |
325 | // Read failed, but we will try again. Send a ranged read request that takes | |
326 | // into account the number of bytes we've already seen. | |
327 | res, err := r.reopen(r.seen) | |
328 | if err != nil { | |
329 | // reopen already retries | |
330 | return n, err | |
331 | } | |
332 | r.body.Close() | |
333 | r.body = res.Body | |
334 | } | |
335 | return n, nil | |
336 | } | |
337 | ||
// shouldRetryRead reports whether a failed body read is worth retrying. It
// returns true only for a non-nil error whose message ends in
// "INTERNAL_ERROR" and whose dynamic type name contains "http2".
func shouldRetryRead(err error) bool {
	if err == nil {
		return false
	}
	if !strings.HasSuffix(err.Error(), "INTERNAL_ERROR") {
		return false
	}
	typeName := reflect.TypeOf(err).String()
	return strings.Contains(typeName, "http2")
}
344 | ||
345 | // Size returns the size of the object in bytes. | |
346 | // The returned value is always the same and is not affected by | |
347 | // calls to Read or Close. | |
348 | // | |
349 | // Deprecated: use Reader.Attrs.Size. | |
350 | func (r *Reader) Size() int64 { | |
351 | return r.Attrs.Size | |
352 | } | |
353 | ||
354 | // Remain returns the number of bytes left to read, or -1 if unknown. | |
355 | func (r *Reader) Remain() int64 { | |
356 | return r.remain | |
357 | } | |
358 | ||
359 | // ContentType returns the content type of the object. | |
360 | // | |
361 | // Deprecated: use Reader.Attrs.ContentType. | |
362 | func (r *Reader) ContentType() string { | |
363 | return r.Attrs.ContentType | |
364 | } | |
365 | ||
366 | // ContentEncoding returns the content encoding of the object. | |
367 | // | |
368 | // Deprecated: use Reader.Attrs.ContentEncoding. | |
369 | func (r *Reader) ContentEncoding() string { | |
370 | return r.Attrs.ContentEncoding | |
371 | } | |
372 | ||
373 | // CacheControl returns the cache control of the object. | |
374 | // | |
375 | // Deprecated: use Reader.Attrs.CacheControl. | |
376 | func (r *Reader) CacheControl() string { | |
377 | return r.Attrs.CacheControl | |
378 | } | |
379 | ||
380 | // LastModified returns the value of the Last-Modified header. | |
381 | // | |
382 | // Deprecated: use Reader.Attrs.LastModified. | |
383 | func (r *Reader) LastModified() (time.Time, error) { | |
384 | return r.Attrs.LastModified, nil | |
385 | } |