1 // Copyright 2016 Google LLC
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
31 "cloud.google.com/go/internal/trace"
32 "google.golang.org/api/googleapi"
// crc32cTable is the CRC-32C (Castagnoli polynomial) table. Reader.Read
// passes it to crc32.Update to maintain the running checksum of the bytes
// it has returned.
var crc32cTable = crc32.MakeTable(crc32.Castagnoli)
37 // ReaderObjectAttrs are attributes about the object being read. These are populated
38 // during the New call. This struct only holds a subset of object attributes: to
39 // get the full set of attributes, use ObjectHandle.Attrs.
41 // Each field is read-only.
42 type ReaderObjectAttrs struct {
43 // Size is the length of the object's content.
46 // ContentType is the MIME type of the object's content.
49 // ContentEncoding is the encoding of the object's content.
50 ContentEncoding string
52 // CacheControl specifies whether and for how long browser and Internet
53 // caches are allowed to cache your objects.
56 // LastModified is the time that the object was last modified.
57 LastModified time.Time
59 // Generation is the generation number of the object's content.
// Metageneration is the version of the metadata for this object at
// this generation. This field is used for preconditions and for
// detecting changes in metadata. A metageneration number is only
// meaningful in the context of a particular generation of a
// particular object.
70 // NewReader creates a new Reader to read the contents of the
72 // ErrObjectNotExist will be returned if the object is not found.
74 // The caller must call Close on the returned Reader when done reading.
75 func (o *ObjectHandle) NewReader(ctx context.Context) (*Reader, error) {
76 return o.NewRangeReader(ctx, 0, -1)
// NewRangeReader reads part of an object, reading at most length bytes
// starting at the given offset. If length is negative, the object is read
// until the end.
82 func (o *ObjectHandle) NewRangeReader(ctx context.Context, offset, length int64) (r *Reader, err error) {
83 ctx = trace.StartSpan(ctx, "cloud.google.com/go/storage.Object.NewRangeReader")
84 defer func() { trace.EndSpan(ctx, err) }()
86 if err := o.validate(); err != nil {
90 return nil, fmt.Errorf("storage: invalid offset %d < 0", offset)
93 if err := o.conds.validate("NewRangeReader"); err != nil {
99 Host: "storage.googleapis.com",
100 Path: fmt.Sprintf("/%s/%s", o.bucket, o.object),
106 req, err := http.NewRequest(verb, u.String(), nil)
110 req = req.WithContext(ctx)
111 if o.userProject != "" {
112 req.Header.Set("X-Goog-User-Project", o.userProject)
114 if o.readCompressed {
115 req.Header.Set("Accept-Encoding", "gzip")
117 if err := setEncryptionHeaders(req.Header, o.encryptionKey, false); err != nil {
123 // Define a function that initiates a Read with offset and length, assuming we
124 // have already read seen bytes.
125 reopen := func(seen int64) (*http.Response, error) {
126 start := offset + seen
127 if length < 0 && start > 0 {
128 req.Header.Set("Range", fmt.Sprintf("bytes=%d-", start))
129 } else if length > 0 {
130 // The end character isn't affected by how many bytes we've seen.
131 req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", start, offset+length-1))
133 // We wait to assign conditions here because the generation number can change in between reopen() runs.
134 req.URL.RawQuery = conditionsQuery(gen, o.conds)
135 var res *http.Response
136 err = runWithRetry(ctx, func() error {
137 res, err = o.c.hc.Do(req)
141 if res.StatusCode == http.StatusNotFound {
143 return ErrObjectNotExist
145 if res.StatusCode < 200 || res.StatusCode > 299 {
146 body, _ := ioutil.ReadAll(res.Body)
148 return &googleapi.Error{
149 Code: res.StatusCode,
154 if start > 0 && length != 0 && res.StatusCode != http.StatusPartialContent {
156 return errors.New("storage: partial request not satisfied")
158 // If a generation hasn't been specified, and this is the first response we get, let's record the
159 // generation. In future requests we'll use this generation as a precondition to avoid data races.
160 if gen < 0 && res.Header.Get("X-Goog-Generation") != "" {
161 gen64, err := strconv.ParseInt(res.Header.Get("X-Goog-Generation"), 10, 64)
175 res, err := reopen(0)
180 size int64 // total size of object, even if a range was requested.
184 if res.StatusCode == http.StatusPartialContent {
185 cr := strings.TrimSpace(res.Header.Get("Content-Range"))
186 if !strings.HasPrefix(cr, "bytes ") || !strings.Contains(cr, "/") {
188 return nil, fmt.Errorf("storage: invalid Content-Range %q", cr)
190 size, err = strconv.ParseInt(cr[strings.LastIndex(cr, "/")+1:], 10, 64)
192 return nil, fmt.Errorf("storage: invalid Content-Range %q", cr)
195 size = res.ContentLength
196 // Check the CRC iff all of the following hold:
197 // - We asked for content (length != 0).
198 // - We got all the content (status != PartialContent).
199 // - The server sent a CRC header.
200 // - The Go http stack did not uncompress the file.
201 // - We were not served compressed data that was uncompressed on download.
202 // The problem with the last two cases is that the CRC will not match -- GCS
203 // computes it on the compressed contents, but we compute it on the
204 // uncompressed contents.
205 if length != 0 && !res.Uncompressed && !uncompressedByServer(res) {
206 crc, checkCRC = parseCRC32c(res)
210 remain := res.ContentLength
218 if res.Header.Get("X-Goog-Generation") != "" {
219 metaGen, err = strconv.ParseInt(res.Header.Get("X-Goog-Metageneration"), 10, 64)
226 if res.Header.Get("Last-Modified") != "" {
227 lm, err = http.ParseTime(res.Header.Get("Last-Modified"))
233 attrs := ReaderObjectAttrs{
235 ContentType: res.Header.Get("Content-Type"),
236 ContentEncoding: res.Header.Get("Content-Encoding"),
237 CacheControl: res.Header.Get("Cache-Control"),
240 Metageneration: metaGen,
// uncompressedByServer reports whether the response carries data that the
// server decompressed before sending: the X-Goog-Stored-Content-Encoding
// header says the object is stored as gzip, yet the response's
// Content-Encoding header is not gzip.
func uncompressedByServer(res *http.Response) bool {
	stored := res.Header.Get("X-Goog-Stored-Content-Encoding")
	served := res.Header.Get("Content-Encoding")
	// Stored as gzip but not delivered as gzip means the server inflated it.
	return stored == "gzip" && served != "gzip"
}
260 func parseCRC32c(res *http.Response) (uint32, bool) {
261 const prefix = "crc32c="
262 for _, spec := range res.Header["X-Goog-Hash"] {
263 if strings.HasPrefix(spec, prefix) {
264 c, err := decodeUint32(spec[len(prefix):])
// emptyBody is a ReadCloser with no content: it wraps a reader over the
// empty string in a no-op closer, so Read reports end-of-input immediately
// and Close does nothing.
// NOTE(review): presumably substituted for an HTTP response body when no
// content is expected; the use site is not visible here, so confirm.
var emptyBody = ioutil.NopCloser(strings.NewReader(""))
275 // Reader reads a Cloud Storage object.
276 // It implements io.Reader.
278 // Typically, a Reader computes the CRC of the downloaded content and compares it to
279 // the stored CRC, returning an error from Read if there is a mismatch. This integrity check
280 // is skipped if transcoding occurs. See https://cloud.google.com/storage/docs/transcoding.
282 Attrs ReaderObjectAttrs
284 seen, remain, size int64
285 checkCRC bool // should we check the CRC?
286 wantCRC uint32 // the CRC32c value the server sent in the header
287 gotCRC uint32 // running crc
288 reopen func(seen int64) (*http.Response, error)
291 // Close closes the Reader. It must be called when done reading.
292 func (r *Reader) Close() error {
293 return r.body.Close()
296 func (r *Reader) Read(p []byte) (int, error) {
297 n, err := r.readWithRetry(p)
302 r.gotCRC = crc32.Update(r.gotCRC, crc32cTable, p[:n])
303 // Check CRC here. It would be natural to check it in Close, but
304 // everybody defers Close on the assumption that it doesn't return
305 // anything worth looking at.
307 if r.gotCRC != r.wantCRC {
308 return n, fmt.Errorf("storage: bad CRC on read: got %d, want %d",
316 func (r *Reader) readWithRetry(p []byte) (int, error) {
319 m, err := r.body.Read(p[n:])
322 if !shouldRetryRead(err) {
325 // Read failed, but we will try again. Send a ranged read request that takes
326 // into account the number of bytes we've already seen.
327 res, err := r.reopen(r.seen)
329 // reopen already retries
338 func shouldRetryRead(err error) bool {
342 return strings.HasSuffix(err.Error(), "INTERNAL_ERROR") && strings.Contains(reflect.TypeOf(err).String(), "http2")
345 // Size returns the size of the object in bytes.
346 // The returned value is always the same and is not affected by
347 // calls to Read or Close.
349 // Deprecated: use Reader.Attrs.Size.
350 func (r *Reader) Size() int64 {
354 // Remain returns the number of bytes left to read, or -1 if unknown.
355 func (r *Reader) Remain() int64 {
359 // ContentType returns the content type of the object.
361 // Deprecated: use Reader.Attrs.ContentType.
362 func (r *Reader) ContentType() string {
363 return r.Attrs.ContentType
366 // ContentEncoding returns the content encoding of the object.
368 // Deprecated: use Reader.Attrs.ContentEncoding.
369 func (r *Reader) ContentEncoding() string {
370 return r.Attrs.ContentEncoding
373 // CacheControl returns the cache control of the object.
375 // Deprecated: use Reader.Attrs.CacheControl.
376 func (r *Reader) CacheControl() string {
377 return r.Attrs.CacheControl
380 // LastModified returns the value of the Last-Modified header.
382 // Deprecated: use Reader.Attrs.LastModified.
383 func (r *Reader) LastModified() (time.Time, error) {
384 return r.Attrs.LastModified, nil