diff options
author | Jeff Huth <jeff.huth@bytecode.io> | 2019-11-13 17:03:56 -0800 |
---|---|---|
committer | Jeff Huth <jeff.huth@bytecode.io> | 2019-11-13 17:03:56 -0800 |
commit | 89643ba6fa98db82efd3246805ef801a8bfb5c81 (patch) | |
tree | 739027b4e827def2db81631c9d6ed58ec2b97809 /tap_google_sheets/client.py | |
parent | 5f8005471d3affaaf23489df93a58ca64c3da3ca (diff) | |
download | tap-google-sheets-89643ba6fa98db82efd3246805ef801a8bfb5c81.tar.gz tap-google-sheets-89643ba6fa98db82efd3246805ef801a8bfb5c81.tar.zst tap-google-sheets-89643ba6fa98db82efd3246805ef801a8bfb5c81.zip |
Initial commit
Discovery mode works. Still working on normal sync.
Diffstat (limited to 'tap_google_sheets/client.py')
-rw-r--r-- | tap_google_sheets/client.py | 247 |
1 files changed, 247 insertions, 0 deletions
diff --git a/tap_google_sheets/client.py b/tap_google_sheets/client.py new file mode 100644 index 0000000..12f0811 --- /dev/null +++ b/tap_google_sheets/client.py | |||
@@ -0,0 +1,247 @@ | |||
1 | from datetime import datetime, timedelta | ||
2 | import backoff | ||
3 | import requests | ||
4 | from collections import OrderedDict | ||
5 | |||
6 | import singer | ||
7 | from singer import metrics | ||
8 | from singer import utils | ||
9 | |||
10 | BASE_URL = 'https://www.googleapis.com' | ||
11 | GOOGLE_TOKEN_URI = 'https://oauth2.googleapis.com/token' | ||
12 | LOGGER = singer.get_logger() | ||
13 | |||
14 | |||
15 | class Server5xxError(Exception): | ||
16 | pass | ||
17 | |||
18 | |||
19 | class Server429Error(Exception): | ||
20 | pass | ||
21 | |||
22 | |||
23 | class GoogleError(Exception): | ||
24 | pass | ||
25 | |||
26 | |||
27 | class GoogleBadRequestError(GoogleError): | ||
28 | pass | ||
29 | |||
30 | |||
31 | class GoogleUnauthorizedError(GoogleError): | ||
32 | pass | ||
33 | |||
34 | |||
35 | class GooglePaymentRequiredError(GoogleError): | ||
36 | pass | ||
37 | |||
38 | |||
39 | class GoogleNotFoundError(GoogleError): | ||
40 | pass | ||
41 | |||
42 | |||
43 | class GoogleMethodNotAllowedError(GoogleError): | ||
44 | pass | ||
45 | |||
46 | |||
47 | class GoogleConflictError(GoogleError): | ||
48 | pass | ||
49 | |||
50 | |||
51 | class GoogleGoneError(GoogleError): | ||
52 | pass | ||
53 | |||
54 | |||
55 | class GooglePreconditionFailedError(GoogleError): | ||
56 | pass | ||
57 | |||
58 | |||
59 | class GoogleRequestEntityTooLargeError(GoogleError): | ||
60 | pass | ||
61 | |||
62 | |||
63 | class GoogleRequestedRangeNotSatisfiableError(GoogleError): | ||
64 | pass | ||
65 | |||
66 | |||
67 | class GoogleExpectationFailedError(GoogleError): | ||
68 | pass | ||
69 | |||
70 | |||
71 | class GoogleForbiddenError(GoogleError): | ||
72 | pass | ||
73 | |||
74 | |||
75 | class GoogleUnprocessableEntityError(GoogleError): | ||
76 | pass | ||
77 | |||
78 | |||
79 | class GooglePreconditionRequiredError(GoogleError): | ||
80 | pass | ||
81 | |||
82 | |||
83 | class GoogleInternalServiceError(GoogleError): | ||
84 | pass | ||
85 | |||
86 | |||
87 | # Error Codes: https://developers.google.com/webmaster-tools/search-console-api-original/v3/errors | ||
88 | ERROR_CODE_EXCEPTION_MAPPING = { | ||
89 | 400: GoogleBadRequestError, | ||
90 | 401: GoogleUnauthorizedError, | ||
91 | 402: GooglePaymentRequiredError, | ||
92 | 403: GoogleForbiddenError, | ||
93 | 404: GoogleNotFoundError, | ||
94 | 405: GoogleMethodNotAllowedError, | ||
95 | 409: GoogleConflictError, | ||
96 | 410: GoogleGoneError, | ||
97 | 412: GooglePreconditionFailedError, | ||
98 | 413: GoogleRequestEntityTooLargeError, | ||
99 | 416: GoogleRequestedRangeNotSatisfiableError, | ||
100 | 417: GoogleExpectationFailedError, | ||
101 | 422: GoogleUnprocessableEntityError, | ||
102 | 428: GooglePreconditionRequiredError, | ||
103 | 500: GoogleInternalServiceError} | ||
104 | |||
105 | |||
106 | def get_exception_for_error_code(error_code): | ||
107 | return ERROR_CODE_EXCEPTION_MAPPING.get(error_code, GoogleError) | ||
108 | |||
109 | def raise_for_error(response): | ||
110 | try: | ||
111 | response.raise_for_status() | ||
112 | except (requests.HTTPError, requests.ConnectionError) as error: | ||
113 | try: | ||
114 | content_length = len(response.content) | ||
115 | if content_length == 0: | ||
116 | # There is nothing we can do here since Google has neither sent | ||
117 | # us a 2xx response nor a response content. | ||
118 | return | ||
119 | response = response.json() | ||
120 | if ('error' in response) or ('errorCode' in response): | ||
121 | message = '%s: %s' % (response.get('error', str(error)), | ||
122 | response.get('message', 'Unknown Error')) | ||
123 | error_code = response.get('error', {}).get('code') | ||
124 | ex = get_exception_for_error_code(error_code) | ||
125 | raise ex(message) | ||
126 | else: | ||
127 | raise GoogleError(error) | ||
128 | except (ValueError, TypeError): | ||
129 | raise GoogleError(error) | ||
130 | |||
131 | class GoogleClient: # pylint: disable=too-many-instance-attributes | ||
132 | def __init__(self, | ||
133 | client_id, | ||
134 | client_secret, | ||
135 | refresh_token, | ||
136 | user_agent=None): | ||
137 | self.__client_id = client_id | ||
138 | self.__client_secret = client_secret | ||
139 | self.__refresh_token = refresh_token | ||
140 | self.__user_agent = user_agent | ||
141 | self.__access_token = None | ||
142 | self.__expires = None | ||
143 | self.__session = requests.Session() | ||
144 | self.base_url = None | ||
145 | |||
146 | |||
147 | def __enter__(self): | ||
148 | self.get_access_token() | ||
149 | return self | ||
150 | |||
151 | def __exit__(self, exception_type, exception_value, traceback): | ||
152 | self.__session.close() | ||
153 | |||
154 | @backoff.on_exception(backoff.expo, | ||
155 | Server5xxError, | ||
156 | max_tries=5, | ||
157 | factor=2) | ||
158 | def get_access_token(self): | ||
159 | # The refresh_token never expires and may be used many times to generate each access_token | ||
160 | # Since the refresh_token does not expire, it is not included in get access_token response | ||
161 | if self.__access_token is not None and self.__expires > datetime.utcnow(): | ||
162 | return | ||
163 | |||
164 | headers = {} | ||
165 | if self.__user_agent: | ||
166 | headers['User-Agent'] = self.__user_agent | ||
167 | |||
168 | response = self.__session.post( | ||
169 | url=GOOGLE_TOKEN_URI, | ||
170 | headers=headers, | ||
171 | data={ | ||
172 | 'grant_type': 'refresh_token', | ||
173 | 'client_id': self.__client_id, | ||
174 | 'client_secret': self.__client_secret, | ||
175 | 'refresh_token': self.__refresh_token, | ||
176 | }) | ||
177 | |||
178 | if response.status_code >= 500: | ||
179 | raise Server5xxError() | ||
180 | |||
181 | if response.status_code != 200: | ||
182 | raise_for_error(response) | ||
183 | |||
184 | data = response.json() | ||
185 | self.__access_token = data['access_token'] | ||
186 | self.__expires = datetime.utcnow() + timedelta(seconds=data['expires_in']) | ||
187 | LOGGER.info('Authorized, token expires = {}'.format(self.__expires)) | ||
188 | |||
189 | |||
190 | @backoff.on_exception(backoff.expo, | ||
191 | (Server5xxError, ConnectionError, Server429Error), | ||
192 | max_tries=7, | ||
193 | factor=3) | ||
194 | # Rate Limit: | ||
195 | # https://developers.google.com/webmaster-tools/search-console-api-original/v3/limits | ||
196 | @utils.ratelimit(1200, 60) | ||
197 | def request(self, method, path=None, url=None, api=None, **kwargs): | ||
198 | |||
199 | self.get_access_token() | ||
200 | |||
201 | self.base_url = 'https://sheets.googleapis.com/v4' | ||
202 | if api == 'files': | ||
203 | self.base_url = 'https://www.googleapis.com/drive/v3' | ||
204 | |||
205 | if not url and path: | ||
206 | url = '{}/{}'.format(self.base_url, path) | ||
207 | |||
208 | # endpoint = stream_name (from sync.py API call) | ||
209 | if 'endpoint' in kwargs: | ||
210 | endpoint = kwargs['endpoint'] | ||
211 | del kwargs['endpoint'] | ||
212 | else: | ||
213 | endpoint = None | ||
214 | |||
215 | if 'headers' not in kwargs: | ||
216 | kwargs['headers'] = {} | ||
217 | kwargs['headers']['Authorization'] = 'Bearer {}'.format(self.__access_token) | ||
218 | |||
219 | if self.__user_agent: | ||
220 | kwargs['headers']['User-Agent'] = self.__user_agent | ||
221 | |||
222 | if method == 'POST': | ||
223 | kwargs['headers']['Content-Type'] = 'application/json' | ||
224 | |||
225 | with metrics.http_request_timer(endpoint) as timer: | ||
226 | response = self.__session.request(method, url, **kwargs) | ||
227 | timer.tags[metrics.Tag.http_status_code] = response.status_code | ||
228 | |||
229 | if response.status_code >= 500: | ||
230 | raise Server5xxError() | ||
231 | |||
232 | #Use retry functionality in backoff to wait and retry if | ||
233 | #response code equals 429 because rate limit has been exceeded | ||
234 | if response.status_code == 429: | ||
235 | raise Server429Error() | ||
236 | |||
237 | if response.status_code != 200: | ||
238 | raise_for_error(response) | ||
239 | |||
240 | # Ensure keys and rows are ordered as received from API | ||
241 | return response.json(object_pairs_hook=OrderedDict) | ||
242 | |||
243 | def get(self, path, api, **kwargs): | ||
244 | return self.request(method='GET', path=path, api=api, **kwargs) | ||
245 | |||
246 | def post(self, path, api, **kwargs): | ||
247 | return self.request(method='POST', path=path, api=api, **kwargs) | ||