aboutsummaryrefslogtreecommitdiffhomepage
path: root/tap_google_sheets/client.py
diff options
context:
space:
mode:
authorJeff Huth <jeff.huth@bytecode.io>2019-11-13 17:03:56 -0800
committerJeff Huth <jeff.huth@bytecode.io>2019-11-13 17:03:56 -0800
commit89643ba6fa98db82efd3246805ef801a8bfb5c81 (patch)
tree739027b4e827def2db81631c9d6ed58ec2b97809 /tap_google_sheets/client.py
parent5f8005471d3affaaf23489df93a58ca64c3da3ca (diff)
downloadtap-google-sheets-89643ba6fa98db82efd3246805ef801a8bfb5c81.tar.gz
tap-google-sheets-89643ba6fa98db82efd3246805ef801a8bfb5c81.tar.zst
tap-google-sheets-89643ba6fa98db82efd3246805ef801a8bfb5c81.zip
Initial commit
Discovery mode works. Still working on normal sync.
Diffstat (limited to 'tap_google_sheets/client.py')
-rw-r--r--tap_google_sheets/client.py247
1 files changed, 247 insertions, 0 deletions
diff --git a/tap_google_sheets/client.py b/tap_google_sheets/client.py
new file mode 100644
index 0000000..12f0811
--- /dev/null
+++ b/tap_google_sheets/client.py
@@ -0,0 +1,247 @@
1from datetime import datetime, timedelta
2import backoff
3import requests
4from collections import OrderedDict
5
6import singer
7from singer import metrics
8from singer import utils
9
10BASE_URL = 'https://www.googleapis.com'
11GOOGLE_TOKEN_URI = 'https://oauth2.googleapis.com/token'
12LOGGER = singer.get_logger()
13
14
15class Server5xxError(Exception):
16 pass
17
18
19class Server429Error(Exception):
20 pass
21
22
23class GoogleError(Exception):
24 pass
25
26
27class GoogleBadRequestError(GoogleError):
28 pass
29
30
31class GoogleUnauthorizedError(GoogleError):
32 pass
33
34
35class GooglePaymentRequiredError(GoogleError):
36 pass
37
38
39class GoogleNotFoundError(GoogleError):
40 pass
41
42
43class GoogleMethodNotAllowedError(GoogleError):
44 pass
45
46
47class GoogleConflictError(GoogleError):
48 pass
49
50
51class GoogleGoneError(GoogleError):
52 pass
53
54
55class GooglePreconditionFailedError(GoogleError):
56 pass
57
58
59class GoogleRequestEntityTooLargeError(GoogleError):
60 pass
61
62
63class GoogleRequestedRangeNotSatisfiableError(GoogleError):
64 pass
65
66
67class GoogleExpectationFailedError(GoogleError):
68 pass
69
70
71class GoogleForbiddenError(GoogleError):
72 pass
73
74
75class GoogleUnprocessableEntityError(GoogleError):
76 pass
77
78
79class GooglePreconditionRequiredError(GoogleError):
80 pass
81
82
83class GoogleInternalServiceError(GoogleError):
84 pass
85
86
87# Error Codes: https://developers.google.com/webmaster-tools/search-console-api-original/v3/errors
88ERROR_CODE_EXCEPTION_MAPPING = {
89 400: GoogleBadRequestError,
90 401: GoogleUnauthorizedError,
91 402: GooglePaymentRequiredError,
92 403: GoogleForbiddenError,
93 404: GoogleNotFoundError,
94 405: GoogleMethodNotAllowedError,
95 409: GoogleConflictError,
96 410: GoogleGoneError,
97 412: GooglePreconditionFailedError,
98 413: GoogleRequestEntityTooLargeError,
99 416: GoogleRequestedRangeNotSatisfiableError,
100 417: GoogleExpectationFailedError,
101 422: GoogleUnprocessableEntityError,
102 428: GooglePreconditionRequiredError,
103 500: GoogleInternalServiceError}
104
105
106def get_exception_for_error_code(error_code):
107 return ERROR_CODE_EXCEPTION_MAPPING.get(error_code, GoogleError)
108
109def raise_for_error(response):
110 try:
111 response.raise_for_status()
112 except (requests.HTTPError, requests.ConnectionError) as error:
113 try:
114 content_length = len(response.content)
115 if content_length == 0:
116 # There is nothing we can do here since Google has neither sent
117 # us a 2xx response nor a response content.
118 return
119 response = response.json()
120 if ('error' in response) or ('errorCode' in response):
121 message = '%s: %s' % (response.get('error', str(error)),
122 response.get('message', 'Unknown Error'))
123 error_code = response.get('error', {}).get('code')
124 ex = get_exception_for_error_code(error_code)
125 raise ex(message)
126 else:
127 raise GoogleError(error)
128 except (ValueError, TypeError):
129 raise GoogleError(error)
130
131class GoogleClient: # pylint: disable=too-many-instance-attributes
132 def __init__(self,
133 client_id,
134 client_secret,
135 refresh_token,
136 user_agent=None):
137 self.__client_id = client_id
138 self.__client_secret = client_secret
139 self.__refresh_token = refresh_token
140 self.__user_agent = user_agent
141 self.__access_token = None
142 self.__expires = None
143 self.__session = requests.Session()
144 self.base_url = None
145
146
147 def __enter__(self):
148 self.get_access_token()
149 return self
150
151 def __exit__(self, exception_type, exception_value, traceback):
152 self.__session.close()
153
154 @backoff.on_exception(backoff.expo,
155 Server5xxError,
156 max_tries=5,
157 factor=2)
158 def get_access_token(self):
159 # The refresh_token never expires and may be used many times to generate each access_token
160 # Since the refresh_token does not expire, it is not included in get access_token response
161 if self.__access_token is not None and self.__expires > datetime.utcnow():
162 return
163
164 headers = {}
165 if self.__user_agent:
166 headers['User-Agent'] = self.__user_agent
167
168 response = self.__session.post(
169 url=GOOGLE_TOKEN_URI,
170 headers=headers,
171 data={
172 'grant_type': 'refresh_token',
173 'client_id': self.__client_id,
174 'client_secret': self.__client_secret,
175 'refresh_token': self.__refresh_token,
176 })
177
178 if response.status_code >= 500:
179 raise Server5xxError()
180
181 if response.status_code != 200:
182 raise_for_error(response)
183
184 data = response.json()
185 self.__access_token = data['access_token']
186 self.__expires = datetime.utcnow() + timedelta(seconds=data['expires_in'])
187 LOGGER.info('Authorized, token expires = {}'.format(self.__expires))
188
189
190 @backoff.on_exception(backoff.expo,
191 (Server5xxError, ConnectionError, Server429Error),
192 max_tries=7,
193 factor=3)
194 # Rate Limit:
195 # https://developers.google.com/webmaster-tools/search-console-api-original/v3/limits
196 @utils.ratelimit(1200, 60)
197 def request(self, method, path=None, url=None, api=None, **kwargs):
198
199 self.get_access_token()
200
201 self.base_url = 'https://sheets.googleapis.com/v4'
202 if api == 'files':
203 self.base_url = 'https://www.googleapis.com/drive/v3'
204
205 if not url and path:
206 url = '{}/{}'.format(self.base_url, path)
207
208 # endpoint = stream_name (from sync.py API call)
209 if 'endpoint' in kwargs:
210 endpoint = kwargs['endpoint']
211 del kwargs['endpoint']
212 else:
213 endpoint = None
214
215 if 'headers' not in kwargs:
216 kwargs['headers'] = {}
217 kwargs['headers']['Authorization'] = 'Bearer {}'.format(self.__access_token)
218
219 if self.__user_agent:
220 kwargs['headers']['User-Agent'] = self.__user_agent
221
222 if method == 'POST':
223 kwargs['headers']['Content-Type'] = 'application/json'
224
225 with metrics.http_request_timer(endpoint) as timer:
226 response = self.__session.request(method, url, **kwargs)
227 timer.tags[metrics.Tag.http_status_code] = response.status_code
228
229 if response.status_code >= 500:
230 raise Server5xxError()
231
232 #Use retry functionality in backoff to wait and retry if
233 #response code equals 429 because rate limit has been exceeded
234 if response.status_code == 429:
235 raise Server429Error()
236
237 if response.status_code != 200:
238 raise_for_error(response)
239
240 # Ensure keys and rows are ordered as received from API
241 return response.json(object_pairs_hook=OrderedDict)
242
243 def get(self, path, api, **kwargs):
244 return self.request(method='GET', path=path, api=api, **kwargs)
245
246 def post(self, path, api, **kwargs):
247 return self.request(method='POST', path=path, api=api, **kwargs)