aboutsummaryrefslogtreecommitdiffhomepage
path: root/tap_google_sheets/schema.py
diff options
context:
space:
mode:
authorPaul B <paul@bonaud.fr>2020-11-19 12:35:22 +0100
committerPaul B <paul@bonaud.fr>2020-11-21 00:32:35 +0100
commit4bf194076d39d516c3cd0f5c3559954ebe8a12f2 (patch)
tree422588a014088598ad93884f86224a90ee4333fa /tap_google_sheets/schema.py
parent1080d5ece1d90464c448c7e3f8dc58410fad0601 (diff)
downloadtap-google-sheets-4bf194076d39d516c3cd0f5c3559954ebe8a12f2.tar.gz
tap-google-sheets-4bf194076d39d516c3cd0f5c3559954ebe8a12f2.tar.zst
tap-google-sheets-4bf194076d39d516c3cd0f5c3559954ebe8a12f2.zip
feat: use the official Google API python library
These changes will make use of the official `google-api-python-client` library instead of relying on manual HTTP requests. There are two main advantages of these changes: - the Tap doesn't need to worry about the Google API interaction details as they are hidden away by the official Google lib. - We can use the authentication helpers from the lib to ease the credentials management for the user. In that way the current PR implements two auth means: installed OAuth client authentication or Service Accounts authentication. The only downside of this change is that it breaks the current `config.json` parameters for existing users.
Diffstat (limited to 'tap_google_sheets/schema.py')
-rw-r--r--tap_google_sheets/schema.py24
1 files changed, 9 insertions, 15 deletions
diff --git a/tap_google_sheets/schema.py b/tap_google_sheets/schema.py
index fcaccf9..56d2fb9 100644
--- a/tap_google_sheets/schema.py
+++ b/tap_google_sheets/schema.py
@@ -224,16 +224,13 @@ def get_sheet_metadata(sheet, spreadsheet_id, client):
224 224
225 stream_name = 'sheet_metadata' 225 stream_name = 'sheet_metadata'
226 stream_metadata = STREAMS.get(stream_name) 226 stream_metadata = STREAMS.get(stream_name)
227 api = stream_metadata.get('api', 'sheets')
228 params = stream_metadata.get('params', {}) 227 params = stream_metadata.get('params', {})
229 sheet_title_encoded = urllib.parse.quote_plus(sheet_title) 228
230 sheet_title_escaped = re.escape(sheet_title) 229 # GET sheet_metadata
231 querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in \ 230 sheet_md_results = client.request(endpoint=stream_name,
232 params.items()]).replace('{sheet_title}', sheet_title_encoded) 231 spreadsheet_id=spreadsheet_id,
233 path = '{}?{}'.format(stream_metadata.get('path').replace('{spreadsheet_id}', \ 232 sheet_title=sheet_title,
234 spreadsheet_id), querystring) 233 params=params)
235
236 sheet_md_results = client.get(path=path, api=api, endpoint=sheet_title_escaped)
237 # sheet_metadata: 1st `sheets` node in results 234 # sheet_metadata: 1st `sheets` node in results
238 sheet_metadata = sheet_md_results.get('sheets')[0] 235 sheet_metadata = sheet_md_results.get('sheets')[0]
239 236
@@ -275,15 +272,12 @@ def get_schemas(client, spreadsheet_id):
275 field_metadata[stream_name] = mdata 272 field_metadata[stream_name] = mdata
276 273
277 if stream_name == 'spreadsheet_metadata': 274 if stream_name == 'spreadsheet_metadata':
278 api = stream_metadata.get('api', 'sheets')
279 params = stream_metadata.get('params', {}) 275 params = stream_metadata.get('params', {})
280 querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()])
281 path = '{}?{}'.format(stream_metadata.get('path').replace('{spreadsheet_id}', \
282 spreadsheet_id), querystring)
283 276
284 # GET spreadsheet_metadata, which incl. sheets (basic metadata for each worksheet) 277 # GET spreadsheet_metadata, which incl. sheets (basic metadata for each worksheet)
285 spreadsheet_md_results = client.get(path=path, params=querystring, api=api, \ 278 spreadsheet_md_results = client.request(endpoint=stream_name,
286 endpoint=stream_name) 279 spreadsheet_id=spreadsheet_id,
280 params=params)
287 281
288 sheets = spreadsheet_md_results.get('sheets') 282 sheets = spreadsheet_md_results.get('sheets')
289 if sheets: 283 if sheets: