aboutsummaryrefslogtreecommitdiffhomepage
path: root/tap_google_sheets/sync.py
diff options
context:
space:
mode:
authorPaul B <paul@bonaud.fr>2020-11-19 12:35:22 +0100
committerPaul B <paul@bonaud.fr>2020-11-21 00:32:35 +0100
commit4bf194076d39d516c3cd0f5c3559954ebe8a12f2 (patch)
tree422588a014088598ad93884f86224a90ee4333fa /tap_google_sheets/sync.py
parent1080d5ece1d90464c448c7e3f8dc58410fad0601 (diff)
downloadtap-google-sheets-4bf194076d39d516c3cd0f5c3559954ebe8a12f2.tar.gz
tap-google-sheets-4bf194076d39d516c3cd0f5c3559954ebe8a12f2.tar.zst
tap-google-sheets-4bf194076d39d516c3cd0f5c3559954ebe8a12f2.zip
feat: use the official Google API python library
These changes make use of the official `google-api-python-client` library instead of relying on manual HTTP requests. There are two main advantages to these changes: - The Tap doesn't need to worry about the Google API interaction details, as they are hidden away by the official Google library. - We can use the authentication helpers from the library to ease credentials management for the user. To that end, the current PR implements two authentication methods: installed OAuth client authentication or Service Account authentication. The only downside of this change is that it breaks the current `config.json` parameters for existing users.
Diffstat (limited to 'tap_google_sheets/sync.py')
-rw-r--r--tap_google_sheets/sync.py37
1 files changed, 10 insertions, 27 deletions
diff --git a/tap_google_sheets/sync.py b/tap_google_sheets/sync.py
index 26c2d19..c67055a 100644
--- a/tap_google_sheets/sync.py
+++ b/tap_google_sheets/sync.py
@@ -141,35 +141,17 @@ def get_selected_fields(catalog, stream_name):
141 pass 141 pass
142 return selected_fields 142 return selected_fields
143 143
144
145def get_data(stream_name, 144def get_data(stream_name,
146 endpoint_config, 145 endpoint_config,
147 client, 146 client,
148 spreadsheet_id, 147 **kwargs):
149 range_rows=None):
150 if not range_rows:
151 range_rows = ''
152 # Replace {placeholder} variables in path
153 # Encode stream_name: fixes issue w/ special characters in sheet name
154 stream_name_escaped = re.escape(stream_name)
155 stream_name_encoded = urllib.parse.quote_plus(stream_name)
156 path = endpoint_config.get('path', stream_name).replace(
157 '{spreadsheet_id}', spreadsheet_id).replace('{sheet_title}', stream_name_encoded).replace(
158 '{range_rows}', range_rows)
159 params = endpoint_config.get('params', {}) 148 params = endpoint_config.get('params', {})
160 api = endpoint_config.get('api', 'sheets') 149 LOGGER.info('GET {}'.format(stream_name))
161 # Add in querystring parameters and replace {placeholder} variables
162 # querystring function ensures parameters are added but not encoded causing API errors
163 querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()]).replace(
164 '{sheet_title}', stream_name_encoded)
165 LOGGER.info('URL: {}/{}?{}'.format(client.base_url, path, querystring))
166 data = {}
167 time_extracted = utils.now() 150 time_extracted = utils.now()
168 data = client.get( 151 data = client.request(
169 path=path, 152 endpoint=stream_name,
170 api=api, 153 params=params,
171 params=querystring, 154 **kwargs)
172 endpoint=stream_name_escaped)
173 return data, time_extracted 155 return data, time_extracted
174 156
175 157
@@ -382,7 +364,7 @@ def sync(client, config, catalog, state):
382 file_metadata_config = STREAMS.get(stream_name) 364 file_metadata_config = STREAMS.get(stream_name)
383 365
384 # GET file_metadata 366 # GET file_metadata
385 LOGGER.info('GET file_meatadata') 367 LOGGER.info('GET file_metadata')
386 file_metadata, time_extracted = get_data(stream_name=stream_name, 368 file_metadata, time_extracted = get_data(stream_name=stream_name,
387 endpoint_config=file_metadata_config, 369 endpoint_config=file_metadata_config,
388 client=client, 370 client=client,
@@ -497,11 +479,12 @@ def sync(client, config, catalog, state):
497 while not is_last_row and from_row < sheet_max_row and to_row <= sheet_max_row: 479 while not is_last_row and from_row < sheet_max_row and to_row <= sheet_max_row:
498 range_rows = 'A{}:{}{}'.format(from_row, sheet_last_col_letter, to_row) 480 range_rows = 'A{}:{}{}'.format(from_row, sheet_last_col_letter, to_row)
499 481
500 # GET sheet_data for a worksheet tab 482 # GET sheets_loaded for a worksheet tab
501 sheet_data, time_extracted = get_data( 483 sheet_data, time_extracted = get_data(
502 stream_name=sheet_title, 484 stream_name='sheets_loaded',
503 endpoint_config=sheets_loaded_config, 485 endpoint_config=sheets_loaded_config,
504 client=client, 486 client=client,
487 sheet_title=sheet_title,
505 spreadsheet_id=spreadsheet_id, 488 spreadsheet_id=spreadsheet_id,
506 range_rows=range_rows) 489 range_rows=range_rows)
507 # Data is returned as a list of arrays, an array of values for each row 490 # Data is returned as a list of arrays, an array of values for each row