about | summary | refs | log | tree | commit | diff | homepage
path: root/tap_google_sheets/sync.py
diff options
context:
space:
mode:
Diffstat (limited to 'tap_google_sheets/sync.py')
-rw-r--r-- tap_google_sheets/sync.py 37
1 files changed, 10 insertions, 27 deletions
diff --git a/tap_google_sheets/sync.py b/tap_google_sheets/sync.py
index 26c2d19..c67055a 100644
--- a/tap_google_sheets/sync.py
+++ b/tap_google_sheets/sync.py
@@ -141,35 +141,17 @@ def get_selected_fields(catalog, stream_name):
141 pass 141 pass
142 return selected_fields 142 return selected_fields
143 143
144
145 def get_data(stream_name, 144 def get_data(stream_name,
146 endpoint_config, 145 endpoint_config,
147 client, 146 client,
148 spreadsheet_id, 147 **kwargs):
149 range_rows=None):
150 if not range_rows:
151 range_rows = ''
152 # Replace {placeholder} variables in path
153 # Encode stream_name: fixes issue w/ special characters in sheet name
154 stream_name_escaped = re.escape(stream_name)
155 stream_name_encoded = urllib.parse.quote_plus(stream_name)
156 path = endpoint_config.get('path', stream_name).replace(
157 '{spreadsheet_id}', spreadsheet_id).replace('{sheet_title}', stream_name_encoded).replace(
158 '{range_rows}', range_rows)
159 params = endpoint_config.get('params', {}) 148 params = endpoint_config.get('params', {})
160 api = endpoint_config.get('api', 'sheets') 149 LOGGER.info('GET {}'.format(stream_name))
161 # Add in querystring parameters and replace {placeholder} variables
162 # querystring function ensures parameters are added but not encoded causing API errors
163 querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()]).replace(
164 '{sheet_title}', stream_name_encoded)
165 LOGGER.info('URL: {}/{}?{}'.format(client.base_url, path, querystring))
166 data = {}
167 time_extracted = utils.now() 150 time_extracted = utils.now()
168 data = client.get( 151 data = client.request(
169 path=path, 152 endpoint=stream_name,
170 api=api, 153 params=params,
171 params=querystring, 154 **kwargs)
172 endpoint=stream_name_escaped)
173 return data, time_extracted 155 return data, time_extracted
174 156
175 157
@@ -382,7 +364,7 @@ def sync(client, config, catalog, state):
382 file_metadata_config = STREAMS.get(stream_name) 364 file_metadata_config = STREAMS.get(stream_name)
383 365
384 # GET file_metadata 366 # GET file_metadata
385 LOGGER.info('GET file_meatadata') 367 LOGGER.info('GET file_metadata')
386 file_metadata, time_extracted = get_data(stream_name=stream_name, 368 file_metadata, time_extracted = get_data(stream_name=stream_name,
387 endpoint_config=file_metadata_config, 369 endpoint_config=file_metadata_config,
388 client=client, 370 client=client,
@@ -497,11 +479,12 @@ def sync(client, config, catalog, state):
497 while not is_last_row and from_row < sheet_max_row and to_row <= sheet_max_row: 479 while not is_last_row and from_row < sheet_max_row and to_row <= sheet_max_row:
498 range_rows = 'A{}:{}{}'.format(from_row, sheet_last_col_letter, to_row) 480 range_rows = 'A{}:{}{}'.format(from_row, sheet_last_col_letter, to_row)
499 481
500 # GET sheet_data for a worksheet tab 482 # GET sheets_loaded for a worksheet tab
501 sheet_data, time_extracted = get_data( 483 sheet_data, time_extracted = get_data(
502 stream_name=sheet_title, 484 stream_name='sheets_loaded',
503 endpoint_config=sheets_loaded_config, 485 endpoint_config=sheets_loaded_config,
504 client=client, 486 client=client,
487 sheet_title=sheet_title,
505 spreadsheet_id=spreadsheet_id, 488 spreadsheet_id=spreadsheet_id,
506 range_rows=range_rows) 489 range_rows=range_rows)
507 # Data is returned as a list of arrays, an array of values for each row 490 # Data is returned as a list of arrays, an array of values for each row