diff options
Diffstat (limited to 'tap_google_sheets/sync.py')
-rw-r--r-- | tap_google_sheets/sync.py | 37 |
1 files changed, 10 insertions, 27 deletions
diff --git a/tap_google_sheets/sync.py b/tap_google_sheets/sync.py index 26c2d19..c67055a 100644 --- a/tap_google_sheets/sync.py +++ b/tap_google_sheets/sync.py | |||
@@ -141,35 +141,17 @@ def get_selected_fields(catalog, stream_name): | |||
141 | pass | 141 | pass |
142 | return selected_fields | 142 | return selected_fields |
143 | 143 | ||
144 | |||
145 | def get_data(stream_name, | 144 | def get_data(stream_name, |
146 | endpoint_config, | 145 | endpoint_config, |
147 | client, | 146 | client, |
148 | spreadsheet_id, | 147 | **kwargs): |
149 | range_rows=None): | ||
150 | if not range_rows: | ||
151 | range_rows = '' | ||
152 | # Replace {placeholder} variables in path | ||
153 | # Encode stream_name: fixes issue w/ special characters in sheet name | ||
154 | stream_name_escaped = re.escape(stream_name) | ||
155 | stream_name_encoded = urllib.parse.quote_plus(stream_name) | ||
156 | path = endpoint_config.get('path', stream_name).replace( | ||
157 | '{spreadsheet_id}', spreadsheet_id).replace('{sheet_title}', stream_name_encoded).replace( | ||
158 | '{range_rows}', range_rows) | ||
159 | params = endpoint_config.get('params', {}) | 148 | params = endpoint_config.get('params', {}) |
160 | api = endpoint_config.get('api', 'sheets') | 149 | LOGGER.info('GET {}'.format(stream_name)) |
161 | # Add in querystring parameters and replace {placeholder} variables | ||
162 | # querystring function ensures parameters are added but not encoded causing API errors | ||
163 | querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()]).replace( | ||
164 | '{sheet_title}', stream_name_encoded) | ||
165 | LOGGER.info('URL: {}/{}?{}'.format(client.base_url, path, querystring)) | ||
166 | data = {} | ||
167 | time_extracted = utils.now() | 150 | time_extracted = utils.now() |
168 | data = client.get( | 151 | data = client.request( |
169 | path=path, | 152 | endpoint=stream_name, |
170 | api=api, | 153 | params=params, |
171 | params=querystring, | 154 | **kwargs) |
172 | endpoint=stream_name_escaped) | ||
173 | return data, time_extracted | 155 | return data, time_extracted |
174 | 156 | ||
175 | 157 | ||
@@ -382,7 +364,7 @@ def sync(client, config, catalog, state): | |||
382 | file_metadata_config = STREAMS.get(stream_name) | 364 | file_metadata_config = STREAMS.get(stream_name) |
383 | 365 | ||
384 | # GET file_metadata | 366 | # GET file_metadata |
385 | LOGGER.info('GET file_meatadata') | 367 | LOGGER.info('GET file_metadata') |
386 | file_metadata, time_extracted = get_data(stream_name=stream_name, | 368 | file_metadata, time_extracted = get_data(stream_name=stream_name, |
387 | endpoint_config=file_metadata_config, | 369 | endpoint_config=file_metadata_config, |
388 | client=client, | 370 | client=client, |
@@ -497,11 +479,12 @@ def sync(client, config, catalog, state): | |||
497 | while not is_last_row and from_row < sheet_max_row and to_row <= sheet_max_row: | 479 | while not is_last_row and from_row < sheet_max_row and to_row <= sheet_max_row: |
498 | range_rows = 'A{}:{}{}'.format(from_row, sheet_last_col_letter, to_row) | 480 | range_rows = 'A{}:{}{}'.format(from_row, sheet_last_col_letter, to_row) |
499 | 481 | ||
500 | # GET sheet_data for a worksheet tab | 482 | # GET sheets_loaded for a worksheet tab |
501 | sheet_data, time_extracted = get_data( | 483 | sheet_data, time_extracted = get_data( |
502 | stream_name=sheet_title, | 484 | stream_name='sheets_loaded', |
503 | endpoint_config=sheets_loaded_config, | 485 | endpoint_config=sheets_loaded_config, |
504 | client=client, | 486 | client=client, |
487 | sheet_title=sheet_title, | ||
505 | spreadsheet_id=spreadsheet_id, | 488 | spreadsheet_id=spreadsheet_id, |
506 | range_rows=range_rows) | 489 | range_rows=range_rows) |
507 | # Data is returned as a list of arrays, an array of values for each row | 490 | # Data is returned as a list of arrays, an array of values for each row |