about | summary | refs | log | tree | commit | diff | homepage
path: root/tap_google_sheets/schema.py
diff options
context:
space:
mode:
author: Jeff Huth <39202799+jeffhuth-bytecode@users.noreply.github.com> 2020-04-29 11:29:42 -0700
committer: GitHub <noreply@github.com> 2020-04-29 14:29:42 -0400
commit: 5fc2ead55ea6a67c13e3caeec315454148be593d (patch)
tree: d85ed97e65f0de7807f3b58c09ab01e08625a26c /tap_google_sheets/schema.py
parent: ba9a302c2b1411ceca7753bd8b97f1944410f1ce (diff)
download: tap-google-sheets-5fc2ead55ea6a67c13e3caeec315454148be593d.tar.gz
tap-google-sheets-5fc2ead55ea6a67c13e3caeec315454148be593d.tar.zst
tap-google-sheets-5fc2ead55ea6a67c13e3caeec315454148be593d.zip
v.1.0.3 Fix slashes and discovery errors (#15) [tag: v1.0.3]
Fixes two issues: a slash (`/`) in a sheet name caused a 404 error; and discovery raised a malformed-sheet error when the final column value(s) in the 2nd row are `NULL`.
Diffstat (limited to 'tap_google_sheets/schema.py')
-rw-r--r-- tap_google_sheets/schema.py 18
1 files changed, 13 insertions, 5 deletions
diff --git a/tap_google_sheets/schema.py b/tap_google_sheets/schema.py
index 3e63e75..fcaccf9 100644
--- a/tap_google_sheets/schema.py
+++ b/tap_google_sheets/schema.py
@@ -1,5 +1,7 @@
1import os 1import os
2import json 2import json
3import re
4import urllib.parse
3from collections import OrderedDict 5from collections import OrderedDict
4import singer 6import singer
5from singer import metadata 7from singer import metadata
@@ -74,8 +76,11 @@ def get_sheet_schema_columns(sheet):
74 try: 76 try:
75 first_value = first_values[i] 77 first_value = first_values[i]
76 except IndexError as err: 78 except IndexError as err:
77 raise Exception('NO VALUE IN 2ND ROW FOR HEADER ERROR. SHEET: {}, COL: {}, CELL: {}2. {}'.format( 79 LOGGER.info('NO VALUE IN 2ND ROW FOR HEADER. SHEET: {}, COL: {}, CELL: {}2. {}'.format(
78 sheet_title, column_name, column_letter, err)) 80 sheet_title, column_name, column_letter, err))
81 first_value = {}
82 first_values.append(first_value)
83 pass
79 84
80 column_effective_value = first_value.get('effectiveValue', {}) 85 column_effective_value = first_value.get('effectiveValue', {})
81 86
@@ -221,20 +226,23 @@ def get_sheet_metadata(sheet, spreadsheet_id, client):
221 stream_metadata = STREAMS.get(stream_name) 226 stream_metadata = STREAMS.get(stream_name)
222 api = stream_metadata.get('api', 'sheets') 227 api = stream_metadata.get('api', 'sheets')
223 params = stream_metadata.get('params', {}) 228 params = stream_metadata.get('params', {})
229 sheet_title_encoded = urllib.parse.quote_plus(sheet_title)
230 sheet_title_escaped = re.escape(sheet_title)
224 querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in \ 231 querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in \
225 params.items()]).replace('{sheet_title}', sheet_title) 232 params.items()]).replace('{sheet_title}', sheet_title_encoded)
226 path = '{}?{}'.format(stream_metadata.get('path').replace('{spreadsheet_id}', \ 233 path = '{}?{}'.format(stream_metadata.get('path').replace('{spreadsheet_id}', \
227 spreadsheet_id), querystring) 234 spreadsheet_id), querystring)
228 235
229 sheet_md_results = client.get(path=path, api=api, endpoint=stream_name) 236 sheet_md_results = client.get(path=path, api=api, endpoint=sheet_title_escaped)
230 # sheet_metadata: 1st `sheets` node in results 237 # sheet_metadata: 1st `sheets` node in results
231 sheet_metadata = sheet_md_results.get('sheets')[0] 238 sheet_metadata = sheet_md_results.get('sheets')[0]
232 239
233 # Create sheet_json_schema (for discovery/catalog) and columns (for sheet_metadata results) 240 # Create sheet_json_schema (for discovery/catalog) and columns (for sheet_metadata results)
234 try: 241 try:
235 sheet_json_schema, columns = get_sheet_schema_columns(sheet_metadata) 242 sheet_json_schema, columns = get_sheet_schema_columns(sheet_metadata)
236 except: 243 except Exception as err:
237 LOGGER.info('SKIPPING Malformed sheet: {}'.format(sheet_title)) 244 LOGGER.warning('{}'.format(err))
245 LOGGER.warning('SKIPPING Malformed sheet: {}'.format(sheet_title))
238 sheet_json_schema, columns = None, None 246 sheet_json_schema, columns = None, None
239 247
240 return sheet_json_schema, columns 248 return sheet_json_schema, columns