aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG.md3
-rw-r--r--setup.py2
-rw-r--r--tap_google_sheets/schema.py18
-rw-r--r--tap_google_sheets/sync.py24
4 files changed, 34 insertions, 13 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fb38c2c..08c4751 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
1# Changelog 1# Changelog
2 2
3## 1.0.3
4 * Fix issues: slashes `/` in sheet name 404 error; Discovery malformed sheet error when 2nd row final column value(s) are `NULL`.
5
3## 1.0.2 6## 1.0.2
4 * Skip sheets for which we fail to generate a schema 7 * Skip sheets for which we fail to generate a schema
5 8
diff --git a/setup.py b/setup.py
index 49c3e3e..ec63fdf 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
3from setuptools import setup, find_packages 3from setuptools import setup, find_packages
4 4
5setup(name='tap-google-sheets', 5setup(name='tap-google-sheets',
6 version='1.0.2', 6 version='1.0.3',
7 description='Singer.io tap for extracting data from the Google Sheets v4 API', 7 description='Singer.io tap for extracting data from the Google Sheets v4 API',
8 author='jeff.huth@bytecode.io', 8 author='jeff.huth@bytecode.io',
9 classifiers=['Programming Language :: Python :: 3 :: Only'], 9 classifiers=['Programming Language :: Python :: 3 :: Only'],
diff --git a/tap_google_sheets/schema.py b/tap_google_sheets/schema.py
index 3e63e75..fcaccf9 100644
--- a/tap_google_sheets/schema.py
+++ b/tap_google_sheets/schema.py
@@ -1,5 +1,7 @@
1import os 1import os
2import json 2import json
3import re
4import urllib.parse
3from collections import OrderedDict 5from collections import OrderedDict
4import singer 6import singer
5from singer import metadata 7from singer import metadata
@@ -74,8 +76,11 @@ def get_sheet_schema_columns(sheet):
74 try: 76 try:
75 first_value = first_values[i] 77 first_value = first_values[i]
76 except IndexError as err: 78 except IndexError as err:
77 raise Exception('NO VALUE IN 2ND ROW FOR HEADER ERROR. SHEET: {}, COL: {}, CELL: {}2. {}'.format( 79 LOGGER.info('NO VALUE IN 2ND ROW FOR HEADER. SHEET: {}, COL: {}, CELL: {}2. {}'.format(
78 sheet_title, column_name, column_letter, err)) 80 sheet_title, column_name, column_letter, err))
81 first_value = {}
82 first_values.append(first_value)
83 pass
79 84
80 column_effective_value = first_value.get('effectiveValue', {}) 85 column_effective_value = first_value.get('effectiveValue', {})
81 86
@@ -221,20 +226,23 @@ def get_sheet_metadata(sheet, spreadsheet_id, client):
221 stream_metadata = STREAMS.get(stream_name) 226 stream_metadata = STREAMS.get(stream_name)
222 api = stream_metadata.get('api', 'sheets') 227 api = stream_metadata.get('api', 'sheets')
223 params = stream_metadata.get('params', {}) 228 params = stream_metadata.get('params', {})
229 sheet_title_encoded = urllib.parse.quote_plus(sheet_title)
230 sheet_title_escaped = re.escape(sheet_title)
224 querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in \ 231 querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in \
225 params.items()]).replace('{sheet_title}', sheet_title) 232 params.items()]).replace('{sheet_title}', sheet_title_encoded)
226 path = '{}?{}'.format(stream_metadata.get('path').replace('{spreadsheet_id}', \ 233 path = '{}?{}'.format(stream_metadata.get('path').replace('{spreadsheet_id}', \
227 spreadsheet_id), querystring) 234 spreadsheet_id), querystring)
228 235
229 sheet_md_results = client.get(path=path, api=api, endpoint=stream_name) 236 sheet_md_results = client.get(path=path, api=api, endpoint=sheet_title_escaped)
230 # sheet_metadata: 1st `sheets` node in results 237 # sheet_metadata: 1st `sheets` node in results
231 sheet_metadata = sheet_md_results.get('sheets')[0] 238 sheet_metadata = sheet_md_results.get('sheets')[0]
232 239
233 # Create sheet_json_schema (for discovery/catalog) and columns (for sheet_metadata results) 240 # Create sheet_json_schema (for discovery/catalog) and columns (for sheet_metadata results)
234 try: 241 try:
235 sheet_json_schema, columns = get_sheet_schema_columns(sheet_metadata) 242 sheet_json_schema, columns = get_sheet_schema_columns(sheet_metadata)
236 except: 243 except Exception as err:
237 LOGGER.info('SKIPPING Malformed sheet: {}'.format(sheet_title)) 244 LOGGER.warning('{}'.format(err))
245 LOGGER.warning('SKIPPING Malformed sheet: {}'.format(sheet_title))
238 sheet_json_schema, columns = None, None 246 sheet_json_schema, columns = None, None
239 247
240 return sheet_json_schema, columns 248 return sheet_json_schema, columns
diff --git a/tap_google_sheets/sync.py b/tap_google_sheets/sync.py
index 689be27..3e26bc4 100644
--- a/tap_google_sheets/sync.py
+++ b/tap_google_sheets/sync.py
@@ -1,6 +1,8 @@
1import time 1import time
2import math 2import math
3import json 3import json
4import re
5import urllib.parse
4from datetime import datetime, timedelta 6from datetime import datetime, timedelta
5import pytz 7import pytz
6import singer 8import singer
@@ -75,10 +77,14 @@ def process_records(catalog,
75 for record in records: 77 for record in records:
76 # Transform record for Singer.io 78 # Transform record for Singer.io
77 with Transformer() as transformer: 79 with Transformer() as transformer:
78 transformed_record = transformer.transform( 80 try:
79 record, 81 transformed_record = transformer.transform(
80 schema, 82 record,
81 stream_metadata) 83 schema,
84 stream_metadata)
85 except Exception as err:
86 LOGGER.error('{}'.format(err))
87 raise RuntimeError(err)
82 write_record( 88 write_record(
83 stream_name=stream_name, 89 stream_name=stream_name,
84 record=transformed_record, 90 record=transformed_record,
@@ -144,22 +150,26 @@ def get_data(stream_name,
144 if not range_rows: 150 if not range_rows:
145 range_rows = '' 151 range_rows = ''
146 # Replace {placeholder} variables in path 152 # Replace {placeholder} variables in path
153 # Encode stream_name: fixes issue w/ special characters in sheet name
154 stream_name_escaped = re.escape(stream_name)
155 stream_name_encoded = urllib.parse.quote_plus(stream_name)
147 path = endpoint_config.get('path', stream_name).replace( 156 path = endpoint_config.get('path', stream_name).replace(
148 '{spreadsheet_id}', spreadsheet_id).replace('{sheet_title}', stream_name).replace( 157 '{spreadsheet_id}', spreadsheet_id).replace('{sheet_title}', stream_name_encoded).replace(
149 '{range_rows}', range_rows) 158 '{range_rows}', range_rows)
150 params = endpoint_config.get('params', {}) 159 params = endpoint_config.get('params', {})
151 api = endpoint_config.get('api', 'sheets') 160 api = endpoint_config.get('api', 'sheets')
152 # Add in querystring parameters and replace {placeholder} variables 161 # Add in querystring parameters and replace {placeholder} variables
153 # querystring function ensures parameters are added but not encoded causing API errors 162 # querystring function ensures parameters are added but not encoded causing API errors
154 querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()]).replace( 163 querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()]).replace(
155 '{sheet_title}', stream_name) 164 '{sheet_title}', stream_name_encoded)
165 LOGGER.info('URL: {}/{}?{}'.format(client.base_url, path, querystring))
156 data = {} 166 data = {}
157 time_extracted = utils.now() 167 time_extracted = utils.now()
158 data = client.get( 168 data = client.get(
159 path=path, 169 path=path,
160 api=api, 170 api=api,
161 params=querystring, 171 params=querystring,
162 endpoint=stream_name) 172 endpoint=stream_name_escaped)
163 return data, time_extracted 173 return data, time_extracted
164 174
165 175