Fix issues: slashes `/` in sheet name 404 error; Discovery malformed sheet error when 2nd row final column value(s) are `NULL`.
# Changelog
+## 1.0.3
+ * Fix issues: slashes `/` in sheet name 404 error; Discovery malformed sheet error when 2nd row final column value(s) are `NULL`.
+
## 1.0.2
* Skip sheets for which we fail to generate a schema
from setuptools import setup, find_packages
setup(name='tap-google-sheets',
- version='1.0.2',
+ version='1.0.3',
description='Singer.io tap for extracting data from the Google Sheets v4 API',
author='jeff.huth@bytecode.io',
classifiers=['Programming Language :: Python :: 3 :: Only'],
import os
import json
+import re
+import urllib.parse
from collections import OrderedDict
import singer
from singer import metadata
try:
first_value = first_values[i]
except IndexError as err:
- raise Exception('NO VALUE IN 2ND ROW FOR HEADER ERROR. SHEET: {}, COL: {}, CELL: {}2. {}'.format(
+ LOGGER.info('NO VALUE IN 2ND ROW FOR HEADER. SHEET: {}, COL: {}, CELL: {}2. {}'.format(
sheet_title, column_name, column_letter, err))
+ first_value = {}
+ first_values.append(first_value)
+ pass
column_effective_value = first_value.get('effectiveValue', {})
stream_metadata = STREAMS.get(stream_name)
api = stream_metadata.get('api', 'sheets')
params = stream_metadata.get('params', {})
+ sheet_title_encoded = urllib.parse.quote_plus(sheet_title)
+ sheet_title_escaped = re.escape(sheet_title)
querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in \
- params.items()]).replace('{sheet_title}', sheet_title)
+ params.items()]).replace('{sheet_title}', sheet_title_encoded)
path = '{}?{}'.format(stream_metadata.get('path').replace('{spreadsheet_id}', \
spreadsheet_id), querystring)
- sheet_md_results = client.get(path=path, api=api, endpoint=stream_name)
+ sheet_md_results = client.get(path=path, api=api, endpoint=sheet_title_escaped)
# sheet_metadata: 1st `sheets` node in results
sheet_metadata = sheet_md_results.get('sheets')[0]
# Create sheet_json_schema (for discovery/catalog) and columns (for sheet_metadata results)
try:
sheet_json_schema, columns = get_sheet_schema_columns(sheet_metadata)
- except:
- LOGGER.info('SKIPPING Malformed sheet: {}'.format(sheet_title))
+ except Exception as err:
+ LOGGER.warning('{}'.format(err))
+ LOGGER.warning('SKIPPING Malformed sheet: {}'.format(sheet_title))
sheet_json_schema, columns = None, None
return sheet_json_schema, columns
import time
import math
import json
+import re
+import urllib.parse
from datetime import datetime, timedelta
import pytz
import singer
for record in records:
# Transform record for Singer.io
with Transformer() as transformer:
- transformed_record = transformer.transform(
- record,
- schema,
- stream_metadata)
+ try:
+ transformed_record = transformer.transform(
+ record,
+ schema,
+ stream_metadata)
+ except Exception as err:
+ LOGGER.error('{}'.format(err))
+ raise RuntimeError(err)
write_record(
stream_name=stream_name,
record=transformed_record,
if not range_rows:
range_rows = ''
# Replace {placeholder} variables in path
+ # Encode stream_name: fixes issue w/ special characters in sheet name
+ stream_name_escaped = re.escape(stream_name)
+ stream_name_encoded = urllib.parse.quote_plus(stream_name)
path = endpoint_config.get('path', stream_name).replace(
- '{spreadsheet_id}', spreadsheet_id).replace('{sheet_title}', stream_name).replace(
+ '{spreadsheet_id}', spreadsheet_id).replace('{sheet_title}', stream_name_encoded).replace(
'{range_rows}', range_rows)
params = endpoint_config.get('params', {})
api = endpoint_config.get('api', 'sheets')
# Add in querystring parameters and replace {placeholder} variables
# querystring function ensures parameters are added but not encoded causing API errors
querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()]).replace(
- '{sheet_title}', stream_name)
+ '{sheet_title}', stream_name_encoded)
+ LOGGER.info('URL: {}/{}?{}'.format(client.base_url, path, querystring))
data = {}
time_extracted = utils.now()
data = client.get(
path=path,
api=api,
params=querystring,
- endpoint=stream_name)
+ endpoint=stream_name_escaped)
return data, time_extracted