git.immae.eu Git - github/fretlink/tap-google-sheets.git/blobdiff - tap_google_sheets/sync.py
Bump to v1.1.0, update changelog (#26)
[github/fretlink/tap-google-sheets.git] / tap_google_sheets / sync.py
index b77eab38af8c8bc0da5f27caba6057c6e4110d54..26c2d19cebd1a4c174eeffa3a0be78797006d8a4 100644 (file)
@@ -1,6 +1,8 @@
 import time
 import math
 import json
+import re
+import urllib.parse
 from datetime import datetime, timedelta
 import pytz
 import singer
@@ -75,10 +77,14 @@ def process_records(catalog,
         for record in records:
             # Transform record for Singer.io
             with Transformer() as transformer:
-                transformed_record = transformer.transform(
-                    record,
-                    schema,
-                    stream_metadata)
+                try:
+                    transformed_record = transformer.transform(
+                        record,
+                        schema,
+                        stream_metadata)
+                except Exception as err:
+                    LOGGER.error('{}'.format(err))
+                    raise RuntimeError(err)
                 write_record(
                     stream_name=stream_name,
                     record=transformed_record,
@@ -144,22 +150,26 @@ def get_data(stream_name,
     if not range_rows:
         range_rows = ''
     # Replace {placeholder} variables in path
+    # Encode stream_name: fixes issue w/ special characters in sheet name
+    stream_name_escaped = re.escape(stream_name)
+    stream_name_encoded = urllib.parse.quote_plus(stream_name)
     path = endpoint_config.get('path', stream_name).replace(
-        '{spreadsheet_id}', spreadsheet_id).replace('{sheet_title}', stream_name).replace(
+        '{spreadsheet_id}', spreadsheet_id).replace('{sheet_title}', stream_name_encoded).replace(
             '{range_rows}', range_rows)
     params = endpoint_config.get('params', {})
     api = endpoint_config.get('api', 'sheets')
     # Add in querystring parameters and replace {placeholder} variables
     # querystring function ensures parameters are added but not encoded causing API errors
     querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()]).replace(
-        '{sheet_title}', stream_name)
+        '{sheet_title}', stream_name_encoded)
+    LOGGER.info('URL: {}/{}?{}'.format(client.base_url, path, querystring))
     data = {}
     time_extracted = utils.now()
     data = client.get(
         path=path,
         api=api,
         params=querystring,
-        endpoint=stream_name)
+        endpoint=stream_name_escaped)
     return data, time_extracted
 
 
@@ -388,6 +398,8 @@ def sync(client, config, catalog, state):
     LOGGER.info('last_datetime = {}, this_datetime = {}'.format(last_datetime, this_datetime))
     if this_datetime <= last_datetime:
         LOGGER.info('this_datetime <= last_datetime, FILE NOT CHANGED. EXITING.')
+        # Update file_metadata bookmark
+        write_bookmark(state, 'file_metadata', strftime(this_datetime))
         return
     # Sync file_metadata if selected
     sync_stream(stream_name, selected_streams, catalog, state, file_metadata_tf, time_extracted)
@@ -493,7 +505,7 @@ def sync(client, config, catalog, state):
                             spreadsheet_id=spreadsheet_id,
                             range_rows=range_rows)
                         # Data is returned as a list of arrays, an array of values for each row
-                        sheet_data_rows = sheet_data.get('values')
+                        sheet_data_rows = sheet_data.get('values', [])
 
                         # Transform batch of rows to JSON with keys for each column
                         sheet_data_tf, row_num = transform_sheet_data(