1 files changed, 41 insertions, 10 deletions
diff --git a/tap_google_sheets/schema.py b/tap_google_sheets/schema.py
index 243467b..e319c03 100644
--- a/tap_google_sheets/schema.py
+++ b/tap_google_sheets/schema.py
@@ -21,6 +21,7 @@ def colnum_string(num):
 # Create sheet_metadata_json with columns from sheet
 def get_sheet_schema_columns(sheet):
+    sheet_title = sheet.get('properties', {}).get('title')
    sheet_json_schema = OrderedDict()
    data = next(iter(sheet.get('data', [])), {})
    row_data = data.get('rowData', [])
@@ -62,15 +63,34 @@ def get_sheet_schema_columns(sheet):
            skipped = 0
            column_name = '{}'.format(header_value)
            if column_name in header_list:
-                raise Exception('DUPLICATE HEADER ERROR: {}'.format(column_name))
+                raise Exception('DUPLICATE HEADER ERROR: SHEET: {}, COL: {}, CELL: {}1'.format(
+                    sheet_title, column_name, column_letter))
            header_list.append(column_name)
-            first_value = first_values[i]
+            first_value = None
+            try:
+                first_value = first_values[i]
+            except IndexError as err:
+                raise Exception('NO VALUE IN 2ND ROW FOR HEADER ERROR. SHEET: {}, COL: {}, CELL: {}2. {}'.format(
+                    sheet_title, column_name, column_letter, err))
+            
            column_effective_value = first_value.get('effectiveValue', {})
-            for key in column_effective_value.keys():
-                if key in ('numberValue', 'stringValue', 'boolValue', 'errorType', 'formulaType'):
+            col_val = None
-                    column_effective_value_type = key
+            if column_effective_value == {}:
+                column_effective_value_type = 'stringValue'
+                LOGGER.info('WARNING: NO VALUE IN 2ND ROW FOR HEADER. SHEET: {}, COL: {}, CELL: {}2.'.format(
+                    sheet_title, column_name, column_letter))
+                LOGGER.info('   Setting column datatype to STRING')
+            else:
+                for key, val in column_effective_value.items():
+                    if key in ('numberValue', 'stringValue', 'boolValue'):
+                        column_effective_value_type = key
+                        col_val = str(val)
+                    elif key in ('errorType', 'formulaType'):
+                        col_val = str(val)
+                        raise Exception('DATA TYPE ERROR 2ND ROW VALUE: SHEET: {}, COL: {}, CELL: {}2, TYPE: {}, VALUE: {}'.format(
+                            sheet_title, column_name, column_letter, key, col_val))
            column_number_format = first_values[i].get('effectiveFormat', {}).get(
                'numberFormat', {})
@@ -87,7 +107,13 @@ def get_sheet_schema_columns(sheet):
            #  https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType
            #
            column_format = None # Default
-            if column_effective_value_type == 'stringValue':
+            if column_effective_value == {}:
+                col_properties = {'type': ['null', 'string']}
+                column_gs_type = 'stringValue'
+                LOGGER.info('WARNING: 2ND ROW VALUE IS BLANK: SHEET: {}, COL: {}, CELL: {}2'.format(
+                        sheet_title, column_name, column_letter))
+                LOGGER.info('   Setting column datatype to STRING')
+            elif column_effective_value_type == 'stringValue':
                col_properties = {'type': ['null', 'string']}
                column_gs_type = 'stringValue'
            elif column_effective_value_type == 'boolValue':
@@ -138,8 +164,8 @@ def get_sheet_schema_columns(sheet):
            else:
                col_properties = {'type': ['null', 'string']}
                column_gs_type = 'unsupportedValue'
-                LOGGER.info('Unsupported data type: {}, value: {}'.format(column_name, \
+                LOGGER.info('WARNING: UNSUPPORTED 2ND ROW VALUE: SHEET: {}, COL: {}, CELL: {}2, TYPE: {}, VALUE: {}'.format(
-                    column_effective_value_type))
+                        sheet_title, column_name, column_letter, column_effective_value_type, col_val))
                LOGGER.info('Converting to string.')
        else: # skipped
            column_is_skipped = True
@@ -148,11 +174,16 @@ def get_sheet_schema_columns(sheet):
            column_name = '__sdc_skip_col_{}'.format(column_index_str)
            col_properties = {'type': ['null', 'string']}
            column_gs_type = 'stringValue'
+            LOGGER.info('WARNING: SKIPPED COLUMN; NO COLUMN HEADER. SHEET: {}, COL: {}, CELL: {}1'.format(
+                sheet_title, column_name, column_letter))
+            LOGGER.info('  This column will be skipped during data loading.')
        if skipped >= 2:
            # skipped = 2 consecutive skipped headers
            # Remove prior_header column_name
            sheet_json_schema['properties'].pop(prior_header, None)
+            LOGGER.info('TWO CONSECUTIVE SKIPPED COLUMNS. STOPPING SCAN AT: SHEET: {}, COL: {}, CELL {}1'.format(
+                sheet_title, column_name, column_letter))
            break
        else:
@@ -245,7 +276,7 @@ def get_schemas(client, spreadsheet_id):
                for sheet in sheets:
                    # GET sheet_json_schema for each worksheet (from function above)
                    sheet_json_schema, columns = get_sheet_metadata(sheet, spreadsheet_id, client)
-                    LOGGER.info('columns = {}'.format(columns))
+                    # LOGGER.info('columns = {}'.format(columns))
                    sheet_title = sheet.get('properties', {}).get('title')
                    schemas[sheet_title] = sheet_json_schema

diff --git a/tap_google_sheets/schema.py b/tap_google_sheets/schema.py index 243467b..e319c03 100644 --- a/tap_google_sheets/schema.py +++ b/tap_google_sheets/schema.py
@@ -21,6 +21,7 @@ def colnum_string(num):
21		21
22	# Create sheet_metadata_json with columns from sheet	22	# Create sheet_metadata_json with columns from sheet
23	def get_sheet_schema_columns(sheet):	23	def get_sheet_schema_columns(sheet):
		24	sheet_title = sheet.get('properties', {}).get('title')
24	sheet_json_schema = OrderedDict()	25	sheet_json_schema = OrderedDict()
25	data = next(iter(sheet.get('data', [])), {})	26	data = next(iter(sheet.get('data', [])), {})
26	row_data = data.get('rowData', [])	27	row_data = data.get('rowData', [])
@@ -62,15 +63,34 @@ def get_sheet_schema_columns(sheet):
62	skipped = 0	63	skipped = 0
63	column_name = '{}'.format(header_value)	64	column_name = '{}'.format(header_value)
64	if column_name in header_list:	65	if column_name in header_list:
65	raise Exception('DUPLICATE HEADER ERROR: {}'.format(column_name))	66	raise Exception('DUPLICATE HEADER ERROR: SHEET: {}, COL: {}, CELL: {}1'.format(
		67	sheet_title, column_name, column_letter))
66	header_list.append(column_name)	68	header_list.append(column_name)
67		69
68	first_value = first_values[i]	70	first_value = None
69		71	try:
		72	first_value = first_values[i]
		73	except IndexError as err:
		74	raise Exception('NO VALUE IN 2ND ROW FOR HEADER ERROR. SHEET: {}, COL: {}, CELL: {}2. {}'.format(
		75	sheet_title, column_name, column_letter, err))
		76
70	column_effective_value = first_value.get('effectiveValue', {})	77	column_effective_value = first_value.get('effectiveValue', {})
71	for key in column_effective_value.keys():	78
72	if key in ('numberValue', 'stringValue', 'boolValue', 'errorType', 'formulaType'):	79	col_val = None
73	column_effective_value_type = key	80	if column_effective_value == {}:
		81	column_effective_value_type = 'stringValue'
		82	LOGGER.info('WARNING: NO VALUE IN 2ND ROW FOR HEADER. SHEET: {}, COL: {}, CELL: {}2.'.format(
		83	sheet_title, column_name, column_letter))
		84	LOGGER.info(' Setting column datatype to STRING')
		85	else:
		86	for key, val in column_effective_value.items():
		87	if key in ('numberValue', 'stringValue', 'boolValue'):
		88	column_effective_value_type = key
		89	col_val = str(val)
		90	elif key in ('errorType', 'formulaType'):
		91	col_val = str(val)
		92	raise Exception('DATA TYPE ERROR 2ND ROW VALUE: SHEET: {}, COL: {}, CELL: {}2, TYPE: {}, VALUE: {}'.format(
		93	sheet_title, column_name, column_letter, key, col_val))
74		94
75	column_number_format = first_values[i].get('effectiveFormat', {}).get(	95	column_number_format = first_values[i].get('effectiveFormat', {}).get(
76	'numberFormat', {})	96	'numberFormat', {})
@@ -87,7 +107,13 @@ def get_sheet_schema_columns(sheet):
87	# https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType	107	# https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType
88	#	108	#
89	column_format = None # Default	109	column_format = None # Default
90	if column_effective_value_type == 'stringValue':	110	if column_effective_value == {}:
		111	col_properties = {'type': ['null', 'string']}
		112	column_gs_type = 'stringValue'
		113	LOGGER.info('WARNING: 2ND ROW VALUE IS BLANK: SHEET: {}, COL: {}, CELL: {}2'.format(
		114	sheet_title, column_name, column_letter))
		115	LOGGER.info(' Setting column datatype to STRING')
		116	elif column_effective_value_type == 'stringValue':
91	col_properties = {'type': ['null', 'string']}	117	col_properties = {'type': ['null', 'string']}
92	column_gs_type = 'stringValue'	118	column_gs_type = 'stringValue'
93	elif column_effective_value_type == 'boolValue':	119	elif column_effective_value_type == 'boolValue':
@@ -138,8 +164,8 @@ def get_sheet_schema_columns(sheet):
138	else:	164	else:
139	col_properties = {'type': ['null', 'string']}	165	col_properties = {'type': ['null', 'string']}
140	column_gs_type = 'unsupportedValue'	166	column_gs_type = 'unsupportedValue'
141	LOGGER.info('Unsupported data type: {}, value: {}'.format(column_name, \	167	LOGGER.info('WARNING: UNSUPPORTED 2ND ROW VALUE: SHEET: {}, COL: {}, CELL: {}2, TYPE: {}, VALUE: {}'.format(
142	column_effective_value_type))	168	sheet_title, column_name, column_letter, column_effective_value_type, col_val))
143	LOGGER.info('Converting to string.')	169	LOGGER.info('Converting to string.')
144	else: # skipped	170	else: # skipped
145	column_is_skipped = True	171	column_is_skipped = True
@@ -148,11 +174,16 @@ def get_sheet_schema_columns(sheet):
148	column_name = '__sdc_skip_col_{}'.format(column_index_str)	174	column_name = '__sdc_skip_col_{}'.format(column_index_str)
149	col_properties = {'type': ['null', 'string']}	175	col_properties = {'type': ['null', 'string']}
150	column_gs_type = 'stringValue'	176	column_gs_type = 'stringValue'
		177	LOGGER.info('WARNING: SKIPPED COLUMN; NO COLUMN HEADER. SHEET: {}, COL: {}, CELL: {}1'.format(
		178	sheet_title, column_name, column_letter))
		179	LOGGER.info(' This column will be skipped during data loading.')
151		180
152	if skipped >= 2:	181	if skipped >= 2:
153	# skipped = 2 consecutive skipped headers	182	# skipped = 2 consecutive skipped headers
154	# Remove prior_header column_name	183	# Remove prior_header column_name
155	sheet_json_schema['properties'].pop(prior_header, None)	184	sheet_json_schema['properties'].pop(prior_header, None)
		185	LOGGER.info('TWO CONSECUTIVE SKIPPED COLUMNS. STOPPING SCAN AT: SHEET: {}, COL: {}, CELL {}1'.format(
		186	sheet_title, column_name, column_letter))
156	break	187	break
157		188
158	else:	189	else:
@@ -245,7 +276,7 @@ def get_schemas(client, spreadsheet_id):
245	for sheet in sheets:	276	for sheet in sheets:
246	# GET sheet_json_schema for each worksheet (from function above)	277	# GET sheet_json_schema for each worksheet (from function above)
247	sheet_json_schema, columns = get_sheet_metadata(sheet, spreadsheet_id, client)	278	sheet_json_schema, columns = get_sheet_metadata(sheet, spreadsheet_id, client)
248	LOGGER.info('columns = {}'.format(columns))	279	# LOGGER.info('columns = {}'.format(columns))
249		280
250	sheet_title = sheet.get('properties', {}).get('title')	281	sheet_title = sheet.get('properties', {}).get('title')
251	schemas[sheet_title] = sheet_json_schema	282	schemas[sheet_title] = sheet_json_schema