aboutsummaryrefslogtreecommitdiffhomepage
path: root/tap_google_sheets/schema.py
diff options
context:
space:
mode:
Diffstat (limited to 'tap_google_sheets/schema.py')
-rw-r--r--tap_google_sheets/schema.py51
1 files changed, 41 insertions, 10 deletions
diff --git a/tap_google_sheets/schema.py b/tap_google_sheets/schema.py
index 243467b..e319c03 100644
--- a/tap_google_sheets/schema.py
+++ b/tap_google_sheets/schema.py
@@ -21,6 +21,7 @@ def colnum_string(num):
21 21
22# Create sheet_metadata_json with columns from sheet 22# Create sheet_metadata_json with columns from sheet
23def get_sheet_schema_columns(sheet): 23def get_sheet_schema_columns(sheet):
24 sheet_title = sheet.get('properties', {}).get('title')
24 sheet_json_schema = OrderedDict() 25 sheet_json_schema = OrderedDict()
25 data = next(iter(sheet.get('data', [])), {}) 26 data = next(iter(sheet.get('data', [])), {})
26 row_data = data.get('rowData', []) 27 row_data = data.get('rowData', [])
@@ -62,15 +63,34 @@ def get_sheet_schema_columns(sheet):
62 skipped = 0 63 skipped = 0
63 column_name = '{}'.format(header_value) 64 column_name = '{}'.format(header_value)
64 if column_name in header_list: 65 if column_name in header_list:
65 raise Exception('DUPLICATE HEADER ERROR: {}'.format(column_name)) 66 raise Exception('DUPLICATE HEADER ERROR: SHEET: {}, COL: {}, CELL: {}1'.format(
67 sheet_title, column_name, column_letter))
66 header_list.append(column_name) 68 header_list.append(column_name)
67 69
68 first_value = first_values[i] 70 first_value = None
69 71 try:
72 first_value = first_values[i]
73 except IndexError as err:
74 raise Exception('NO VALUE IN 2ND ROW FOR HEADER ERROR. SHEET: {}, COL: {}, CELL: {}2. {}'.format(
75 sheet_title, column_name, column_letter, err))
76
70 column_effective_value = first_value.get('effectiveValue', {}) 77 column_effective_value = first_value.get('effectiveValue', {})
71 for key in column_effective_value.keys(): 78
72 if key in ('numberValue', 'stringValue', 'boolValue', 'errorType', 'formulaType'): 79 col_val = None
73 column_effective_value_type = key 80 if column_effective_value == {}:
81 column_effective_value_type = 'stringValue'
82 LOGGER.info('WARNING: NO VALUE IN 2ND ROW FOR HEADER. SHEET: {}, COL: {}, CELL: {}2.'.format(
83 sheet_title, column_name, column_letter))
84 LOGGER.info(' Setting column datatype to STRING')
85 else:
86 for key, val in column_effective_value.items():
87 if key in ('numberValue', 'stringValue', 'boolValue'):
88 column_effective_value_type = key
89 col_val = str(val)
90 elif key in ('errorType', 'formulaType'):
91 col_val = str(val)
92 raise Exception('DATA TYPE ERROR 2ND ROW VALUE: SHEET: {}, COL: {}, CELL: {}2, TYPE: {}, VALUE: {}'.format(
93 sheet_title, column_name, column_letter, key, col_val))
74 94
75 column_number_format = first_values[i].get('effectiveFormat', {}).get( 95 column_number_format = first_values[i].get('effectiveFormat', {}).get(
76 'numberFormat', {}) 96 'numberFormat', {})
@@ -87,7 +107,13 @@ def get_sheet_schema_columns(sheet):
87 # https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType 107 # https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType
88 # 108 #
89 column_format = None # Default 109 column_format = None # Default
90 if column_effective_value_type == 'stringValue': 110 if column_effective_value == {}:
111 col_properties = {'type': ['null', 'string']}
112 column_gs_type = 'stringValue'
113 LOGGER.info('WARNING: 2ND ROW VALUE IS BLANK: SHEET: {}, COL: {}, CELL: {}2'.format(
114 sheet_title, column_name, column_letter))
115 LOGGER.info(' Setting column datatype to STRING')
116 elif column_effective_value_type == 'stringValue':
91 col_properties = {'type': ['null', 'string']} 117 col_properties = {'type': ['null', 'string']}
92 column_gs_type = 'stringValue' 118 column_gs_type = 'stringValue'
93 elif column_effective_value_type == 'boolValue': 119 elif column_effective_value_type == 'boolValue':
@@ -138,8 +164,8 @@ def get_sheet_schema_columns(sheet):
138 else: 164 else:
139 col_properties = {'type': ['null', 'string']} 165 col_properties = {'type': ['null', 'string']}
140 column_gs_type = 'unsupportedValue' 166 column_gs_type = 'unsupportedValue'
141 LOGGER.info('Unsupported data type: {}, value: {}'.format(column_name, \ 167 LOGGER.info('WARNING: UNSUPPORTED 2ND ROW VALUE: SHEET: {}, COL: {}, CELL: {}2, TYPE: {}, VALUE: {}'.format(
142 column_effective_value_type)) 168 sheet_title, column_name, column_letter, column_effective_value_type, col_val))
143 LOGGER.info('Converting to string.') 169 LOGGER.info('Converting to string.')
144 else: # skipped 170 else: # skipped
145 column_is_skipped = True 171 column_is_skipped = True
@@ -148,11 +174,16 @@ def get_sheet_schema_columns(sheet):
148 column_name = '__sdc_skip_col_{}'.format(column_index_str) 174 column_name = '__sdc_skip_col_{}'.format(column_index_str)
149 col_properties = {'type': ['null', 'string']} 175 col_properties = {'type': ['null', 'string']}
150 column_gs_type = 'stringValue' 176 column_gs_type = 'stringValue'
177 LOGGER.info('WARNING: SKIPPED COLUMN; NO COLUMN HEADER. SHEET: {}, COL: {}, CELL: {}1'.format(
178 sheet_title, column_name, column_letter))
179 LOGGER.info(' This column will be skipped during data loading.')
151 180
152 if skipped >= 2: 181 if skipped >= 2:
153 # skipped = 2 consecutive skipped headers 182 # skipped = 2 consecutive skipped headers
154 # Remove prior_header column_name 183 # Remove prior_header column_name
155 sheet_json_schema['properties'].pop(prior_header, None) 184 sheet_json_schema['properties'].pop(prior_header, None)
185 LOGGER.info('TWO CONSECUTIVE SKIPPED COLUMNS. STOPPING SCAN AT: SHEET: {}, COL: {}, CELL {}1'.format(
186 sheet_title, column_name, column_letter))
156 break 187 break
157 188
158 else: 189 else:
@@ -245,7 +276,7 @@ def get_schemas(client, spreadsheet_id):
245 for sheet in sheets: 276 for sheet in sheets:
246 # GET sheet_json_schema for each worksheet (from function above) 277 # GET sheet_json_schema for each worksheet (from function above)
247 sheet_json_schema, columns = get_sheet_metadata(sheet, spreadsheet_id, client) 278 sheet_json_schema, columns = get_sheet_metadata(sheet, spreadsheet_id, client)
248 LOGGER.info('columns = {}'.format(columns)) 279 # LOGGER.info('columns = {}'.format(columns))
249 280
250 sheet_title = sheet.get('properties', {}).get('title') 281 sheet_title = sheet.get('properties', {}).get('title')
251 schemas[sheet_title] = sheet_json_schema 282 schemas[sheet_title] = sheet_json_schema