diff options
author | Jeff Huth <39202799+jeffhuth-bytecode@users.noreply.github.com> | 2020-01-09 07:30:53 -0800 |
---|---|---|
committer | Kyle Allan <KAllan357@gmail.com> | 2020-01-09 10:30:53 -0500 |
commit | 43a24cbab1dbc35b893c35b86e34adc0f2fb84e7 (patch) | |
tree | bcbaae860aad0a94bcc4d27f4804504691401438 /tap_google_sheets/schema.py | |
parent | 5890b89c1aa7c554235b3cef156b5a5a2c594bec (diff) | |
download | tap-google-sheets-e746354ec6a62e29923d1a18272b88a57527f172.tar.gz tap-google-sheets-e746354ec6a62e29923d1a18272b88a57527f172.tar.zst tap-google-sheets-e746354ec6a62e29923d1a18272b88a57527f172.zip |
v.0.0.3 Sync error handling, activate version, documentation (#2)v0.0.3
* v.0.0.2 schema and sync changes
Change number json schema to anyOf with multipleOf; skip empty rows; move write_bookmark to end of sync.py
* v.0.0.3 Sync activate version and error handling
Update README.md documentation. Improved logging and handling of errors and warnings. Better null handling in Discovery and Sync. Fix issues with activate version messages.
Diffstat (limited to 'tap_google_sheets/schema.py')
-rw-r--r-- | tap_google_sheets/schema.py | 51 |
1 files changed, 41 insertions, 10 deletions
diff --git a/tap_google_sheets/schema.py b/tap_google_sheets/schema.py index 243467b..e319c03 100644 --- a/tap_google_sheets/schema.py +++ b/tap_google_sheets/schema.py | |||
@@ -21,6 +21,7 @@ def colnum_string(num): | |||
21 | 21 | ||
22 | # Create sheet_metadata_json with columns from sheet | 22 | # Create sheet_metadata_json with columns from sheet |
23 | def get_sheet_schema_columns(sheet): | 23 | def get_sheet_schema_columns(sheet): |
24 | sheet_title = sheet.get('properties', {}).get('title') | ||
24 | sheet_json_schema = OrderedDict() | 25 | sheet_json_schema = OrderedDict() |
25 | data = next(iter(sheet.get('data', [])), {}) | 26 | data = next(iter(sheet.get('data', [])), {}) |
26 | row_data = data.get('rowData', []) | 27 | row_data = data.get('rowData', []) |
@@ -62,15 +63,34 @@ def get_sheet_schema_columns(sheet): | |||
62 | skipped = 0 | 63 | skipped = 0 |
63 | column_name = '{}'.format(header_value) | 64 | column_name = '{}'.format(header_value) |
64 | if column_name in header_list: | 65 | if column_name in header_list: |
65 | raise Exception('DUPLICATE HEADER ERROR: {}'.format(column_name)) | 66 | raise Exception('DUPLICATE HEADER ERROR: SHEET: {}, COL: {}, CELL: {}1'.format( |
67 | sheet_title, column_name, column_letter)) | ||
66 | header_list.append(column_name) | 68 | header_list.append(column_name) |
67 | 69 | ||
68 | first_value = first_values[i] | 70 | first_value = None |
69 | 71 | try: | |
72 | first_value = first_values[i] | ||
73 | except IndexError as err: | ||
74 | raise Exception('NO VALUE IN 2ND ROW FOR HEADER ERROR. SHEET: {}, COL: {}, CELL: {}2. {}'.format( | ||
75 | sheet_title, column_name, column_letter, err)) | ||
76 | |||
70 | column_effective_value = first_value.get('effectiveValue', {}) | 77 | column_effective_value = first_value.get('effectiveValue', {}) |
71 | for key in column_effective_value.keys(): | 78 | |
72 | if key in ('numberValue', 'stringValue', 'boolValue', 'errorType', 'formulaType'): | 79 | col_val = None |
73 | column_effective_value_type = key | 80 | if column_effective_value == {}: |
81 | column_effective_value_type = 'stringValue' | ||
82 | LOGGER.info('WARNING: NO VALUE IN 2ND ROW FOR HEADER. SHEET: {}, COL: {}, CELL: {}2.'.format( | ||
83 | sheet_title, column_name, column_letter)) | ||
84 | LOGGER.info(' Setting column datatype to STRING') | ||
85 | else: | ||
86 | for key, val in column_effective_value.items(): | ||
87 | if key in ('numberValue', 'stringValue', 'boolValue'): | ||
88 | column_effective_value_type = key | ||
89 | col_val = str(val) | ||
90 | elif key in ('errorType', 'formulaType'): | ||
91 | col_val = str(val) | ||
92 | raise Exception('DATA TYPE ERROR 2ND ROW VALUE: SHEET: {}, COL: {}, CELL: {}2, TYPE: {}, VALUE: {}'.format( | ||
93 | sheet_title, column_name, column_letter, key, col_val)) | ||
74 | 94 | ||
75 | column_number_format = first_values[i].get('effectiveFormat', {}).get( | 95 | column_number_format = first_values[i].get('effectiveFormat', {}).get( |
76 | 'numberFormat', {}) | 96 | 'numberFormat', {}) |
@@ -87,7 +107,13 @@ def get_sheet_schema_columns(sheet): | |||
87 | # https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType | 107 | # https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType |
88 | # | 108 | # |
89 | column_format = None # Default | 109 | column_format = None # Default |
90 | if column_effective_value_type == 'stringValue': | 110 | if column_effective_value == {}: |
111 | col_properties = {'type': ['null', 'string']} | ||
112 | column_gs_type = 'stringValue' | ||
113 | LOGGER.info('WARNING: 2ND ROW VALUE IS BLANK: SHEET: {}, COL: {}, CELL: {}2'.format( | ||
114 | sheet_title, column_name, column_letter)) | ||
115 | LOGGER.info(' Setting column datatype to STRING') | ||
116 | elif column_effective_value_type == 'stringValue': | ||
91 | col_properties = {'type': ['null', 'string']} | 117 | col_properties = {'type': ['null', 'string']} |
92 | column_gs_type = 'stringValue' | 118 | column_gs_type = 'stringValue' |
93 | elif column_effective_value_type == 'boolValue': | 119 | elif column_effective_value_type == 'boolValue': |
@@ -138,8 +164,8 @@ def get_sheet_schema_columns(sheet): | |||
138 | else: | 164 | else: |
139 | col_properties = {'type': ['null', 'string']} | 165 | col_properties = {'type': ['null', 'string']} |
140 | column_gs_type = 'unsupportedValue' | 166 | column_gs_type = 'unsupportedValue' |
141 | LOGGER.info('Unsupported data type: {}, value: {}'.format(column_name, \ | 167 | LOGGER.info('WARNING: UNSUPPORTED 2ND ROW VALUE: SHEET: {}, COL: {}, CELL: {}2, TYPE: {}, VALUE: {}'.format( |
142 | column_effective_value_type)) | 168 | sheet_title, column_name, column_letter, column_effective_value_type, col_val)) |
143 | LOGGER.info('Converting to string.') | 169 | LOGGER.info('Converting to string.') |
144 | else: # skipped | 170 | else: # skipped |
145 | column_is_skipped = True | 171 | column_is_skipped = True |
@@ -148,11 +174,16 @@ def get_sheet_schema_columns(sheet): | |||
148 | column_name = '__sdc_skip_col_{}'.format(column_index_str) | 174 | column_name = '__sdc_skip_col_{}'.format(column_index_str) |
149 | col_properties = {'type': ['null', 'string']} | 175 | col_properties = {'type': ['null', 'string']} |
150 | column_gs_type = 'stringValue' | 176 | column_gs_type = 'stringValue' |
177 | LOGGER.info('WARNING: SKIPPED COLUMN; NO COLUMN HEADER. SHEET: {}, COL: {}, CELL: {}1'.format( | ||
178 | sheet_title, column_name, column_letter)) | ||
179 | LOGGER.info(' This column will be skipped during data loading.') | ||
151 | 180 | ||
152 | if skipped >= 2: | 181 | if skipped >= 2: |
153 | # skipped = 2 consecutive skipped headers | 182 | # skipped = 2 consecutive skipped headers |
154 | # Remove prior_header column_name | 183 | # Remove prior_header column_name |
155 | sheet_json_schema['properties'].pop(prior_header, None) | 184 | sheet_json_schema['properties'].pop(prior_header, None) |
185 | LOGGER.info('TWO CONSECUTIVE SKIPPED COLUMNS. STOPPING SCAN AT: SHEET: {}, COL: {}, CELL {}1'.format( | ||
186 | sheet_title, column_name, column_letter)) | ||
156 | break | 187 | break |
157 | 188 | ||
158 | else: | 189 | else: |
@@ -245,7 +276,7 @@ def get_schemas(client, spreadsheet_id): | |||
245 | for sheet in sheets: | 276 | for sheet in sheets: |
246 | # GET sheet_json_schema for each worksheet (from function above) | 277 | # GET sheet_json_schema for each worksheet (from function above) |
247 | sheet_json_schema, columns = get_sheet_metadata(sheet, spreadsheet_id, client) | 278 | sheet_json_schema, columns = get_sheet_metadata(sheet, spreadsheet_id, client) |
248 | LOGGER.info('columns = {}'.format(columns)) | 279 | # LOGGER.info('columns = {}'.format(columns)) |
249 | 280 | ||
250 | sheet_title = sheet.get('properties', {}).get('title') | 281 | sheet_title = sheet.get('properties', {}).get('title') |
251 | schemas[sheet_title] = sheet_json_schema | 282 | schemas[sheet_title] = sheet_json_schema |