X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=tap_google_sheets%2Fstreams.py;h=ad5529f4ac3defef3ae215cfb7c1cf701e98137c;hb=0a0f2e89de6cde25ba6ef104c64e30f92091e007;hp=231a41d0cde60e8915136e7f79bbf44b1454201d;hpb=66bc495f76684fc0de611643e7c464bd46203125;p=github%2Ffretlink%2Ftap-google-sheets.git diff --git a/tap_google_sheets/streams.py b/tap_google_sheets/streams.py index 231a41d..ad5529f 100644 --- a/tap_google_sheets/streams.py +++ b/tap_google_sheets/streams.py @@ -8,12 +8,13 @@ from collections import OrderedDict # key_properties: Primary key fields for identifying an endpoint record. # replication_method: INCREMENTAL or FULL_TABLE # replication_keys: bookmark_field(s), typically a date-time, used for filtering the results -# and setting the state +# and setting the state # params: Query, sort, and other endpoint specific parameters; default = {} -# data_key: JSON element containing the results list for the endpoint; default = root (no data_key) -# bookmark_query_field: From date-time field used for filtering the query -# bookmark_type: Data type for bookmark, integer or datetime +# data_key: JSON element containing the results list for the endpoint; +# default = root (no data_key) +# file_metadata: Queries Google Drive API to get file information and see if file has been modified +# Provides audit info about who and when last changed the file. FILE_METADATA = { "api": "files", "path": "files/{spreadsheet_id}", @@ -25,6 +26,7 @@ FILE_METADATA = { } } +# spreadsheet_metadata: Queries spreadsheet to get basic information on spreadsheet and sheets SPREADSHEET_METADATA = { "api": "sheets", "path": "spreadsheets/{spreadsheet_id}", @@ -35,6 +37,9 @@ SPREADSHEET_METADATA = { } } +# sheet_metadata: Get Header Row and 1st data row (Rows 1 & 2) from a Sheet on Spreadsheet. +# This endpoint includes detailed metadata about each cell in the header and first data row +# incl. data type, formatting, etc. 
SHEET_METADATA = { "api": "sheets", "path": "spreadsheets/{spreadsheet_id}", @@ -46,6 +51,8 @@ SHEET_METADATA = { } } +# sheets_loaded: Queries a batch of Rows for each Sheet in the Spreadsheet. +# Each query uses the `values` endpoint, to get data-only, w/out the formatting/type metadata. SHEETS_LOADED = { "api": "sheets", "path": "spreadsheets/{spreadsheet_id}/values/'{sheet_title}'!{range_rows}", @@ -59,7 +66,7 @@ SHEETS_LOADED = { } } -# Ensure streams are ordered logically +# Ensure streams are ordered sequentially, logically. STREAMS = OrderedDict() STREAMS['file_metadata'] = FILE_METADATA STREAMS['spreadsheet_metadata'] = SPREADSHEET_METADATA