]> git.immae.eu Git - github/fretlink/tap-google-sheets.git/blob - tap_google_sheets/streams.py
v.0.0.2 schema and sync changes (#1)
[github/fretlink/tap-google-sheets.git] / tap_google_sheets / streams.py
1 from collections import OrderedDict
2
3 # streams: API URL endpoints to be called
4 # properties:
5 # <root node>: Plural stream name for the endpoint
6 # path: API endpoint relative path, when added to the base URL, creates the full path,
7 # default = stream_name
8 # key_properties: Primary key fields for identifying an endpoint record.
9 # replication_method: INCREMENTAL or FULL_TABLE
10 # replication_keys: bookmark_field(s), typically a date-time, used for filtering the results
11 # and setting the state
12 # params: Query, sort, and other endpoint specific parameters; default = {}
13 # data_key: JSON element containing the results list for the endpoint;
14 # default = root (no data_key)
15
# file_metadata: looks the file up through the Google Drive API to obtain its
#   details and to tell whether it has been modified. Also carries audit info:
#   who last changed the file, and when.
FILE_METADATA = {
    "api": "files",
    "path": "files/{spreadsheet_id}",
    "key_properties": ["id"],
    "replication_method": "INCREMENTAL",
    "replication_keys": ["modifiedTime"],
    "params": {
        # Comma-separated field names sent as the `fields` parameter.
        "fields": ",".join((
            "id",
            "name",
            "createdTime",
            "modifiedTime",
            "version",
            "teamDriveId",
            "driveId",
            "lastModifyingUser",
        )),
    },
}
28
# spreadsheet_metadata: fetches basic information about the spreadsheet and
#   the sheets it contains. The request sets includeGridData to "false".
SPREADSHEET_METADATA = {
    "api": "sheets",
    "path": "spreadsheets/{spreadsheet_id}",
    "key_properties": ["spreadsheetId"],
    "replication_method": "FULL_TABLE",
    "params": {"includeGridData": "false"},
}
39
# sheet_metadata: fetches the header row and the first data row (rows 1 and 2)
#   of one sheet in the spreadsheet. The response includes detailed metadata
#   for each cell in those two rows, such as data type and formatting.
SHEET_METADATA = {
    "api": "sheets",
    "path": "spreadsheets/{spreadsheet_id}",
    "key_properties": ["sheetId"],
    "replication_method": "FULL_TABLE",
    "params": {
        "includeGridData": "true",
        # Restrict the request to rows 1-2 of the named sheet.
        "ranges": "'{sheet_title}'!1:2",
    },
}
53
# sheets_loaded: fetches a batch of rows for each sheet in the spreadsheet.
#   Every query goes to the `values` endpoint, which returns the data only,
#   without the formatting/type metadata.
SHEETS_LOADED = {
    "api": "sheets",
    "path": (
        "spreadsheets/{spreadsheet_id}"
        "/values/'{sheet_title}'!{range_rows}"
    ),
    "data_key": "values",
    "key_properties": ["spreadsheetId", "sheetId", "loadDate"],
    "replication_method": "FULL_TABLE",
    "params": {
        "dateTimeRenderOption": "SERIAL_NUMBER",
        "valueRenderOption": "UNFORMATTED_VALUE",
        "majorDimension": "ROWS",
    },
}
68
# Registry of all streams, keyed by stream name. The insertion order below is
# deliberate: it keeps the streams in their sequential, logical order.
STREAMS = OrderedDict((
    ('file_metadata', FILE_METADATA),
    ('spreadsheet_metadata', SPREADSHEET_METADATA),
    ('sheet_metadata', SHEET_METADATA),
    ('sheets_loaded', SHEETS_LOADED),
))