diff options
author | Jeff Huth <jeff.huth@bytecode.io> | 2019-11-13 17:03:56 -0800 |
---|---|---|
committer | Jeff Huth <jeff.huth@bytecode.io> | 2019-11-13 17:03:56 -0800 |
commit | 89643ba6fa98db82efd3246805ef801a8bfb5c81 (patch) | |
tree | 739027b4e827def2db81631c9d6ed58ec2b97809 | |
parent | 5f8005471d3affaaf23489df93a58ca64c3da3ca (diff) | |
download | tap-google-sheets-89643ba6fa98db82efd3246805ef801a8bfb5c81.tar.gz tap-google-sheets-89643ba6fa98db82efd3246805ef801a8bfb5c81.tar.zst tap-google-sheets-89643ba6fa98db82efd3246805ef801a8bfb5c81.zip |
Initial commit
Discovery mode works. Still working on normal sync.
-rw-r--r-- | .gitignore | 22 | ||||
-rw-r--r-- | CHANGELOG.md | 4 | ||||
-rw-r--r-- | LICENSE | 661 | ||||
-rw-r--r-- | MANIFEST.in | 2 | ||||
-rw-r--r-- | README.md | 173 | ||||
-rw-r--r-- | config.json.example | 8 | ||||
-rw-r--r-- | setup.py | 25 | ||||
-rw-r--r-- | state.json.example | 6 | ||||
-rw-r--r-- | tap_google_sheets/__init__.py | 57 | ||||
-rw-r--r-- | tap_google_sheets/client.py | 247 | ||||
-rw-r--r-- | tap_google_sheets/discover.py | 26 | ||||
-rw-r--r-- | tap_google_sheets/schema.py | 228 | ||||
-rw-r--r-- | tap_google_sheets/schemas/file_metadata.json | 44 | ||||
-rw-r--r-- | tap_google_sheets/schemas/sheet_metadata.json | 89 | ||||
-rw-r--r-- | tap_google_sheets/schemas/sheets_loaded.json | 22 | ||||
-rw-r--r-- | tap_google_sheets/schemas/spreadsheet_metadata.json | 30 | ||||
-rw-r--r-- | tap_google_sheets/streams.py | 66 | ||||
-rw-r--r-- | tap_google_sheets/sync.py | 281 |
18 files changed, 1990 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d9633da --- /dev/null +++ b/.gitignore | |||
@@ -0,0 +1,22 @@ | |||
1 | /.vscode/* | ||
2 | /.ipynb_checkpoints/* | ||
3 | config/ | ||
4 | virtualenvs/ | ||
5 | *catalog*.json | ||
6 | *config*.json | ||
7 | *state*.json | ||
8 | target*.json | ||
9 | *.sublime-* | ||
10 | .python-version | ||
11 | singer-check-tap-data | ||
12 | *.pyc | ||
13 | *.egg-info | ||
14 | dist/ | ||
15 | __pycache__/ | ||
16 | venv/ | ||
17 | build/ | ||
18 | tap_google_sheets/.vscode/settings.json | ||
19 | *.ipynb | ||
20 | .DS_Store | ||
21 | test_configuration.py | ||
22 | tap_target_commands.sh | ||
diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..d58f396 --- /dev/null +++ b/CHANGELOG.md | |||
@@ -0,0 +1,4 @@ | |||
1 | # Changelog | ||
2 | |||
3 | ## 0.0.1 | ||
4 | * Initial commit | ||
@@ -0,0 +1,661 @@ | |||
1 | GNU AFFERO GENERAL PUBLIC LICENSE | ||
2 | Version 3, 19 November 2007 | ||
3 | |||
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> | ||
5 | Everyone is permitted to copy and distribute verbatim copies | ||
6 | of this license document, but changing it is not allowed. | ||
7 | |||
8 | Preamble | ||
9 | |||
10 | The GNU Affero General Public License is a free, copyleft license for | ||
11 | software and other kinds of works, specifically designed to ensure | ||
12 | cooperation with the community in the case of network server software. | ||
13 | |||
14 | The licenses for most software and other practical works are designed | ||
15 | to take away your freedom to share and change the works. By contrast, | ||
16 | our General Public Licenses are intended to guarantee your freedom to | ||
17 | share and change all versions of a program--to make sure it remains free | ||
18 | software for all its users. | ||
19 | |||
20 | When we speak of free software, we are referring to freedom, not | ||
21 | price. Our General Public Licenses are designed to make sure that you | ||
22 | have the freedom to distribute copies of free software (and charge for | ||
23 | them if you wish), that you receive source code or can get it if you | ||
24 | want it, that you can change the software or use pieces of it in new | ||
25 | free programs, and that you know you can do these things. | ||
26 | |||
27 | Developers that use our General Public Licenses protect your rights | ||
28 | with two steps: (1) assert copyright on the software, and (2) offer | ||
29 | you this License which gives you legal permission to copy, distribute | ||
30 | and/or modify the software. | ||
31 | |||
32 | A secondary benefit of defending all users' freedom is that | ||
33 | improvements made in alternate versions of the program, if they | ||
34 | receive widespread use, become available for other developers to | ||
35 | incorporate. Many developers of free software are heartened and | ||
36 | encouraged by the resulting cooperation. However, in the case of | ||
37 | software used on network servers, this result may fail to come about. | ||
38 | The GNU General Public License permits making a modified version and | ||
39 | letting the public access it on a server without ever releasing its | ||
40 | source code to the public. | ||
41 | |||
42 | The GNU Affero General Public License is designed specifically to | ||
43 | ensure that, in such cases, the modified source code becomes available | ||
44 | to the community. It requires the operator of a network server to | ||
45 | provide the source code of the modified version running there to the | ||
46 | users of that server. Therefore, public use of a modified version, on | ||
47 | a publicly accessible server, gives the public access to the source | ||
48 | code of the modified version. | ||
49 | |||
50 | An older license, called the Affero General Public License and | ||
51 | published by Affero, was designed to accomplish similar goals. This is | ||
52 | a different license, not a version of the Affero GPL, but Affero has | ||
53 | released a new version of the Affero GPL which permits relicensing under | ||
54 | this license. | ||
55 | |||
56 | The precise terms and conditions for copying, distribution and | ||
57 | modification follow. | ||
58 | |||
59 | TERMS AND CONDITIONS | ||
60 | |||
61 | 0. Definitions. | ||
62 | |||
63 | "This License" refers to version 3 of the GNU Affero General Public License. | ||
64 | |||
65 | "Copyright" also means copyright-like laws that apply to other kinds of | ||
66 | works, such as semiconductor masks. | ||
67 | |||
68 | "The Program" refers to any copyrightable work licensed under this | ||
69 | License. Each licensee is addressed as "you". "Licensees" and | ||
70 | "recipients" may be individuals or organizations. | ||
71 | |||
72 | To "modify" a work means to copy from or adapt all or part of the work | ||
73 | in a fashion requiring copyright permission, other than the making of an | ||
74 | exact copy. The resulting work is called a "modified version" of the | ||
75 | earlier work or a work "based on" the earlier work. | ||
76 | |||
77 | A "covered work" means either the unmodified Program or a work based | ||
78 | on the Program. | ||
79 | |||
80 | To "propagate" a work means to do anything with it that, without | ||
81 | permission, would make you directly or secondarily liable for | ||
82 | infringement under applicable copyright law, except executing it on a | ||
83 | computer or modifying a private copy. Propagation includes copying, | ||
84 | distribution (with or without modification), making available to the | ||
85 | public, and in some countries other activities as well. | ||
86 | |||
87 | To "convey" a work means any kind of propagation that enables other | ||
88 | parties to make or receive copies. Mere interaction with a user through | ||
89 | a computer network, with no transfer of a copy, is not conveying. | ||
90 | |||
91 | An interactive user interface displays "Appropriate Legal Notices" | ||
92 | to the extent that it includes a convenient and prominently visible | ||
93 | feature that (1) displays an appropriate copyright notice, and (2) | ||
94 | tells the user that there is no warranty for the work (except to the | ||
95 | extent that warranties are provided), that licensees may convey the | ||
96 | work under this License, and how to view a copy of this License. If | ||
97 | the interface presents a list of user commands or options, such as a | ||
98 | menu, a prominent item in the list meets this criterion. | ||
99 | |||
100 | 1. Source Code. | ||
101 | |||
102 | The "source code" for a work means the preferred form of the work | ||
103 | for making modifications to it. "Object code" means any non-source | ||
104 | form of a work. | ||
105 | |||
106 | A "Standard Interface" means an interface that either is an official | ||
107 | standard defined by a recognized standards body, or, in the case of | ||
108 | interfaces specified for a particular programming language, one that | ||
109 | is widely used among developers working in that language. | ||
110 | |||
111 | The "System Libraries" of an executable work include anything, other | ||
112 | than the work as a whole, that (a) is included in the normal form of | ||
113 | packaging a Major Component, but which is not part of that Major | ||
114 | Component, and (b) serves only to enable use of the work with that | ||
115 | Major Component, or to implement a Standard Interface for which an | ||
116 | implementation is available to the public in source code form. A | ||
117 | "Major Component", in this context, means a major essential component | ||
118 | (kernel, window system, and so on) of the specific operating system | ||
119 | (if any) on which the executable work runs, or a compiler used to | ||
120 | produce the work, or an object code interpreter used to run it. | ||
121 | |||
122 | The "Corresponding Source" for a work in object code form means all | ||
123 | the source code needed to generate, install, and (for an executable | ||
124 | work) run the object code and to modify the work, including scripts to | ||
125 | control those activities. However, it does not include the work's | ||
126 | System Libraries, or general-purpose tools or generally available free | ||
127 | programs which are used unmodified in performing those activities but | ||
128 | which are not part of the work. For example, Corresponding Source | ||
129 | includes interface definition files associated with source files for | ||
130 | the work, and the source code for shared libraries and dynamically | ||
131 | linked subprograms that the work is specifically designed to require, | ||
132 | such as by intimate data communication or control flow between those | ||
133 | subprograms and other parts of the work. | ||
134 | |||
135 | The Corresponding Source need not include anything that users | ||
136 | can regenerate automatically from other parts of the Corresponding | ||
137 | Source. | ||
138 | |||
139 | The Corresponding Source for a work in source code form is that | ||
140 | same work. | ||
141 | |||
142 | 2. Basic Permissions. | ||
143 | |||
144 | All rights granted under this License are granted for the term of | ||
145 | copyright on the Program, and are irrevocable provided the stated | ||
146 | conditions are met. This License explicitly affirms your unlimited | ||
147 | permission to run the unmodified Program. The output from running a | ||
148 | covered work is covered by this License only if the output, given its | ||
149 | content, constitutes a covered work. This License acknowledges your | ||
150 | rights of fair use or other equivalent, as provided by copyright law. | ||
151 | |||
152 | You may make, run and propagate covered works that you do not | ||
153 | convey, without conditions so long as your license otherwise remains | ||
154 | in force. You may convey covered works to others for the sole purpose | ||
155 | of having them make modifications exclusively for you, or provide you | ||
156 | with facilities for running those works, provided that you comply with | ||
157 | the terms of this License in conveying all material for which you do | ||
158 | not control copyright. Those thus making or running the covered works | ||
159 | for you must do so exclusively on your behalf, under your direction | ||
160 | and control, on terms that prohibit them from making any copies of | ||
161 | your copyrighted material outside their relationship with you. | ||
162 | |||
163 | Conveying under any other circumstances is permitted solely under | ||
164 | the conditions stated below. Sublicensing is not allowed; section 10 | ||
165 | makes it unnecessary. | ||
166 | |||
167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. | ||
168 | |||
169 | No covered work shall be deemed part of an effective technological | ||
170 | measure under any applicable law fulfilling obligations under article | ||
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or | ||
172 | similar laws prohibiting or restricting circumvention of such | ||
173 | measures. | ||
174 | |||
175 | When you convey a covered work, you waive any legal power to forbid | ||
176 | circumvention of technological measures to the extent such circumvention | ||
177 | is effected by exercising rights under this License with respect to | ||
178 | the covered work, and you disclaim any intention to limit operation or | ||
179 | modification of the work as a means of enforcing, against the work's | ||
180 | users, your or third parties' legal rights to forbid circumvention of | ||
181 | technological measures. | ||
182 | |||
183 | 4. Conveying Verbatim Copies. | ||
184 | |||
185 | You may convey verbatim copies of the Program's source code as you | ||
186 | receive it, in any medium, provided that you conspicuously and | ||
187 | appropriately publish on each copy an appropriate copyright notice; | ||
188 | keep intact all notices stating that this License and any | ||
189 | non-permissive terms added in accord with section 7 apply to the code; | ||
190 | keep intact all notices of the absence of any warranty; and give all | ||
191 | recipients a copy of this License along with the Program. | ||
192 | |||
193 | You may charge any price or no price for each copy that you convey, | ||
194 | and you may offer support or warranty protection for a fee. | ||
195 | |||
196 | 5. Conveying Modified Source Versions. | ||
197 | |||
198 | You may convey a work based on the Program, or the modifications to | ||
199 | produce it from the Program, in the form of source code under the | ||
200 | terms of section 4, provided that you also meet all of these conditions: | ||
201 | |||
202 | a) The work must carry prominent notices stating that you modified | ||
203 | it, and giving a relevant date. | ||
204 | |||
205 | b) The work must carry prominent notices stating that it is | ||
206 | released under this License and any conditions added under section | ||
207 | 7. This requirement modifies the requirement in section 4 to | ||
208 | "keep intact all notices". | ||
209 | |||
210 | c) You must license the entire work, as a whole, under this | ||
211 | License to anyone who comes into possession of a copy. This | ||
212 | License will therefore apply, along with any applicable section 7 | ||
213 | additional terms, to the whole of the work, and all its parts, | ||
214 | regardless of how they are packaged. This License gives no | ||
215 | permission to license the work in any other way, but it does not | ||
216 | invalidate such permission if you have separately received it. | ||
217 | |||
218 | d) If the work has interactive user interfaces, each must display | ||
219 | Appropriate Legal Notices; however, if the Program has interactive | ||
220 | interfaces that do not display Appropriate Legal Notices, your | ||
221 | work need not make them do so. | ||
222 | |||
223 | A compilation of a covered work with other separate and independent | ||
224 | works, which are not by their nature extensions of the covered work, | ||
225 | and which are not combined with it such as to form a larger program, | ||
226 | in or on a volume of a storage or distribution medium, is called an | ||
227 | "aggregate" if the compilation and its resulting copyright are not | ||
228 | used to limit the access or legal rights of the compilation's users | ||
229 | beyond what the individual works permit. Inclusion of a covered work | ||
230 | in an aggregate does not cause this License to apply to the other | ||
231 | parts of the aggregate. | ||
232 | |||
233 | 6. Conveying Non-Source Forms. | ||
234 | |||
235 | You may convey a covered work in object code form under the terms | ||
236 | of sections 4 and 5, provided that you also convey the | ||
237 | machine-readable Corresponding Source under the terms of this License, | ||
238 | in one of these ways: | ||
239 | |||
240 | a) Convey the object code in, or embodied in, a physical product | ||
241 | (including a physical distribution medium), accompanied by the | ||
242 | Corresponding Source fixed on a durable physical medium | ||
243 | customarily used for software interchange. | ||
244 | |||
245 | b) Convey the object code in, or embodied in, a physical product | ||
246 | (including a physical distribution medium), accompanied by a | ||
247 | written offer, valid for at least three years and valid for as | ||
248 | long as you offer spare parts or customer support for that product | ||
249 | model, to give anyone who possesses the object code either (1) a | ||
250 | copy of the Corresponding Source for all the software in the | ||
251 | product that is covered by this License, on a durable physical | ||
252 | medium customarily used for software interchange, for a price no | ||
253 | more than your reasonable cost of physically performing this | ||
254 | conveying of source, or (2) access to copy the | ||
255 | Corresponding Source from a network server at no charge. | ||
256 | |||
257 | c) Convey individual copies of the object code with a copy of the | ||
258 | written offer to provide the Corresponding Source. This | ||
259 | alternative is allowed only occasionally and noncommercially, and | ||
260 | only if you received the object code with such an offer, in accord | ||
261 | with subsection 6b. | ||
262 | |||
263 | d) Convey the object code by offering access from a designated | ||
264 | place (gratis or for a charge), and offer equivalent access to the | ||
265 | Corresponding Source in the same way through the same place at no | ||
266 | further charge. You need not require recipients to copy the | ||
267 | Corresponding Source along with the object code. If the place to | ||
268 | copy the object code is a network server, the Corresponding Source | ||
269 | may be on a different server (operated by you or a third party) | ||
270 | that supports equivalent copying facilities, provided you maintain | ||
271 | clear directions next to the object code saying where to find the | ||
272 | Corresponding Source. Regardless of what server hosts the | ||
273 | Corresponding Source, you remain obligated to ensure that it is | ||
274 | available for as long as needed to satisfy these requirements. | ||
275 | |||
276 | e) Convey the object code using peer-to-peer transmission, provided | ||
277 | you inform other peers where the object code and Corresponding | ||
278 | Source of the work are being offered to the general public at no | ||
279 | charge under subsection 6d. | ||
280 | |||
281 | A separable portion of the object code, whose source code is excluded | ||
282 | from the Corresponding Source as a System Library, need not be | ||
283 | included in conveying the object code work. | ||
284 | |||
285 | A "User Product" is either (1) a "consumer product", which means any | ||
286 | tangible personal property which is normally used for personal, family, | ||
287 | or household purposes, or (2) anything designed or sold for incorporation | ||
288 | into a dwelling. In determining whether a product is a consumer product, | ||
289 | doubtful cases shall be resolved in favor of coverage. For a particular | ||
290 | product received by a particular user, "normally used" refers to a | ||
291 | typical or common use of that class of product, regardless of the status | ||
292 | of the particular user or of the way in which the particular user | ||
293 | actually uses, or expects or is expected to use, the product. A product | ||
294 | is a consumer product regardless of whether the product has substantial | ||
295 | commercial, industrial or non-consumer uses, unless such uses represent | ||
296 | the only significant mode of use of the product. | ||
297 | |||
298 | "Installation Information" for a User Product means any methods, | ||
299 | procedures, authorization keys, or other information required to install | ||
300 | and execute modified versions of a covered work in that User Product from | ||
301 | a modified version of its Corresponding Source. The information must | ||
302 | suffice to ensure that the continued functioning of the modified object | ||
303 | code is in no case prevented or interfered with solely because | ||
304 | modification has been made. | ||
305 | |||
306 | If you convey an object code work under this section in, or with, or | ||
307 | specifically for use in, a User Product, and the conveying occurs as | ||
308 | part of a transaction in which the right of possession and use of the | ||
309 | User Product is transferred to the recipient in perpetuity or for a | ||
310 | fixed term (regardless of how the transaction is characterized), the | ||
311 | Corresponding Source conveyed under this section must be accompanied | ||
312 | by the Installation Information. But this requirement does not apply | ||
313 | if neither you nor any third party retains the ability to install | ||
314 | modified object code on the User Product (for example, the work has | ||
315 | been installed in ROM). | ||
316 | |||
317 | The requirement to provide Installation Information does not include a | ||
318 | requirement to continue to provide support service, warranty, or updates | ||
319 | for a work that has been modified or installed by the recipient, or for | ||
320 | the User Product in which it has been modified or installed. Access to a | ||
321 | network may be denied when the modification itself materially and | ||
322 | adversely affects the operation of the network or violates the rules and | ||
323 | protocols for communication across the network. | ||
324 | |||
325 | Corresponding Source conveyed, and Installation Information provided, | ||
326 | in accord with this section must be in a format that is publicly | ||
327 | documented (and with an implementation available to the public in | ||
328 | source code form), and must require no special password or key for | ||
329 | unpacking, reading or copying. | ||
330 | |||
331 | 7. Additional Terms. | ||
332 | |||
333 | "Additional permissions" are terms that supplement the terms of this | ||
334 | License by making exceptions from one or more of its conditions. | ||
335 | Additional permissions that are applicable to the entire Program shall | ||
336 | be treated as though they were included in this License, to the extent | ||
337 | that they are valid under applicable law. If additional permissions | ||
338 | apply only to part of the Program, that part may be used separately | ||
339 | under those permissions, but the entire Program remains governed by | ||
340 | this License without regard to the additional permissions. | ||
341 | |||
342 | When you convey a copy of a covered work, you may at your option | ||
343 | remove any additional permissions from that copy, or from any part of | ||
344 | it. (Additional permissions may be written to require their own | ||
345 | removal in certain cases when you modify the work.) You may place | ||
346 | additional permissions on material, added by you to a covered work, | ||
347 | for which you have or can give appropriate copyright permission. | ||
348 | |||
349 | Notwithstanding any other provision of this License, for material you | ||
350 | add to a covered work, you may (if authorized by the copyright holders of | ||
351 | that material) supplement the terms of this License with terms: | ||
352 | |||
353 | a) Disclaiming warranty or limiting liability differently from the | ||
354 | terms of sections 15 and 16 of this License; or | ||
355 | |||
356 | b) Requiring preservation of specified reasonable legal notices or | ||
357 | author attributions in that material or in the Appropriate Legal | ||
358 | Notices displayed by works containing it; or | ||
359 | |||
360 | c) Prohibiting misrepresentation of the origin of that material, or | ||
361 | requiring that modified versions of such material be marked in | ||
362 | reasonable ways as different from the original version; or | ||
363 | |||
364 | d) Limiting the use for publicity purposes of names of licensors or | ||
365 | authors of the material; or | ||
366 | |||
367 | e) Declining to grant rights under trademark law for use of some | ||
368 | trade names, trademarks, or service marks; or | ||
369 | |||
370 | f) Requiring indemnification of licensors and authors of that | ||
371 | material by anyone who conveys the material (or modified versions of | ||
372 | it) with contractual assumptions of liability to the recipient, for | ||
373 | any liability that these contractual assumptions directly impose on | ||
374 | those licensors and authors. | ||
375 | |||
376 | All other non-permissive additional terms are considered "further | ||
377 | restrictions" within the meaning of section 10. If the Program as you | ||
378 | received it, or any part of it, contains a notice stating that it is | ||
379 | governed by this License along with a term that is a further | ||
380 | restriction, you may remove that term. If a license document contains | ||
381 | a further restriction but permits relicensing or conveying under this | ||
382 | License, you may add to a covered work material governed by the terms | ||
383 | of that license document, provided that the further restriction does | ||
384 | not survive such relicensing or conveying. | ||
385 | |||
386 | If you add terms to a covered work in accord with this section, you | ||
387 | must place, in the relevant source files, a statement of the | ||
388 | additional terms that apply to those files, or a notice indicating | ||
389 | where to find the applicable terms. | ||
390 | |||
391 | Additional terms, permissive or non-permissive, may be stated in the | ||
392 | form of a separately written license, or stated as exceptions; | ||
393 | the above requirements apply either way. | ||
394 | |||
395 | 8. Termination. | ||
396 | |||
397 | You may not propagate or modify a covered work except as expressly | ||
398 | provided under this License. Any attempt otherwise to propagate or | ||
399 | modify it is void, and will automatically terminate your rights under | ||
400 | this License (including any patent licenses granted under the third | ||
401 | paragraph of section 11). | ||
402 | |||
403 | However, if you cease all violation of this License, then your | ||
404 | license from a particular copyright holder is reinstated (a) | ||
405 | provisionally, unless and until the copyright holder explicitly and | ||
406 | finally terminates your license, and (b) permanently, if the copyright | ||
407 | holder fails to notify you of the violation by some reasonable means | ||
408 | prior to 60 days after the cessation. | ||
409 | |||
410 | Moreover, your license from a particular copyright holder is | ||
411 | reinstated permanently if the copyright holder notifies you of the | ||
412 | violation by some reasonable means, this is the first time you have | ||
413 | received notice of violation of this License (for any work) from that | ||
414 | copyright holder, and you cure the violation prior to 30 days after | ||
415 | your receipt of the notice. | ||
416 | |||
417 | Termination of your rights under this section does not terminate the | ||
418 | licenses of parties who have received copies or rights from you under | ||
419 | this License. If your rights have been terminated and not permanently | ||
420 | reinstated, you do not qualify to receive new licenses for the same | ||
421 | material under section 10. | ||
422 | |||
423 | 9. Acceptance Not Required for Having Copies. | ||
424 | |||
425 | You are not required to accept this License in order to receive or | ||
426 | run a copy of the Program. Ancillary propagation of a covered work | ||
427 | occurring solely as a consequence of using peer-to-peer transmission | ||
428 | to receive a copy likewise does not require acceptance. However, | ||
429 | nothing other than this License grants you permission to propagate or | ||
430 | modify any covered work. These actions infringe copyright if you do | ||
431 | not accept this License. Therefore, by modifying or propagating a | ||
432 | covered work, you indicate your acceptance of this License to do so. | ||
433 | |||
434 | 10. Automatic Licensing of Downstream Recipients. | ||
435 | |||
436 | Each time you convey a covered work, the recipient automatically | ||
437 | receives a license from the original licensors, to run, modify and | ||
438 | propagate that work, subject to this License. You are not responsible | ||
439 | for enforcing compliance by third parties with this License. | ||
440 | |||
441 | An "entity transaction" is a transaction transferring control of an | ||
442 | organization, or substantially all assets of one, or subdividing an | ||
443 | organization, or merging organizations. If propagation of a covered | ||
444 | work results from an entity transaction, each party to that | ||
445 | transaction who receives a copy of the work also receives whatever | ||
446 | licenses to the work the party's predecessor in interest had or could | ||
447 | give under the previous paragraph, plus a right to possession of the | ||
448 | Corresponding Source of the work from the predecessor in interest, if | ||
449 | the predecessor has it or can get it with reasonable efforts. | ||
450 | |||
451 | You may not impose any further restrictions on the exercise of the | ||
452 | rights granted or affirmed under this License. For example, you may | ||
453 | not impose a license fee, royalty, or other charge for exercise of | ||
454 | rights granted under this License, and you may not initiate litigation | ||
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that | ||
456 | any patent claim is infringed by making, using, selling, offering for | ||
457 | sale, or importing the Program or any portion of it. | ||
458 | |||
459 | 11. Patents. | ||
460 | |||
461 | A "contributor" is a copyright holder who authorizes use under this | ||
462 | License of the Program or a work on which the Program is based. The | ||
463 | work thus licensed is called the contributor's "contributor version". | ||
464 | |||
465 | A contributor's "essential patent claims" are all patent claims | ||
466 | owned or controlled by the contributor, whether already acquired or | ||
467 | hereafter acquired, that would be infringed by some manner, permitted | ||
468 | by this License, of making, using, or selling its contributor version, | ||
469 | but do not include claims that would be infringed only as a | ||
470 | consequence of further modification of the contributor version. For | ||
471 | purposes of this definition, "control" includes the right to grant | ||
472 | patent sublicenses in a manner consistent with the requirements of | ||
473 | this License. | ||
474 | |||
475 | Each contributor grants you a non-exclusive, worldwide, royalty-free | ||
476 | patent license under the contributor's essential patent claims, to | ||
477 | make, use, sell, offer for sale, import and otherwise run, modify and | ||
478 | propagate the contents of its contributor version. | ||
479 | |||
480 | In the following three paragraphs, a "patent license" is any express | ||
481 | agreement or commitment, however denominated, not to enforce a patent | ||
482 | (such as an express permission to practice a patent or covenant not to | ||
483 | sue for patent infringement). To "grant" such a patent license to a | ||
484 | party means to make such an agreement or commitment not to enforce a | ||
485 | patent against the party. | ||
486 | |||
487 | If you convey a covered work, knowingly relying on a patent license, | ||
488 | and the Corresponding Source of the work is not available for anyone | ||
489 | to copy, free of charge and under the terms of this License, through a | ||
490 | publicly available network server or other readily accessible means, | ||
491 | then you must either (1) cause the Corresponding Source to be so | ||
492 | available, or (2) arrange to deprive yourself of the benefit of the | ||
493 | patent license for this particular work, or (3) arrange, in a manner | ||
494 | consistent with the requirements of this License, to extend the patent | ||
495 | license to downstream recipients. "Knowingly relying" means you have | ||
496 | actual knowledge that, but for the patent license, your conveying the | ||
497 | covered work in a country, or your recipient's use of the covered work | ||
498 | in a country, would infringe one or more identifiable patents in that | ||
499 | country that you have reason to believe are valid. | ||
500 | |||
501 | If, pursuant to or in connection with a single transaction or | ||
502 | arrangement, you convey, or propagate by procuring conveyance of, a | ||
503 | covered work, and grant a patent license to some of the parties | ||
504 | receiving the covered work authorizing them to use, propagate, modify | ||
505 | or convey a specific copy of the covered work, then the patent license | ||
506 | you grant is automatically extended to all recipients of the covered | ||
507 | work and works based on it. | ||
508 | |||
509 | A patent license is "discriminatory" if it does not include within | ||
510 | the scope of its coverage, prohibits the exercise of, or is | ||
511 | conditioned on the non-exercise of one or more of the rights that are | ||
512 | specifically granted under this License. You may not convey a covered | ||
513 | work if you are a party to an arrangement with a third party that is | ||
514 | in the business of distributing software, under which you make payment | ||
515 | to the third party based on the extent of your activity of conveying | ||
516 | the work, and under which the third party grants, to any of the | ||
517 | parties who would receive the covered work from you, a discriminatory | ||
518 | patent license (a) in connection with copies of the covered work | ||
519 | conveyed by you (or copies made from those copies), or (b) primarily | ||
520 | for and in connection with specific products or compilations that | ||
521 | contain the covered work, unless you entered into that arrangement, | ||
522 | or that patent license was granted, prior to 28 March 2007. | ||
523 | |||
524 | Nothing in this License shall be construed as excluding or limiting | ||
525 | any implied license or other defenses to infringement that may | ||
526 | otherwise be available to you under applicable patent law. | ||
527 | |||
528 | 12. No Surrender of Others' Freedom. | ||
529 | |||
530 | If conditions are imposed on you (whether by court order, agreement or | ||
531 | otherwise) that contradict the conditions of this License, they do not | ||
532 | excuse you from the conditions of this License. If you cannot convey a | ||
533 | covered work so as to satisfy simultaneously your obligations under this | ||
534 | License and any other pertinent obligations, then as a consequence you may | ||
535 | not convey it at all. For example, if you agree to terms that obligate you | ||
536 | to collect a royalty for further conveying from those to whom you convey | ||
537 | the Program, the only way you could satisfy both those terms and this | ||
538 | License would be to refrain entirely from conveying the Program. | ||
539 | |||
540 | 13. Remote Network Interaction; Use with the GNU General Public License. | ||
541 | |||
542 | Notwithstanding any other provision of this License, if you modify the | ||
543 | Program, your modified version must prominently offer all users | ||
544 | interacting with it remotely through a computer network (if your version | ||
545 | supports such interaction) an opportunity to receive the Corresponding | ||
546 | Source of your version by providing access to the Corresponding Source | ||
547 | from a network server at no charge, through some standard or customary | ||
548 | means of facilitating copying of software. This Corresponding Source | ||
549 | shall include the Corresponding Source for any work covered by version 3 | ||
550 | of the GNU General Public License that is incorporated pursuant to the | ||
551 | following paragraph. | ||
552 | |||
553 | Notwithstanding any other provision of this License, you have | ||
554 | permission to link or combine any covered work with a work licensed | ||
555 | under version 3 of the GNU General Public License into a single | ||
556 | combined work, and to convey the resulting work. The terms of this | ||
557 | License will continue to apply to the part which is the covered work, | ||
558 | but the work with which it is combined will remain governed by version | ||
559 | 3 of the GNU General Public License. | ||
560 | |||
561 | 14. Revised Versions of this License. | ||
562 | |||
563 | The Free Software Foundation may publish revised and/or new versions of | ||
564 | the GNU Affero General Public License from time to time. Such new versions | ||
565 | will be similar in spirit to the present version, but may differ in detail to | ||
566 | address new problems or concerns. | ||
567 | |||
568 | Each version is given a distinguishing version number. If the | ||
569 | Program specifies that a certain numbered version of the GNU Affero General | ||
570 | Public License "or any later version" applies to it, you have the | ||
571 | option of following the terms and conditions either of that numbered | ||
572 | version or of any later version published by the Free Software | ||
573 | Foundation. If the Program does not specify a version number of the | ||
574 | GNU Affero General Public License, you may choose any version ever published | ||
575 | by the Free Software Foundation. | ||
576 | |||
577 | If the Program specifies that a proxy can decide which future | ||
578 | versions of the GNU Affero General Public License can be used, that proxy's | ||
579 | public statement of acceptance of a version permanently authorizes you | ||
580 | to choose that version for the Program. | ||
581 | |||
582 | Later license versions may give you additional or different | ||
583 | permissions. However, no additional obligations are imposed on any | ||
584 | author or copyright holder as a result of your choosing to follow a | ||
585 | later version. | ||
586 | |||
587 | 15. Disclaimer of Warranty. | ||
588 | |||
589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY | ||
590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT | ||
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY | ||
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, | ||
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM | ||
595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF | ||
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. | ||
597 | |||
598 | 16. Limitation of Liability. | ||
599 | |||
600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING | ||
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS | ||
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY | ||
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE | ||
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF | ||
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD | ||
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), | ||
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF | ||
608 | SUCH DAMAGES. | ||
609 | |||
610 | 17. Interpretation of Sections 15 and 16. | ||
611 | |||
612 | If the disclaimer of warranty and limitation of liability provided | ||
613 | above cannot be given local legal effect according to their terms, | ||
614 | reviewing courts shall apply local law that most closely approximates | ||
615 | an absolute waiver of all civil liability in connection with the | ||
616 | Program, unless a warranty or assumption of liability accompanies a | ||
617 | copy of the Program in return for a fee. | ||
618 | |||
619 | END OF TERMS AND CONDITIONS | ||
620 | |||
621 | How to Apply These Terms to Your New Programs | ||
622 | |||
623 | If you develop a new program, and you want it to be of the greatest | ||
624 | possible use to the public, the best way to achieve this is to make it | ||
625 | free software which everyone can redistribute and change under these terms. | ||
626 | |||
627 | To do so, attach the following notices to the program. It is safest | ||
628 | to attach them to the start of each source file to most effectively | ||
629 | state the exclusion of warranty; and each file should have at least | ||
630 | the "copyright" line and a pointer to where the full notice is found. | ||
631 | |||
632 | <one line to give the program's name and a brief idea of what it does.> | ||
633 | Copyright (C) <year> <name of author> | ||
634 | |||
635 | This program is free software: you can redistribute it and/or modify | ||
636 | it under the terms of the GNU Affero General Public License as published | ||
637 | by the Free Software Foundation, either version 3 of the License, or | ||
638 | (at your option) any later version. | ||
639 | |||
640 | This program is distributed in the hope that it will be useful, | ||
641 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
643 | GNU Affero General Public License for more details. | ||
644 | |||
645 | You should have received a copy of the GNU Affero General Public License | ||
646 | along with this program. If not, see <https://www.gnu.org/licenses/>. | ||
647 | |||
648 | Also add information on how to contact you by electronic and paper mail. | ||
649 | |||
650 | If your software can interact with users remotely through a computer | ||
651 | network, you should also make sure that it provides a way for users to | ||
652 | get its source. For example, if your program is a web application, its | ||
653 | interface could display a "Source" link that leads users to an archive | ||
654 | of the code. There are many ways you could offer source, and different | ||
655 | solutions will be better for different programs; see section 13 for the | ||
656 | specific requirements. | ||
657 | |||
658 | You should also get your employer (if you work as a programmer) or school, | ||
659 | if any, to sign a "copyright disclaimer" for the program, if necessary. | ||
660 | For more information on this, and how to apply and follow the GNU AGPL, see | ||
661 | <https://www.gnu.org/licenses/>. | ||
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..32a5d00 --- /dev/null +++ b/MANIFEST.in | |||
@@ -0,0 +1,2 @@ | |||
1 | include LICENSE | ||
2 | include tap_google_sheets/schemas/*.json | ||
@@ -1 +1,172 @@ | |||
1 | tap-google-sheets | 1 | # tap-google-sheets |
2 | |||
3 | This is a [Singer](https://singer.io) tap that produces JSON-formatted data | ||
4 | following the [Singer | ||
5 | spec](https://github.com/singer-io/getting-started/blob/master/SPEC.md). | ||
6 | |||
7 | This tap: | ||
8 | |||
9 | - Pulls raw data from the [Google Sheets v4 API](https://developers.google.com/sheets/api) | ||
10 | - Extracts the following endpoints: | ||
11 | - [Spreadsheet File](https://developers.google.com/drive/api/v3/reference/files/get) | ||
12 | - [Spreadsheet Metadata, Sheets, Headers](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/get) | ||
13 | - [Spreadsheet Values](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets.values/get) | ||
14 | - For each Sheet: | ||
15 | - Outputs the schema for each resource (based on the column header and datatypes of first row of data) | ||
16 | - Outputs a record for all columns with column headers, and for each row of data until it reaches an empty row | ||
17 | |||
18 | ## API Endpoints | ||
19 | [**file (GET)**](https://developers.google.com/drive/api/v3/reference/files/get) | ||
20 | - Endpoint: https://www.googleapis.com/drive/v3/files/${spreadsheet_id}?fields=id,name,createdTime,modifiedTime,version | ||
21 | - Primary keys: id | ||
22 | - Replication strategy: Full (GET file audit data for spreadsheet_id in config) | ||
23 | - Process/Transformations: Replicate Data if Modified | ||
24 | |||
25 | [**metadata (GET)**](https://developers.google.com/drive/api/v3/reference/files/get) | ||
26 | - Endpoint: https://sheets.googleapis.com/v4/spreadsheets/${spreadsheet_id}?includeGridData=true&ranges=1:2 | ||
27 | - This endpoint returns spreadsheet metadata, sheet metadata, and value metadata (data type information) | ||
28 | - Primary keys: spreadsheetId, title, field_name | ||
29 | - Foreign keys: None | ||
30 | - Replication strategy: Full (get and replace file metadata for spreadsheet_id in config) | ||
31 | - Process/Transformations: | ||
32 | - Verify Sheets: Check sheets exist (compared to catalog) and check gridProperties (available area) | ||
33 | - sheetId, title, index, gridProperties (rowCount, columnCount) | ||
34 | - Verify Field Headers (1st row): Check field headers exist (compared to catalog), missing headers (columns to skip), column order/position, and column uniqueness | ||
35 | - Header's field_name, position: data.rowData[0].values[i].formattedValue | ||
36 | - Create/Verify Datatypes (2nd row): | ||
37 | - Row 2's datatype, format: data.rowData[1].values[i] | ||
38 | - First check: | ||
39 | - [effectiveValue: key](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/other#ExtendedValue) | ||
40 | - Valid types: numberValue, stringValue, boolValue | ||
41 | - Invalid types: formulaValue, errorValue | ||
42 | - Then check: | ||
43 | - [effectiveFormat.numberFormat.type](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType) | ||
44 | - Valid types: UNSPECIFIED, TEXT, NUMBER, PERCENT, CURRENCY, DATE, TIME, DATE_TIME, SCIENTIFIC | ||
45 | - If DATE or DATE_TIME, set JSON schema datatype = string and format = date-time | ||
46 | - [effectiveFormat.numberFormat.pattern](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormat) | ||
47 | |||
48 | [**values (GET)**](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets.values/get) | ||
49 | - Endpoint: https://sheets.googleapis.com/v4/spreadsheets/${spreadsheet_id}/values/'${sheet_name}'!${row_range}?dateTimeRenderOption=SERIAL_NUMBER&valueRenderOption=UNFORMATTED_VALUE&majorDimension=ROWS | ||
50 | - This endpoint loops through sheets and row ranges to get the [unformatted values](https://developers.google.com/sheets/api/reference/rest/v4/ValueRenderOption) (effective values only), dates and datetimes as [serial numbers](https://developers.google.com/sheets/api/reference/rest/v4/DateTimeRenderOption) | ||
51 | - Primary keys: row | ||
52 | - Replication strategy: Full (GET file audit data for spreadsheet_id in config) | ||
53 | - Process/Transformations: | ||
54 | - Loop through sheets (compared to catalog selection) | ||
55 | - Send metadata for sheet | ||
56 | - Loop through ranges of rows until reaching empty row or area max row (from sheet metadata) | ||
57 | - Transform values, if necessary (dates, date-times, boolean, integer, numbers) | ||
58 | - Process/send records | ||
59 | |||
60 | ## Authentication | ||
61 | The [**Google Sheets Setup & Authentication**](https://drive.google.com/open?id=1FojlvtLwS0-BzGS37R0jEXtwSHqSiO1Uw-7RKQQO-C4) Google Doc provides instructions showing how to configure the Google Cloud API credentials to enable Google Drive and Google Sheets APIs, configure Google Cloud to authorize/verify your domain ownership, generate an API key (client_id, client_secret), authenticate and generate a refresh_token, and prepare your tap config.json with the necessary parameters. | ||
62 | - Enable Google Drive APIs and Authorization Scope: https://www.googleapis.com/auth/drive.metadata.readonly | ||
63 | - Enable Google Sheets API and Authorization Scope: https://www.googleapis.com/auth/spreadsheets.readonly | ||
64 | - Tap config.json parameters: | ||
65 | - client_id: identifies your application | ||
66 | - client_secret: authenticates your application | ||
67 | - refresh_token: generates an access token to authorize your session | ||
68 | - spreadsheet_id: unique identifier for each spreadsheet in Google Drive | ||
69 | - start_date: absolute minimum start date to check file modified | ||
70 | - user_agent: tap-name and email address; identifies your application in the Remote API server logs | ||
71 | |||
72 | ## Quick Start | ||
73 | |||
74 | 1. Install | ||
75 | |||
76 | Clone this repository, and then install using setup.py. We recommend using a virtualenv: | ||
77 | |||
78 | ```bash | ||
79 | > virtualenv -p python3 venv | ||
80 | > source venv/bin/activate | ||
81 | > python setup.py install | ||
82 | OR | ||
83 | > cd .../tap-google-sheets | ||
84 | > pip install . | ||
85 | ``` | ||
86 | 2. Dependent libraries | ||
87 | The following dependent libraries were installed. | ||
88 | ```bash | ||
89 | > pip install target-json | ||
90 | > pip install target-stitch | ||
91 | > pip install singer-tools | ||
92 | > pip install singer-python | ||
93 | ``` | ||
94 | - [singer-tools](https://github.com/singer-io/singer-tools) | ||
95 | - [target-stitch](https://github.com/singer-io/target-stitch) | ||
96 | |||
97 | 3. Create your tap's `config.json` file. Include the client_id, client_secret, refresh_token, spreadsheet_id (the unique identifier of the Google spreadsheet to extract), start_date (UTC format), and user_agent (tap name with the api user email address). | ||
98 | |||
99 | ```json | ||
100 | { | ||
101 | "client_id": "YOUR_CLIENT_ID", | ||
102 | "client_secret": "YOUR_CLIENT_SECRET", | ||
103 | "refresh_token": "YOUR_REFRESH_TOKEN", | ||
104 | "spreadsheet_id": "YOUR_GOOGLE_SPREADSHEET_ID", | ||
105 | "start_date": "2019-01-01T00:00:00Z", | ||
106 | "user_agent": "tap-google-sheets <api_user_email@example.com>" | ||
107 | } | ||
108 | ``` | ||
109 | |||
110 | Optionally, also create a `state.json` file. `currently_syncing` is an optional attribute used for identifying the last object to be synced in case the job is interrupted mid-stream. The next run would begin where the last job left off. | ||
111 | Only the `performance_reports` uses a bookmark. The date-time bookmark is stored in a nested structure based on the endpoint, site, and sub_type. | ||
112 | |||
113 | ```json | ||
114 | { | ||
115 | "currently_syncing": "file_metadata", | ||
116 | "bookmarks": { | ||
117 | "file_metadata": "2019-09-27T22:34:39.000000Z", | ||
118 | "sheets_loaded": "2019-09-27T21:55:42.000000Z" | ||
119 | } | ||
120 | } | ||
121 | ``` | ||
122 | |||
123 | 4. Run the Tap in Discovery Mode | ||
124 | This creates a catalog.json for selecting objects/fields to integrate: | ||
125 | ```bash | ||
126 | tap-google-sheets --config config.json --discover > catalog.json | ||
127 | ``` | ||
128 | See the Singer docs on discovery mode | ||
129 | [here](https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#discovery-mode). | ||
130 | |||
131 | 5. Run the Tap in Sync Mode (with catalog) and [write out to state file](https://github.com/singer-io/getting-started/blob/master/docs/RUNNING_AND_DEVELOPING.md#running-a-singer-tap-with-a-singer-target) | ||
132 | |||
133 | For Sync mode: | ||
134 | ```bash | ||
135 | > tap-google-sheets --config tap_config.json --catalog catalog.json > state.json | ||
136 | > tail -1 state.json > state.json.tmp && mv state.json.tmp state.json | ||
137 | ``` | ||
138 | To load to json files to verify outputs: | ||
139 | ```bash | ||
140 | > tap-google-sheets --config tap_config.json --catalog catalog.json | target-json > state.json | ||
141 | > tail -1 state.json > state.json.tmp && mv state.json.tmp state.json | ||
142 | ``` | ||
143 | To pseudo-load to [Stitch Import API](https://github.com/singer-io/target-stitch) with dry run: | ||
144 | ```bash | ||
145 | > tap-google-sheets --config tap_config.json --catalog catalog.json | target-stitch --config target_config.json --dry-run > state.json | ||
146 | > tail -1 state.json > state.json.tmp && mv state.json.tmp state.json | ||
147 | ``` | ||
148 | |||
149 | 6. Test the Tap | ||
150 | |||
151 | While developing the Google Sheets tap, the following utilities were run in accordance with Singer.io best practices: | ||
152 | Pylint to improve [code quality](https://github.com/singer-io/getting-started/blob/master/docs/BEST_PRACTICES.md#code-quality): | ||
153 | ```bash | ||
154 | > pylint tap_google_sheets -d missing-docstring -d logging-format-interpolation -d too-many-locals -d too-many-arguments | ||
155 | ``` | ||
156 | Pylint test resulted in the following score: | ||
157 | ```bash | ||
158 | TBD | ||
159 | ``` | ||
160 | |||
161 | To [check the tap](https://github.com/singer-io/singer-tools#singer-check-tap) and verify working: | ||
162 | ```bash | ||
163 | > tap-google-sheets --config tap_config.json --catalog catalog.json | singer-check-tap > state.json | ||
164 | > tail -1 state.json > state.json.tmp && mv state.json.tmp state.json | ||
165 | ``` | ||
166 | Check tap resulted in the following: | ||
167 | ```bash | ||
168 | TBD | ||
169 | ``` | ||
170 | --- | ||
171 | |||
172 | Copyright © 2019 Stitch | ||
diff --git a/config.json.example b/config.json.example new file mode 100644 index 0000000..159aafb --- /dev/null +++ b/config.json.example | |||
@@ -0,0 +1,8 @@ | |||
1 | { | ||
2 | "client_id": "YOUR_CLIENT_ID", | ||
3 | "client_secret": "YOUR_CLIENT_SECRET", | ||
4 | "refresh_token": "YOUR_REFRESH_TOKEN", | ||
5 | "spreadsheet_id": "YOUR_GOOGLE_SPREADSHEET_ID", | ||
6 | "start_date": "2019-01-01T00:00:00Z", | ||
7 | "user_agent": "tap-google-sheets <api_user_email@example.com>" | ||
8 | } | ||
diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..e3c4f3e --- /dev/null +++ b/setup.py | |||
@@ -0,0 +1,25 @@ | |||
1 | #!/usr/bin/env python | ||
2 | |||
3 | from setuptools import setup, find_packages | ||
4 | |||
5 | setup(name='tap-google-sheets', | ||
6 | version='0.0.1', | ||
7 | description='Singer.io tap for extracting data from the Google Sheets v4 API', | ||
8 | author='jeff.huth@bytecode.io', | ||
9 | classifiers=['Programming Language :: Python :: 3 :: Only'], | ||
10 | py_modules=['tap_google_sheets'], | ||
11 | install_requires=[ | ||
12 | 'backoff==1.8.0', | ||
13 | 'requests==2.22.0', | ||
14 | 'singer-python==5.8.1' | ||
15 | ], | ||
16 | entry_points=''' | ||
17 | [console_scripts] | ||
18 | tap-google-sheets=tap_google_sheets:main | ||
19 | ''', | ||
20 | packages=find_packages(), | ||
21 | package_data={ | ||
22 | 'tap_google_sheets': [ | ||
23 | 'schemas/*.json' | ||
24 | ] | ||
25 | }) | ||
diff --git a/state.json.example b/state.json.example new file mode 100644 index 0000000..c33cc8f --- /dev/null +++ b/state.json.example | |||
@@ -0,0 +1,6 @@ | |||
1 | { | ||
2 | "currently_syncing": "spreadsheets", | ||
3 | "bookmarks": { | ||
4 | "file": "2019-09-27T22:34:39.000000Z" | ||
5 | } | ||
6 | } | ||
diff --git a/tap_google_sheets/__init__.py b/tap_google_sheets/__init__.py new file mode 100644 index 0000000..f97d4b8 --- /dev/null +++ b/tap_google_sheets/__init__.py | |||
@@ -0,0 +1,57 @@ | |||
1 | #!/usr/bin/env python3 | ||
2 | |||
3 | import sys | ||
4 | import json | ||
5 | import argparse | ||
6 | import singer | ||
7 | from singer import metadata, utils | ||
8 | from tap_google_sheets.client import GoogleClient | ||
9 | from tap_google_sheets.discover import discover | ||
10 | from tap_google_sheets.sync import sync | ||
11 | |||
12 | LOGGER = singer.get_logger() | ||
13 | |||
14 | REQUIRED_CONFIG_KEYS = [ | ||
15 | 'client_id', | ||
16 | 'client_secret', | ||
17 | 'refresh_token', | ||
18 | 'spreadsheet_id', | ||
19 | 'start_date', | ||
20 | 'user_agent' | ||
21 | ] | ||
22 | |||
def do_discover(client, spreadsheet_id):
    """Run discovery against the spreadsheet and write the catalog JSON to stdout."""
    LOGGER.info('Starting discover')
    discovered_catalog = discover(client, spreadsheet_id)
    sys.stdout.write(json.dumps(discovered_catalog.to_dict(), indent=2))
    LOGGER.info('Finished discover')
29 | |||
30 | |||
@singer.utils.handle_top_exception(LOGGER)
def main():
    """Tap entry point: parse CLI args, then run discovery or sync mode."""
    parsed_args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS)
    config = parsed_args.config

    with GoogleClient(config['client_id'],
                      config['client_secret'],
                      config['refresh_token'],
                      config['user_agent']) as client:

        # Fall back to an empty state when none was supplied on the CLI.
        state = parsed_args.state or {}
        spreadsheet_id = config.get('spreadsheet_id')

        if parsed_args.discover:
            do_discover(client, spreadsheet_id)
        elif parsed_args.catalog:
            sync(client=client,
                 config=config,
                 catalog=parsed_args.catalog,
                 state=state)

if __name__ == '__main__':
    main()
diff --git a/tap_google_sheets/client.py b/tap_google_sheets/client.py new file mode 100644 index 0000000..12f0811 --- /dev/null +++ b/tap_google_sheets/client.py | |||
@@ -0,0 +1,247 @@ | |||
1 | from datetime import datetime, timedelta | ||
2 | import backoff | ||
3 | import requests | ||
4 | from collections import OrderedDict | ||
5 | |||
6 | import singer | ||
7 | from singer import metrics | ||
8 | from singer import utils | ||
9 | |||
10 | BASE_URL = 'https://www.googleapis.com' | ||
11 | GOOGLE_TOKEN_URI = 'https://oauth2.googleapis.com/token' | ||
12 | LOGGER = singer.get_logger() | ||
13 | |||
14 | |||
class Server5xxError(Exception):
    """Server-side (5xx) failure; retried with backoff."""


class Server429Error(Exception):
    """Rate limit exceeded (HTTP 429); retried with backoff."""


class GoogleError(Exception):
    """Base class for all Google API client errors."""


class GoogleBadRequestError(GoogleError):
    """HTTP 400 Bad Request."""


class GoogleUnauthorizedError(GoogleError):
    """HTTP 401 Unauthorized."""


class GooglePaymentRequiredError(GoogleError):
    """HTTP 402 Payment Required."""


class GoogleNotFoundError(GoogleError):
    """HTTP 404 Not Found."""


class GoogleMethodNotAllowedError(GoogleError):
    """HTTP 405 Method Not Allowed."""


class GoogleConflictError(GoogleError):
    """HTTP 409 Conflict."""


class GoogleGoneError(GoogleError):
    """HTTP 410 Gone."""


class GooglePreconditionFailedError(GoogleError):
    """HTTP 412 Precondition Failed."""


class GoogleRequestEntityTooLargeError(GoogleError):
    """HTTP 413 Request Entity Too Large."""


class GoogleRequestedRangeNotSatisfiableError(GoogleError):
    """HTTP 416 Requested Range Not Satisfiable."""


class GoogleExpectationFailedError(GoogleError):
    """HTTP 417 Expectation Failed."""


class GoogleForbiddenError(GoogleError):
    """HTTP 403 Forbidden."""


class GoogleUnprocessableEntityError(GoogleError):
    """HTTP 422 Unprocessable Entity."""


class GooglePreconditionRequiredError(GoogleError):
    """HTTP 428 Precondition Required."""


class GoogleInternalServiceError(GoogleError):
    """HTTP 500 Internal Server Error."""
85 | |||
86 | |||
# Error Codes: https://developers.google.com/webmaster-tools/search-console-api-original/v3/errors
ERROR_CODE_EXCEPTION_MAPPING = {
    400: GoogleBadRequestError,
    401: GoogleUnauthorizedError,
    402: GooglePaymentRequiredError,
    403: GoogleForbiddenError,
    404: GoogleNotFoundError,
    405: GoogleMethodNotAllowedError,
    409: GoogleConflictError,
    410: GoogleGoneError,
    412: GooglePreconditionFailedError,
    413: GoogleRequestEntityTooLargeError,
    416: GoogleRequestedRangeNotSatisfiableError,
    417: GoogleExpectationFailedError,
    422: GoogleUnprocessableEntityError,
    428: GooglePreconditionRequiredError,
    500: GoogleInternalServiceError,
}


def get_exception_for_error_code(error_code):
    """Return the GoogleError subclass for an HTTP status code (GoogleError if unmapped)."""
    try:
        return ERROR_CODE_EXCEPTION_MAPPING[error_code]
    except KeyError:
        return GoogleError
108 | |||
def raise_for_error(response):
    """Raise a mapped GoogleError subclass for a non-2xx HTTP response.

    Parses the JSON error body (when present) to build a useful message and
    to select the exception class via the error's status code. Returns
    silently when the response body is empty, since there is nothing more
    to report beyond the status Google sent.
    """
    try:
        response.raise_for_status()
    except (requests.HTTPError, requests.ConnectionError) as error:
        try:
            if len(response.content) == 0:
                # There is nothing we can do here since Google has neither sent
                # us a 2xx response nor a response content.
                return
            payload = response.json()
        except (ValueError, TypeError):
            # Body is not valid JSON; surface the original HTTP error.
            raise GoogleError(error)

        if ('error' in payload) or ('errorCode' in payload):
            err = payload.get('error', payload.get('errorCode'))
            if isinstance(err, dict):
                # Standard Google error shape: {"error": {"code": ..., "message": ...}}
                error_code = err.get('code')
                message = '{}: {}'.format(error_code, err.get('message', 'Unknown Error'))
            else:
                # 'error' (or 'errorCode') is a bare string/number; the original
                # code crashed here calling .get() on it. Use the HTTP status
                # code for exception selection instead.
                error_code = response.status_code
                message = '{}: {}'.format(err, payload.get('message', 'Unknown Error'))
            ex = get_exception_for_error_code(error_code)
            raise ex(message)
        raise GoogleError(error)
130 | |||
class GoogleClient: # pylint: disable=too-many-instance-attributes
    """HTTP client for the Google Sheets and Drive APIs.

    Handles OAuth2 token refresh, request retries with backoff, rate
    limiting, and per-endpoint request metrics. Intended to be used as a
    context manager so the underlying requests.Session is closed on exit.
    """

    def __init__(self,
                 client_id,
                 client_secret,
                 refresh_token,
                 user_agent=None):
        # OAuth2 credentials used to mint short-lived access tokens.
        self.__client_id = client_id
        self.__client_secret = client_secret
        self.__refresh_token = refresh_token
        self.__user_agent = user_agent
        # Access token and its expiry; populated lazily by get_access_token().
        self.__access_token = None
        self.__expires = None
        # One session reused for all requests (connection pooling).
        self.__session = requests.Session()
        # Set per-request in request(): sheets API by default, drive API for files.
        self.base_url = None


    def __enter__(self):
        # Authorize eagerly so failures surface at entry, not mid-sync.
        self.get_access_token()
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        self.__session.close()

    @backoff.on_exception(backoff.expo,
                          Server5xxError,
                          max_tries=5,
                          factor=2)
    def get_access_token(self):
        """Fetch (or reuse) an OAuth2 access token via the refresh-token grant.

        No-op while the cached token is still valid. Retries with
        exponential backoff on 5xx responses from the token endpoint.
        """
        # The refresh_token never expires and may be used many times to generate each access_token
        # Since the refresh_token does not expire, it is not included in get access_token response
        if self.__access_token is not None and self.__expires > datetime.utcnow():
            return

        headers = {}
        if self.__user_agent:
            headers['User-Agent'] = self.__user_agent

        response = self.__session.post(
            url=GOOGLE_TOKEN_URI,
            headers=headers,
            data={
                'grant_type': 'refresh_token',
                'client_id': self.__client_id,
                'client_secret': self.__client_secret,
                'refresh_token': self.__refresh_token,
            })

        if response.status_code >= 500:
            raise Server5xxError()

        if response.status_code != 200:
            raise_for_error(response)

        data = response.json()
        self.__access_token = data['access_token']
        # 'expires_in' is seconds from now; compute the absolute expiry time.
        self.__expires = datetime.utcnow() + timedelta(seconds=data['expires_in'])
        LOGGER.info('Authorized, token expires = {}'.format(self.__expires))


    @backoff.on_exception(backoff.expo,
                          (Server5xxError, ConnectionError, Server429Error),
                          max_tries=7,
                          factor=3)
    # Rate Limit:
    # https://developers.google.com/webmaster-tools/search-console-api-original/v3/limits
    @utils.ratelimit(1200, 60)
    def request(self, method, path=None, url=None, api=None, **kwargs):
        """Issue an authenticated API request and return the parsed JSON body.

        Either a full `url` or a `path` (joined to the API base URL) may be
        given. `api='files'` targets the Drive v3 API; anything else targets
        the Sheets v4 API. An optional `endpoint` kwarg names the stream for
        the request-timer metric. Raises Server5xxError/Server429Error for
        retryable statuses and a GoogleError subclass otherwise.
        """
        # Refresh the access token first if it has expired.
        self.get_access_token()

        self.base_url = 'https://sheets.googleapis.com/v4'
        if api == 'files':
            self.base_url = 'https://www.googleapis.com/drive/v3'

        if not url and path:
            url = '{}/{}'.format(self.base_url, path)

        # endpoint = stream_name (from sync.py API call)
        if 'endpoint' in kwargs:
            endpoint = kwargs['endpoint']
            # Must be removed before passing kwargs to requests.
            del kwargs['endpoint']
        else:
            endpoint = None

        if 'headers' not in kwargs:
            kwargs['headers'] = {}
        kwargs['headers']['Authorization'] = 'Bearer {}'.format(self.__access_token)

        if self.__user_agent:
            kwargs['headers']['User-Agent'] = self.__user_agent

        if method == 'POST':
            kwargs['headers']['Content-Type'] = 'application/json'

        with metrics.http_request_timer(endpoint) as timer:
            response = self.__session.request(method, url, **kwargs)
            timer.tags[metrics.Tag.http_status_code] = response.status_code

        if response.status_code >= 500:
            raise Server5xxError()

        #Use retry functionality in backoff to wait and retry if
        #response code equals 429 because rate limit has been exceeded
        if response.status_code == 429:
            raise Server429Error()

        if response.status_code != 200:
            raise_for_error(response)

        # Ensure keys and rows are ordered as received from API
        return response.json(object_pairs_hook=OrderedDict)

    def get(self, path, api, **kwargs):
        # Convenience wrapper for GET requests.
        return self.request(method='GET', path=path, api=api, **kwargs)

    def post(self, path, api, **kwargs):
        # Convenience wrapper for POST requests.
        return self.request(method='POST', path=path, api=api, **kwargs)
diff --git a/tap_google_sheets/discover.py b/tap_google_sheets/discover.py new file mode 100644 index 0000000..6477a5f --- /dev/null +++ b/tap_google_sheets/discover.py | |||
@@ -0,0 +1,26 @@ | |||
1 | from singer.catalog import Catalog, CatalogEntry, Schema | ||
2 | from tap_google_sheets.schema import get_schemas, STREAMS | ||
3 | |||
4 | |||
def discover(client, spreadsheet_id):
    """Run discovery: build a singer Catalog of all streams and sheets.

    Fetches schemas and metadata for every stream (and every sheet tab)
    via get_schemas, then wraps each in a CatalogEntry.

    Parameters:
        client: API client used by get_schemas for discovery calls.
        spreadsheet_id: id of the spreadsheet being discovered.

    Returns:
        singer.catalog.Catalog containing one entry per stream/sheet.
    """
    schemas, field_metadata = get_schemas(client, spreadsheet_id)
    entries = []

    for stream_name, schema_dict in schemas.items():
        mdata = field_metadata[stream_name]

        # Fall back to table-key-properties found in the metadata when the
        # stream has no hard-coded key_properties in STREAMS.
        key_properties = None
        for entry in mdata:
            table_key_properties = entry.get('metadata', {}).get('table-key-properties')
            if table_key_properties:
                key_properties = table_key_properties

        entries.append(CatalogEntry(
            stream=stream_name,
            tap_stream_id=stream_name,
            key_properties=STREAMS.get(stream_name, {}).get('key_properties', key_properties),
            schema=Schema.from_dict(schema_dict),
            metadata=mdata
        ))

    return Catalog(entries)
diff --git a/tap_google_sheets/schema.py b/tap_google_sheets/schema.py new file mode 100644 index 0000000..237ab06 --- /dev/null +++ b/tap_google_sheets/schema.py | |||
@@ -0,0 +1,228 @@ | |||
1 | import os | ||
2 | import json | ||
3 | from collections import OrderedDict | ||
4 | import singer | ||
5 | from singer import metadata | ||
6 | from tap_google_sheets.streams import STREAMS | ||
7 | |||
8 | LOGGER = singer.get_logger() | ||
9 | |||
10 | # Reference: | ||
11 | # https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#Metadata | ||
12 | |||
# Convert column index to column letter (1 -> 'A', 27 -> 'AA')
def colnum_string(n):
    """Return the spreadsheet column letter(s) for 1-based column index *n*."""
    string = ""
    while n > 0:
        n, remainder = divmod(n - 1, 26)
        string = chr(65 + remainder) + string
    return string


# Create sheet_metadata_json with columns from sheet
def get_sheet_schema_columns(sheet, spreadsheet_id, client):
    """Build the JSON schema for one sheet's rows.

    Inspects the header row (row 1) for column names and the first data
    row (row 2) to infer each column's JSON schema type/format.

    Parameters:
        sheet: one element of the Sheets API 'sheets' list, fetched with
            includeGridData=true so data.rowData is present.
        spreadsheet_id: spreadsheet identifier (unused here; kept so the
            signature matches callers).
        client: API client (unused here; kept so the signature matches
            callers).

    Returns:
        Tuple (sheet_json_schema, columns) where columns is a list of
        column metadata dicts (index, letter, name, type, skipped flag).

    Raises:
        Exception: on duplicate header names, or when the first data row
            contains an error or formula value.
    """
    data = next(iter(sheet.get('data', [])), {})
    # spreadsheet is an OrderedDict, with ordered sheets and rows in the response
    row_data = data.get('rowData', [])
    headers = row_data[0].get('values', [])
    first_values = row_data[1].get('values', [])

    sheet_json_schema = OrderedDict()
    sheet_json_schema = {
        'type': 'object',
        'additionalProperties': False,
        'properties': {
            '__sdc_spreadsheet_id': {
                'type': ['null', 'string']
            },
            '__sdc_sheet_id': {
                'type': ['null', 'integer']
            },
            '__sdc_row': {
                'type': ['null', 'integer']
            }
        }
    }

    header_list = []  # used for checking uniqueness
    columns = []
    prior_header = None
    i = 0
    skipped = 0
    # Read column headers until end or 2 consecutive skipped headers
    for header in headers:
        column_index = i + 1
        column_letter = colnum_string(column_index)
        header_value = header.get('formattedValue')
        if header_value:  # NOT skipped
            column_is_skipped = False
            skipped = 0
            column_name = '{}'.format(header_value)
            if column_name in header_list:
                raise Exception('DUPLICATE HEADER ERROR: {}'.format(column_name))
            header_list.append(column_name)

            first_value = first_values[i]

            # ExtendedValue holds exactly one of: numberValue, stringValue,
            # boolValue, errorValue, formulaValue.
            # Reference: https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/other#ExtendedValue
            # BUG FIX: previously checked nonexistent keys ('errorType',
            # 'formulaType'), leaving column_effective_value_type unbound for
            # error/formula cells and for blank cells (no effectiveValue).
            # Default to stringValue when the first data cell is blank.
            column_effective_value = first_value.get('effectiveValue', {})
            column_effective_value_type = 'stringValue'
            for key in column_effective_value.keys():
                if key in ('numberValue', 'stringValue', 'boolValue',
                           'errorValue', 'formulaValue'):
                    column_effective_value_type = key

            column_number_format = first_values[i].get('effectiveFormat', {}).get(
                'numberFormat', {})
            column_number_format_type = column_number_format.get('type')

            # Determine datatype for sheet_json_schema
            #
            # column_number_format_type = UNSPECIFIED, TEXT, NUMBER, PERCENT,
            #   CURRENCY, DATE, TIME, DATE_TIME, SCIENTIFIC
            # Reference: https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType
            column_format = None  # Default
            if column_effective_value_type in ('formulaValue', 'errorValue'):
                # BUG FIX: the original format string had two placeholders but
                # only one argument, so raising it crashed with IndexError.
                raise Exception('INVALID DATA TYPE ERROR: column: {}, value type: {}'.format(
                    column_name, column_effective_value_type))
            elif column_effective_value_type == 'stringValue':
                column_type = ['null', 'string']
                column_gs_type = 'stringValue'
            elif column_effective_value_type == 'boolValue':
                column_type = ['null', 'boolean', 'string']
                column_gs_type = 'boolValue'
            elif column_effective_value_type == 'numberValue':
                # Dates/times arrive as serial numbers; use the cell's number
                # format to decide date, time, or date-time.
                if column_number_format_type == 'DATE_TIME':
                    column_type = ['null', 'string']
                    column_format = 'date-time'
                    column_gs_type = 'numberType.DATE_TIME'
                elif column_number_format_type == 'DATE':
                    column_type = ['null', 'string']
                    column_format = 'date'
                    column_gs_type = 'numberType.DATE'
                elif column_number_format_type == 'TIME':
                    column_type = ['null', 'string']
                    column_format = 'time'
                    column_gs_type = 'numberType.TIME'
                elif column_number_format_type == 'TEXT':
                    column_type = ['null', 'string']
                    column_gs_type = 'stringValue'
                else:
                    column_type = ['null', 'number', 'string']
                    column_gs_type = 'numberType'

        else:  # skipped
            # Headerless columns get a placeholder name and string type.
            column_is_skipped = True
            skipped = skipped + 1
            column_index_str = str(column_index).zfill(2)
            column_name = '__sdc_skip_col_{}'.format(column_index_str)
            column_type = ['null', 'string']
            column_format = None
            column_gs_type = 'stringValue'

        if skipped >= 2:
            # Two consecutive headerless columns mark the end of the sheet's
            # data; drop the prior placeholder column and stop reading.
            sheet_json_schema['properties'].pop(prior_header, None)
            break

        column = {
            'columnIndex': column_index,
            'columnLetter': column_letter,
            'columnName': column_name,
            'columnType': column_gs_type,
            'columnSkipped': column_is_skipped
        }
        columns.append(column)

        # NOTE: the same dict object is stored in both columns and the schema
        # properties, so the 'type'/'format' keys added below intentionally
        # appear in the columns list too (consumed by sheet_metadata).
        sheet_json_schema['properties'][column_name] = column
        sheet_json_schema['properties'][column_name]['type'] = column_type
        if column_format:
            sheet_json_schema['properties'][column_name]['format'] = column_format

        prior_header = column_name
        i = i + 1

    return sheet_json_schema, columns
156 | |||
157 | |||
def get_sheet_metadata(sheet, spreadsheet_id, client):
    """Fetch one sheet's first two rows and derive its schema and columns.

    Parameters:
        sheet: 'sheets' element (properties only) from the spreadsheet
            metadata response; supplies sheetId and title.
        spreadsheet_id: id of the spreadsheet the sheet belongs to.
        client: API client used to fetch the sheet's grid data.

    Returns:
        Tuple (sheet_schema, columns) from get_sheet_schema_columns.
    """
    properties = sheet.get('properties', {})
    sheet_id = properties.get('sheetId')
    sheet_title = properties.get('title')
    LOGGER.info('sheet_id = {}, sheet_title = {}'.format(sheet_id, sheet_title))

    stream_name = 'sheet_metadata'
    stream_metadata = STREAMS.get(stream_name)
    api = stream_metadata.get('api', 'sheets')
    params = stream_metadata.get('params', {})

    # Build '?key=value&...' query, substituting the sheet title into the
    # ranges parameter template.
    query_parts = ['%s=%s' % (key, value) for (key, value) in params.items()]
    querystring = '&'.join(query_parts).replace('{sheet_title}', sheet_title)
    path = '{}?{}'.format(
        stream_metadata.get('path').replace('{spreadsheet_id}', spreadsheet_id),
        querystring)

    sheet_md_results = client.get(path=path, api=api, endpoint=stream_name)
    sheet_cols = sheet_md_results.get('sheets')[0]
    return get_sheet_schema_columns(sheet_cols, spreadsheet_id, client)
175 | |||
176 | |||
def get_abs_path(path):
    """Return *path* resolved relative to this module's directory."""
    module_dir = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(module_dir, path)
179 | |||
def get_schemas(client, spreadsheet_id):
    """Load static stream schemas and discover dynamic per-sheet schemas.

    For each stream in STREAMS, loads its JSON schema file and generates
    standard singer metadata. While processing spreadsheet_metadata, also
    calls the API and builds a schema (plus metadata) for every sheet in
    the spreadsheet, keyed by sheet title.

    Parameters:
        client: API client used for discovery calls.
        spreadsheet_id: id of the spreadsheet to discover.

    Returns:
        Tuple (schemas, field_metadata), both dicts keyed by stream name
        or sheet title.
    """
    schemas = {}
    field_metadata = {}

    for stream_name, stream_metadata in STREAMS.items():
        schema_path = get_abs_path('schemas/{}.json'.format(stream_name))
        with open(schema_path) as file:
            schema = json.load(file)
        schemas[stream_name] = schema
        mdata = metadata.new()

        # Documentation:
        # https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#singer-python-helper-functions
        # Reference:
        # https://github.com/singer-io/singer-python/blob/master/singer/metadata.py#L25-L44
        mdata = metadata.get_standard_metadata(
            schema=schema,
            key_properties=stream_metadata.get('key_properties', None),
            valid_replication_keys=stream_metadata.get('replication_keys', None),
            replication_method=stream_metadata.get('replication_method', None)
        )
        field_metadata[stream_name] = mdata

        if stream_name == 'spreadsheet_metadata':
            api = stream_metadata.get('api', 'sheets')
            params = stream_metadata.get('params', {})
            querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()])
            path = '{}?{}'.format(
                stream_metadata.get('path').replace('{spreadsheet_id}', spreadsheet_id),
                querystring)

            # BUG FIX: the querystring is already embedded in path; passing it
            # again via params= duplicated every query parameter in the request.
            spreadsheet_md_results = client.get(path=path, api=api, endpoint=stream_name)

            sheets = spreadsheet_md_results.get('sheets')
            if sheets:
                # Build a dynamic schema and metadata entry per sheet (tab)
                for sheet in sheets:
                    sheet_schema, columns = get_sheet_metadata(sheet, spreadsheet_id, client)

                    sheet_title = sheet.get('properties', {}).get('title')
                    schemas[sheet_title] = sheet_schema
                    sheet_mdata = metadata.new()
                    sheet_mdata = metadata.get_standard_metadata(
                        schema=sheet_schema,
                        key_properties=['__sdc_row'],
                        valid_replication_keys=None,
                        replication_method='FULL_TABLE'
                    )
                    field_metadata[sheet_title] = sheet_mdata

    return schemas, field_metadata
diff --git a/tap_google_sheets/schemas/file_metadata.json b/tap_google_sheets/schemas/file_metadata.json new file mode 100644 index 0000000..25c19c4 --- /dev/null +++ b/tap_google_sheets/schemas/file_metadata.json | |||
@@ -0,0 +1,44 @@ | |||
1 | { | ||
2 | "type": "object", | ||
3 | "additionalProperties": false, | ||
4 | "properties": { | ||
5 | "id": { | ||
6 | "type": ["null", "string"] | ||
7 | }, | ||
8 | "name": { | ||
9 | "type": ["null", "string"] | ||
10 | }, | ||
11 | "version": { | ||
12 | "type": ["null", "integer"] | ||
13 | }, | ||
14 | "createdTime": { | ||
15 | "type": ["null", "string"], | ||
16 | "format": "date-time" | ||
17 | }, | ||
18 | "modifiedTime": { | ||
19 | "type": ["null", "string"], | ||
20 | "format": "date-time" | ||
21 | }, | ||
22 | "teamDriveId": { | ||
23 | "type": ["null", "string"] | ||
24 | }, | ||
25 | "driveId": { | ||
26 | "type": ["null", "string"] | ||
27 | }, | ||
28 | "lastModifyingUser": { | ||
29 | "type": ["null", "object"], | ||
30 | "additionalProperties": false, | ||
31 | "properties": { | ||
32 | "kind": { | ||
33 | "type": ["null", "integer"] | ||
34 | }, | ||
35 | "displayName": { | ||
36 | "type": ["null", "string"] | ||
37 | }, | ||
38 | "emailAdress": { | ||
39 | "type": ["null", "string"] | ||
40 | } | ||
41 | } | ||
42 | } | ||
43 | } | ||
44 | } | ||
diff --git a/tap_google_sheets/schemas/sheet_metadata.json b/tap_google_sheets/schemas/sheet_metadata.json new file mode 100644 index 0000000..c3f2ac2 --- /dev/null +++ b/tap_google_sheets/schemas/sheet_metadata.json | |||
@@ -0,0 +1,89 @@ | |||
1 | { | ||
2 | "type": "object", | ||
3 | "additionalProperties": false, | ||
4 | "properties": { | ||
5 | "spreadsheetId": { | ||
6 | "type": ["null", "string"] | ||
7 | }, | ||
8 | "sheetId": { | ||
9 | "type": ["null", "integer"] | ||
10 | }, | ||
11 | "title": { | ||
12 | "type": ["null", "string"] | ||
13 | }, | ||
14 | "index": { | ||
15 | "type": ["null", "integer"] | ||
16 | }, | ||
17 | "sheetType": { | ||
18 | "type": ["null", "string"] | ||
19 | }, | ||
20 | "sheetUrl": { | ||
21 | "type": ["null", "string"] | ||
22 | }, | ||
23 | "gridProperties": { | ||
24 | "type": ["null", "object"], | ||
25 | "additionalProperties": false, | ||
26 | "properties": { | ||
27 | "rowCount": { | ||
28 | "type": ["null", "integer"] | ||
29 | }, | ||
30 | "columnCount": { | ||
31 | "type": ["null", "integer"] | ||
32 | }, | ||
33 | "frozenRowCount": { | ||
34 | "type": ["null", "integer"] | ||
35 | }, | ||
36 | "frozenColumnCount": { | ||
37 | "type": ["null", "integer"] | ||
38 | } | ||
39 | } | ||
40 | }, | ||
41 | "columns": { | ||
42 | "anyOf": [ | ||
43 | { | ||
44 | "type": "array", | ||
45 | "items": { | ||
46 | "type": ["null", "object"], | ||
47 | "additionalProperties": false, | ||
48 | "properties": { | ||
49 | "columnIndex": { | ||
50 | "type": ["null", "integer"] | ||
51 | }, | ||
52 | "columnLetter": { | ||
53 | "type": ["null", "string"] | ||
54 | }, | ||
55 | "columnName": { | ||
56 | "type": ["null", "string"] | ||
57 | }, | ||
58 | "columnType": { | ||
59 | "type": ["null", "string"] | ||
60 | }, | ||
61 | "columnSkipped": { | ||
62 | "type": ["null", "boolean"] | ||
63 | }, | ||
64 | "type": { | ||
65 | "anyOf": [ | ||
66 | { | ||
67 | "type": "array", | ||
68 | "items": { | ||
69 | "type": "string" | ||
70 | } | ||
71 | }, | ||
72 | { | ||
73 | "type": "null" | ||
74 | } | ||
75 | ] | ||
76 | }, | ||
77 | "format": { | ||
78 | "type": ["null", "string"] | ||
79 | } | ||
80 | } | ||
81 | } | ||
82 | }, | ||
83 | { | ||
84 | "type": "null" | ||
85 | } | ||
86 | ] | ||
87 | } | ||
88 | } | ||
89 | } | ||
diff --git a/tap_google_sheets/schemas/sheets_loaded.json b/tap_google_sheets/schemas/sheets_loaded.json new file mode 100644 index 0000000..12f967a --- /dev/null +++ b/tap_google_sheets/schemas/sheets_loaded.json | |||
@@ -0,0 +1,22 @@ | |||
1 | { | ||
2 | "type": "object", | ||
3 | "additionalProperties": false, | ||
4 | "properties": { | ||
5 | "spreadsheetId": { | ||
6 | "type": ["null", "string"] | ||
7 | }, | ||
8 | "sheetId": { | ||
9 | "type": ["null", "integer"] | ||
10 | }, | ||
11 | "sheetTitle": { | ||
12 | "type": ["null", "string"] | ||
13 | }, | ||
14 | "loadDate": { | ||
15 | "type": ["null", "string"], | ||
16 | "format": "date-time" | ||
17 | }, | ||
18 | "lastRowNumber": { | ||
19 | "type": ["null", "integer"] | ||
20 | } | ||
21 | } | ||
22 | } | ||
diff --git a/tap_google_sheets/schemas/spreadsheet_metadata.json b/tap_google_sheets/schemas/spreadsheet_metadata.json new file mode 100644 index 0000000..852cb76 --- /dev/null +++ b/tap_google_sheets/schemas/spreadsheet_metadata.json | |||
@@ -0,0 +1,30 @@ | |||
1 | { | ||
2 | "type": "object", | ||
3 | "additionalProperties": false, | ||
4 | "properties": { | ||
5 | "spreadsheetId": { | ||
6 | "type": ["null", "string"] | ||
7 | }, | ||
8 | "properties": { | ||
9 | "type": ["null", "object"], | ||
10 | "additionalProperties": false, | ||
11 | "properties": { | ||
12 | "title": { | ||
13 | "type": ["null", "string"] | ||
14 | }, | ||
15 | "locale": { | ||
16 | "type": ["null", "string"] | ||
17 | }, | ||
18 | "autoRecalc": { | ||
19 | "type": ["null", "string"] | ||
20 | }, | ||
21 | "timeZone": { | ||
22 | "type": ["null", "string"] | ||
23 | } | ||
24 | } | ||
25 | }, | ||
26 | "spreadsheetUrl": { | ||
27 | "type": ["null", "string"] | ||
28 | } | ||
29 | } | ||
30 | } | ||
diff --git a/tap_google_sheets/streams.py b/tap_google_sheets/streams.py new file mode 100644 index 0000000..299326a --- /dev/null +++ b/tap_google_sheets/streams.py | |||
@@ -0,0 +1,66 @@ | |||
1 | from collections import OrderedDict | ||
2 | |||
3 | # streams: API URL endpoints to be called | ||
4 | # properties: | ||
5 | # <root node>: Plural stream name for the endpoint | ||
6 | # path: API endpoint relative path, when added to the base URL, creates the full path, | ||
7 | # default = stream_name | ||
8 | # key_properties: Primary key fields for identifying an endpoint record. | ||
9 | # replication_method: INCREMENTAL or FULL_TABLE | ||
10 | # replication_keys: bookmark_field(s), typically a date-time, used for filtering the results | ||
11 | # and setting the state | ||
12 | # params: Query, sort, and other endpoint specific parameters; default = {} | ||
13 | # data_key: JSON element containing the results list for the endpoint; default = root (no data_key) | ||
14 | # bookmark_query_field: From date-time field used for filtering the query | ||
15 | # bookmark_type: Data type for bookmark, integer or datetime | ||
16 | |||
# Google Drive file metadata for the spreadsheet (used for change detection).
FILE_METADATA = {
    "api": "files",
    "path": "files/{spreadsheet_id}",
    "key_properties": ["id"],
    "replication_method": "FULL_TABLE",
    "params": {
        "fields": "id,name,createdTime,modifiedTime,version,teamDriveId,driveId,lastModifyingUser"
    }
}

# Top-level spreadsheet properties (title, locale, timezone, ...).
SPREADSHEET_METADATA = {
    "api": "sheets",
    "path": "spreadsheets/{spreadsheet_id}",
    "key_properties": ["spreadsheetId"],
    "replication_method": "FULL_TABLE",
    "params": {
        "includeGridData": "false"
    }
}

# Per-sheet (tab) properties plus the first two rows of grid data,
# used to derive column schemas.
SHEET_METADATA = {
    "api": "sheets",
    "path": "spreadsheets/{spreadsheet_id}",
    "key_properties": ["sheetId"],
    "replication_method": "FULL_TABLE",
    "params": {
        "includeGridData": "true",
        "ranges": "'{sheet_title}'!1:2"
    }
}

# Batches of row values for a sheet range; one load record per batch.
SHEETS_LOADED = {
    "api": "sheets",
    "path": "spreadsheets/{spreadsheet_id}/values/'{sheet_title}'!{range_rows}",
    "data_key": "values",
    "key_properties": ["spreadsheetId", "sheetId", "loadDate"],
    "replication_method": "FULL_TABLE",
    "params": {
        "dateTimeRenderOption": "SERIAL_NUMBER",
        "valueRenderOption": "UNFORMATTED_VALUE",
        "majorDimension": "ROWS"
    }
}

# Ensure streams are processed in a logical order: file change detection
# first, then spreadsheet/sheet metadata, then the row data itself.
STREAMS = OrderedDict([
    ('file_metadata', FILE_METADATA),
    ('spreadsheet_metadata', SPREADSHEET_METADATA),
    ('sheet_metadata', SHEET_METADATA),
    ('sheets_loaded', SHEETS_LOADED),
])
diff --git a/tap_google_sheets/sync.py b/tap_google_sheets/sync.py new file mode 100644 index 0000000..a8b02d0 --- /dev/null +++ b/tap_google_sheets/sync.py | |||
@@ -0,0 +1,281 @@ | |||
1 | import time | ||
2 | import math | ||
3 | import singer | ||
4 | import json | ||
5 | from collections import OrderedDict | ||
6 | from singer import metrics, metadata, Transformer, utils | ||
7 | from singer.utils import strptime_to_utc, strftime | ||
8 | from tap_google_sheets.transform import transform_json | ||
9 | from tap_google_sheets.streams import STREAMS | ||
10 | from tap_google_sheets.schema import get_sheet_metadata | ||
11 | |||
12 | LOGGER = singer.get_logger() | ||
13 | |||
14 | |||
def write_schema(catalog, stream_name):
    """Emit the Singer SCHEMA message for *stream_name* from the catalog."""
    stream = catalog.get_stream(stream_name)
    try:
        singer.write_schema(stream_name, stream.schema.to_dict(), stream.key_properties)
    except OSError:
        LOGGER.info('OS Error writing schema for: %s', stream_name)
        raise
23 | |||
24 | |||
def write_record(stream_name, record, time_extracted):
    """Emit a Singer RECORD message; log the offending record on OSError."""
    try:
        singer.messages.write_record(stream_name, record, time_extracted=time_extracted)
    except OSError:
        LOGGER.info('OS Error writing record for: %s', stream_name)
        LOGGER.info('record: %s', record)
        raise
32 | |||
33 | |||
def get_bookmark(state, stream, default):
    """Return the bookmark stored in *state* for *stream*, or *default*."""
    if state is None or 'bookmarks' not in state:
        return default
    return state['bookmarks'].get(stream, default)
42 | |||
43 | |||
def write_bookmark(state, stream, value):
    """Store *value* as the bookmark for *stream* and emit a STATE message."""
    state.setdefault('bookmarks', {})[stream] = value
    LOGGER.info('Write state for stream: %s, value: %s', stream, value)
    singer.write_state(state)
50 | |||
51 | |||
def transform_datetime(this_dttm):
    """Normalize a datetime value to Singer's canonical string form
    using the singer Transformer."""
    with Transformer() as transformer:
        return transformer._transform_datetime(this_dttm)
57 | |||
58 | |||
def process_records(catalog, #pylint: disable=too-many-branches
                    stream_name,
                    records,
                    time_extracted,
                    bookmark_field=None,
                    bookmark_type=None,
                    max_bookmark_value=None,
                    last_datetime=None,
                    last_integer=None,
                    parent=None,
                    parent_id=None):
    """Transform and emit records for a stream, tracking the max bookmark.

    Args:
        catalog: singer Catalog; supplies the stream's schema and metadata.
        stream_name: stream whose records are being written.
        records: iterable of raw record dicts.
        time_extracted: extraction timestamp attached to each RECORD message.
        bookmark_field: record field used as the replication bookmark, if any.
        bookmark_type: 'integer' or 'datetime' (how bookmark values compare).
        max_bookmark_value: running maximum bookmark value seen so far.
        last_datetime: previously saved bookmark for datetime-type streams.
        last_integer: previously saved bookmark for integer-type streams.
        parent: parent stream name, for child streams (adds '<parent>_id').
        parent_id: parent record id, for child streams.

    Returns:
        Tuple of (max_bookmark_value, count of records written).
    """
    stream = catalog.get_stream(stream_name)
    schema = stream.schema.to_dict()
    stream_metadata = metadata.to_map(stream.metadata)

    with metrics.record_counter(stream_name) as counter:
        for record in records:
            # If child object, add parent_id to record
            if parent_id and parent:
                record[parent + '_id'] = parent_id

            # Transform record for Singer.io
            with Transformer() as transformer:
                transformed_record = transformer.transform(
                    record,
                    schema,
                    stream_metadata)
                # Reset max_bookmark_value to new value if higher
                # NOTE(review): comparison funnels through transform_datetime
                # even when bookmark_type is 'integer' — presumably bookmark
                # fields here are datetimes in practice; confirm with callers.
                if transformed_record.get(bookmark_field):
                    if max_bookmark_value is None or \
                        transformed_record[bookmark_field] > transform_datetime(max_bookmark_value):
                        max_bookmark_value = transformed_record[bookmark_field]

                if bookmark_field and (bookmark_field in transformed_record):
                    if bookmark_type == 'integer':
                        # Keep only records whose bookmark is after the last_integer
                        if transformed_record[bookmark_field] >= last_integer:
                            write_record(stream_name, transformed_record, \
                                time_extracted=time_extracted)
                            counter.increment()
                    elif bookmark_type == 'datetime':
                        last_dttm = transform_datetime(last_datetime)
                        bookmark_dttm = transform_datetime(transformed_record[bookmark_field])
                        # Keep only records whose bookmark is after the last_datetime
                        if bookmark_dttm >= last_dttm:
                            write_record(stream_name, transformed_record, \
                                time_extracted=time_extracted)
                            counter.increment()
                else:
                    # No bookmark field: emit every record (full-table behavior)
                    write_record(stream_name, transformed_record, time_extracted=time_extracted)
                    counter.increment()

        return max_bookmark_value, counter.value
112 | |||
113 | |||
114 | # Currently syncing sets the stream currently being delivered in the state. | ||
115 | # If the integration is interrupted, this state property is used to identify | ||
116 | # the starting point to continue from. | ||
117 | # Reference: https://github.com/singer-io/singer-python/blob/master/singer/bookmarks.py#L41-L46 | ||
def update_currently_syncing(state, stream_name):
    """Record which stream is currently syncing and emit a STATE message.

    Passing stream_name=None with a marker present clears the marker
    (normal end of sync).
    """
    if stream_name is None and 'currently_syncing' in state:
        del state['currently_syncing']
    else:
        singer.set_currently_syncing(state, stream_name)
    singer.write_state(state)
124 | |||
125 | |||
126 | # List selected fields from stream catalog | ||
127 | def get_selected_fields(catalog, stream_name): | ||
128 | stream = catalog.get_stream(stream_name) | ||
129 | mdata = metadata.to_map(stream.metadata) | ||
130 | mdata_list = singer.metadata.to_list(mdata) | ||
131 | selected_fields = [] | ||
132 | for entry in mdata_list: | ||
133 | field = None | ||
134 | try: | ||
135 | field = entry['breadcrumb'][1] | ||
136 | if entry.get('metadata', {}).get('selected', False): | ||
137 | selected_fields.append(field) | ||
138 | except IndexError: | ||
139 | pass | ||
140 | return selected_fields | ||
141 | |||
142 | |||
def get_data(stream_name,
             endpoint_config,
             client,
             spreadsheet_id,
             range_rows=None):
    """Call the stream's API endpoint and return the JSON response.

    Substitutes spreadsheet_id, sheet title (stream_name), and the row
    range into the endpoint's path and parameter templates.
    """
    range_rows = range_rows or ''
    path_template = endpoint_config.get('path', stream_name)
    path = (path_template
            .replace('{spreadsheet_id}', spreadsheet_id)
            .replace('{sheet_title}', stream_name)
            .replace('{range_rows}', range_rows))
    api = endpoint_config.get('api', 'sheets')
    params = endpoint_config.get('params', {})
    querystring = '&'.join(
        ['%s=%s' % (key, value) for (key, value) in params.items()]
    ).replace('{sheet_title}', stream_name)
    return client.get(
        path=path,
        api=api,
        params=querystring,
        endpoint=stream_name)
164 | |||
165 | |||
def transform_file_metadata(file_metadata):
    """Deep-copy the Drive file metadata, strip unused lastModifyingUser
    fields, and return it as a single-record list."""
    # json round-trip = deep copy; leaves the caller's dict untouched
    file_metadata_tf = json.loads(json.dumps(file_metadata))
    last_user = file_metadata_tf.get('lastModifyingUser')
    if last_user:
        for key in ('photoLink', 'me', 'permissionId'):
            last_user.pop(key, None)
    return [file_metadata_tf]
178 | |||
179 | |||
def transform_spreadsheet_metadata(spreadsheet_metadata):
    """Deep-copy the spreadsheet metadata, drop the bulky defaultFormat and
    sheets elements, and return it as a single-record list."""
    # json round-trip = deep copy; leaves the caller's dict untouched
    spreadsheet_metadata_tf = json.loads(json.dumps(spreadsheet_metadata))
    properties = spreadsheet_metadata_tf.get('properties')
    if properties:
        properties.pop('defaultFormat', None)
    spreadsheet_metadata_tf.pop('sheets', None)
    return [spreadsheet_metadata_tf]
191 | |||
192 | |||
def transform_sheet_metadata(spreadsheet_id, sheet, columns):
    """Flatten a sheet's properties and attach the spreadsheet id, a direct
    sheet URL, and the discovered column metadata."""
    # json round-trip deep-copies the properties sub-dict
    sheet_metadata_tf = json.loads(json.dumps(sheet.get('properties')))
    sheet_url = 'https://docs.google.com/spreadsheets/d/{}/edit#gid={}'.format(
        spreadsheet_id, sheet_metadata_tf.get('sheetId'))
    sheet_metadata_tf['spreadsheetId'] = spreadsheet_id
    sheet_metadata_tf['sheetUrl'] = sheet_url
    sheet_metadata_tf['columns'] = columns
    return sheet_metadata_tf
204 | |||
205 | |||
206 | def sync(client, config, catalog, state): | ||
207 | start_date = config.get('start_date') | ||
208 | spreadsheet_id = config.get('spreadsheet_id') | ||
209 | |||
210 | # Get selected_streams from catalog, based on state last_stream | ||
211 | # last_stream = Previous currently synced stream, if the load was interrupted | ||
212 | last_stream = singer.get_currently_syncing(state) | ||
213 | LOGGER.info('last/currently syncing stream: {}'.format(last_stream)) | ||
214 | selected_streams = [] | ||
215 | for stream in catalog.get_selected_streams(state): | ||
216 | selected_streams.append(stream.stream) | ||
217 | LOGGER.info('selected_streams: {}'.format(selected_streams)) | ||
218 | |||
219 | if not selected_streams: | ||
220 | return | ||
221 | |||
222 | # Get file_metadata | ||
223 | file_metadata = {} | ||
224 | file_metadata_config = STREAMS.get('file_metadata') | ||
225 | file_metadata = get_data('file_metadata', file_metadata_config, client, spreadsheet_id) | ||
226 | file_metadata_tf = transform_file_metadata(file_metadata) | ||
227 | # LOGGER.info('file_metadata_tf = {}'.format(file_metadata_tf)) | ||
228 | last_datetime = strptime_to_utc(get_bookmark(state, 'file_metadata', start_date)) | ||
229 | this_datetime = strptime_to_utc(file_metadata.get('modifiedTime')) | ||
230 | LOGGER.info('last_datetime = {}, this_datetime = {}'.format(last_datetime, this_datetime)) | ||
231 | if this_datetime <= last_datetime: | ||
232 | LOGGER.info('this_datetime <= last_datetime, FILE NOT CHANGED. EXITING.') | ||
233 | return 0 | ||
234 | |||
235 | # Get spreadsheet_metadata | ||
236 | spreadsheet_metadata = {} | ||
237 | spreadsheet_metadata_config = STREAMS.get('spreadsheet_metadata') | ||
238 | spreadsheet_metadata = get_data('spreadsheet_metadata', spreadsheet_metadata_config, client, spreadsheet_id) | ||
239 | spreadsheet_metadata_tf = transform_spreadsheet_metadata(spreadsheet_metadata) | ||
240 | # LOGGER.info('spreadsheet_metadata_tf = {}'.format(spreadsheet_metadata_tf)) | ||
241 | |||
242 | # Get sheet_metadata | ||
243 | sheets = spreadsheet_metadata.get('sheets') | ||
244 | sheet_metadata = [] | ||
245 | sheets_loaded = [] | ||
246 | sheets_loaded_config = STREAMS['sheets_loaded'] | ||
247 | if sheets: | ||
248 | for sheet in sheets: | ||
249 | sheet_title = sheet.get('properties', {}).get('title') | ||
250 | sheet_schema, columns = get_sheet_metadata(sheet, spreadsheet_id, client) | ||
251 | sheet_metadata_tf = transform_sheet_metadata(spreadsheet_id, sheet, columns) | ||
252 | # LOGGER.info('sheet_metadata_tf = {}'.format(sheet_metadata_tf)) | ||
253 | sheet_metadata.append(sheet_metadata_tf) | ||
254 | |||
255 | # Determine range of rows and columns for "paging" through batch rows of data | ||
256 | sheet_last_col_index = 1 | ||
257 | sheet_last_col_letter = 'A' | ||
258 | for col in columns: | ||
259 | col_index = col.get('columnIndex') | ||
260 | col_letter = col.get('columnLetter') | ||
261 | if col_index > sheet_last_col_index: | ||
262 | sheet_last_col_index = col_index | ||
263 | sheet_last_col_letter = col_letter | ||
264 | sheet_max_row = sheet.get('gridProperties', {}).get('rowCount') | ||
265 | is_empty_row = False | ||
266 | batch_rows = 200 | ||
267 | from_row = 2 | ||
268 | if sheet_max_row < batch_rows: | ||
269 | to_row = sheet_max_row | ||
270 | else: | ||
271 | to_row = batch_rows | ||
272 | |||
273 | while not is_empty_row and to_row <= sheet_max_row: | ||
274 | range_rows = 'A2:{}{}'.format(sheet_last_col_letter, to_row) | ||
275 | |||
276 | sheet_data = get_data( | ||
277 | stream_name=sheet_title, | ||
278 | endpoint_config=sheets_loaded_config, | ||
279 | client=client, | ||
280 | spreadsheet_id=spreadsheet_id, | ||
281 | range_rows=range_rows) | ||