aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJeff Huth <jeff.huth@bytecode.io>2019-11-13 17:03:56 -0800
committerJeff Huth <jeff.huth@bytecode.io>2019-11-13 17:03:56 -0800
commit89643ba6fa98db82efd3246805ef801a8bfb5c81 (patch)
tree739027b4e827def2db81631c9d6ed58ec2b97809
parent5f8005471d3affaaf23489df93a58ca64c3da3ca (diff)
downloadtap-google-sheets-89643ba6fa98db82efd3246805ef801a8bfb5c81.tar.gz
tap-google-sheets-89643ba6fa98db82efd3246805ef801a8bfb5c81.tar.zst
tap-google-sheets-89643ba6fa98db82efd3246805ef801a8bfb5c81.zip
Initial commit
Discovery mode works. Still working on normal sync.
-rw-r--r--.gitignore22
-rw-r--r--CHANGELOG.md4
-rw-r--r--LICENSE661
-rw-r--r--MANIFEST.in2
-rw-r--r--README.md173
-rw-r--r--config.json.example8
-rw-r--r--setup.py25
-rw-r--r--state.json.example6
-rw-r--r--tap_google_sheets/__init__.py57
-rw-r--r--tap_google_sheets/client.py247
-rw-r--r--tap_google_sheets/discover.py26
-rw-r--r--tap_google_sheets/schema.py228
-rw-r--r--tap_google_sheets/schemas/file_metadata.json44
-rw-r--r--tap_google_sheets/schemas/sheet_metadata.json89
-rw-r--r--tap_google_sheets/schemas/sheets_loaded.json22
-rw-r--r--tap_google_sheets/schemas/spreadsheet_metadata.json30
-rw-r--r--tap_google_sheets/streams.py66
-rw-r--r--tap_google_sheets/sync.py281
18 files changed, 1990 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d9633da
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,22 @@
1/.vscode/*
2/.ipynb_checkpoints/*
3config/
4virtualenvs/
5*catalog*.json
6*config*.json
7*state*.json
8target*.json
9*.sublime-*
10.python-version
11singer-check-tap-data
12*.pyc
13*.egg-info
14dist/
15__pycache__/
16venv/
17build/
18tap_google_sheets/.vscode/settings.json
19*.ipynb
20.DS_Store
21test_configuration.py
22tap_target_commands.sh
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..d58f396
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,4 @@
1# Changelog
2
3## 0.0.1
4 * Initial commit
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0ad25db
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,661 @@
1 GNU AFFERO GENERAL PUBLIC LICENSE
2 Version 3, 19 November 2007
3
4 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 Everyone is permitted to copy and distribute verbatim copies
6 of this license document, but changing it is not allowed.
7
8 Preamble
9
10 The GNU Affero General Public License is a free, copyleft license for
11software and other kinds of works, specifically designed to ensure
12cooperation with the community in the case of network server software.
13
14 The licenses for most software and other practical works are designed
15to take away your freedom to share and change the works. By contrast,
16our General Public Licenses are intended to guarantee your freedom to
17share and change all versions of a program--to make sure it remains free
18software for all its users.
19
20 When we speak of free software, we are referring to freedom, not
21price. Our General Public Licenses are designed to make sure that you
22have the freedom to distribute copies of free software (and charge for
23them if you wish), that you receive source code or can get it if you
24want it, that you can change the software or use pieces of it in new
25free programs, and that you know you can do these things.
26
27 Developers that use our General Public Licenses protect your rights
28with two steps: (1) assert copyright on the software, and (2) offer
29you this License which gives you legal permission to copy, distribute
30and/or modify the software.
31
32 A secondary benefit of defending all users' freedom is that
33improvements made in alternate versions of the program, if they
34receive widespread use, become available for other developers to
35incorporate. Many developers of free software are heartened and
36encouraged by the resulting cooperation. However, in the case of
37software used on network servers, this result may fail to come about.
38The GNU General Public License permits making a modified version and
39letting the public access it on a server without ever releasing its
40source code to the public.
41
42 The GNU Affero General Public License is designed specifically to
43ensure that, in such cases, the modified source code becomes available
44to the community. It requires the operator of a network server to
45provide the source code of the modified version running there to the
46users of that server. Therefore, public use of a modified version, on
47a publicly accessible server, gives the public access to the source
48code of the modified version.
49
50 An older license, called the Affero General Public License and
51published by Affero, was designed to accomplish similar goals. This is
52a different license, not a version of the Affero GPL, but Affero has
53released a new version of the Affero GPL which permits relicensing under
54this license.
55
56 The precise terms and conditions for copying, distribution and
57modification follow.
58
59 TERMS AND CONDITIONS
60
61 0. Definitions.
62
63 "This License" refers to version 3 of the GNU Affero General Public License.
64
65 "Copyright" also means copyright-like laws that apply to other kinds of
66works, such as semiconductor masks.
67
68 "The Program" refers to any copyrightable work licensed under this
69License. Each licensee is addressed as "you". "Licensees" and
70"recipients" may be individuals or organizations.
71
72 To "modify" a work means to copy from or adapt all or part of the work
73in a fashion requiring copyright permission, other than the making of an
74exact copy. The resulting work is called a "modified version" of the
75earlier work or a work "based on" the earlier work.
76
77 A "covered work" means either the unmodified Program or a work based
78on the Program.
79
80 To "propagate" a work means to do anything with it that, without
81permission, would make you directly or secondarily liable for
82infringement under applicable copyright law, except executing it on a
83computer or modifying a private copy. Propagation includes copying,
84distribution (with or without modification), making available to the
85public, and in some countries other activities as well.
86
87 To "convey" a work means any kind of propagation that enables other
88parties to make or receive copies. Mere interaction with a user through
89a computer network, with no transfer of a copy, is not conveying.
90
91 An interactive user interface displays "Appropriate Legal Notices"
92to the extent that it includes a convenient and prominently visible
93feature that (1) displays an appropriate copyright notice, and (2)
94tells the user that there is no warranty for the work (except to the
95extent that warranties are provided), that licensees may convey the
96work under this License, and how to view a copy of this License. If
97the interface presents a list of user commands or options, such as a
98menu, a prominent item in the list meets this criterion.
99
100 1. Source Code.
101
102 The "source code" for a work means the preferred form of the work
103for making modifications to it. "Object code" means any non-source
104form of a work.
105
106 A "Standard Interface" means an interface that either is an official
107standard defined by a recognized standards body, or, in the case of
108interfaces specified for a particular programming language, one that
109is widely used among developers working in that language.
110
111 The "System Libraries" of an executable work include anything, other
112than the work as a whole, that (a) is included in the normal form of
113packaging a Major Component, but which is not part of that Major
114Component, and (b) serves only to enable use of the work with that
115Major Component, or to implement a Standard Interface for which an
116implementation is available to the public in source code form. A
117"Major Component", in this context, means a major essential component
118(kernel, window system, and so on) of the specific operating system
119(if any) on which the executable work runs, or a compiler used to
120produce the work, or an object code interpreter used to run it.
121
122 The "Corresponding Source" for a work in object code form means all
123the source code needed to generate, install, and (for an executable
124work) run the object code and to modify the work, including scripts to
125control those activities. However, it does not include the work's
126System Libraries, or general-purpose tools or generally available free
127programs which are used unmodified in performing those activities but
128which are not part of the work. For example, Corresponding Source
129includes interface definition files associated with source files for
130the work, and the source code for shared libraries and dynamically
131linked subprograms that the work is specifically designed to require,
132such as by intimate data communication or control flow between those
133subprograms and other parts of the work.
134
135 The Corresponding Source need not include anything that users
136can regenerate automatically from other parts of the Corresponding
137Source.
138
139 The Corresponding Source for a work in source code form is that
140same work.
141
142 2. Basic Permissions.
143
144 All rights granted under this License are granted for the term of
145copyright on the Program, and are irrevocable provided the stated
146conditions are met. This License explicitly affirms your unlimited
147permission to run the unmodified Program. The output from running a
148covered work is covered by this License only if the output, given its
149content, constitutes a covered work. This License acknowledges your
150rights of fair use or other equivalent, as provided by copyright law.
151
152 You may make, run and propagate covered works that you do not
153convey, without conditions so long as your license otherwise remains
154in force. You may convey covered works to others for the sole purpose
155of having them make modifications exclusively for you, or provide you
156with facilities for running those works, provided that you comply with
157the terms of this License in conveying all material for which you do
158not control copyright. Those thus making or running the covered works
159for you must do so exclusively on your behalf, under your direction
160and control, on terms that prohibit them from making any copies of
161your copyrighted material outside their relationship with you.
162
163 Conveying under any other circumstances is permitted solely under
164the conditions stated below. Sublicensing is not allowed; section 10
165makes it unnecessary.
166
167 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168
169 No covered work shall be deemed part of an effective technological
170measure under any applicable law fulfilling obligations under article
17111 of the WIPO copyright treaty adopted on 20 December 1996, or
172similar laws prohibiting or restricting circumvention of such
173measures.
174
175 When you convey a covered work, you waive any legal power to forbid
176circumvention of technological measures to the extent such circumvention
177is effected by exercising rights under this License with respect to
178the covered work, and you disclaim any intention to limit operation or
179modification of the work as a means of enforcing, against the work's
180users, your or third parties' legal rights to forbid circumvention of
181technological measures.
182
183 4. Conveying Verbatim Copies.
184
185 You may convey verbatim copies of the Program's source code as you
186receive it, in any medium, provided that you conspicuously and
187appropriately publish on each copy an appropriate copyright notice;
188keep intact all notices stating that this License and any
189non-permissive terms added in accord with section 7 apply to the code;
190keep intact all notices of the absence of any warranty; and give all
191recipients a copy of this License along with the Program.
192
193 You may charge any price or no price for each copy that you convey,
194and you may offer support or warranty protection for a fee.
195
196 5. Conveying Modified Source Versions.
197
198 You may convey a work based on the Program, or the modifications to
199produce it from the Program, in the form of source code under the
200terms of section 4, provided that you also meet all of these conditions:
201
202 a) The work must carry prominent notices stating that you modified
203 it, and giving a relevant date.
204
205 b) The work must carry prominent notices stating that it is
206 released under this License and any conditions added under section
207 7. This requirement modifies the requirement in section 4 to
208 "keep intact all notices".
209
210 c) You must license the entire work, as a whole, under this
211 License to anyone who comes into possession of a copy. This
212 License will therefore apply, along with any applicable section 7
213 additional terms, to the whole of the work, and all its parts,
214 regardless of how they are packaged. This License gives no
215 permission to license the work in any other way, but it does not
216 invalidate such permission if you have separately received it.
217
218 d) If the work has interactive user interfaces, each must display
219 Appropriate Legal Notices; however, if the Program has interactive
220 interfaces that do not display Appropriate Legal Notices, your
221 work need not make them do so.
222
223 A compilation of a covered work with other separate and independent
224works, which are not by their nature extensions of the covered work,
225and which are not combined with it such as to form a larger program,
226in or on a volume of a storage or distribution medium, is called an
227"aggregate" if the compilation and its resulting copyright are not
228used to limit the access or legal rights of the compilation's users
229beyond what the individual works permit. Inclusion of a covered work
230in an aggregate does not cause this License to apply to the other
231parts of the aggregate.
232
233 6. Conveying Non-Source Forms.
234
235 You may convey a covered work in object code form under the terms
236of sections 4 and 5, provided that you also convey the
237machine-readable Corresponding Source under the terms of this License,
238in one of these ways:
239
240 a) Convey the object code in, or embodied in, a physical product
241 (including a physical distribution medium), accompanied by the
242 Corresponding Source fixed on a durable physical medium
243 customarily used for software interchange.
244
245 b) Convey the object code in, or embodied in, a physical product
246 (including a physical distribution medium), accompanied by a
247 written offer, valid for at least three years and valid for as
248 long as you offer spare parts or customer support for that product
249 model, to give anyone who possesses the object code either (1) a
250 copy of the Corresponding Source for all the software in the
251 product that is covered by this License, on a durable physical
252 medium customarily used for software interchange, for a price no
253 more than your reasonable cost of physically performing this
254 conveying of source, or (2) access to copy the
255 Corresponding Source from a network server at no charge.
256
257 c) Convey individual copies of the object code with a copy of the
258 written offer to provide the Corresponding Source. This
259 alternative is allowed only occasionally and noncommercially, and
260 only if you received the object code with such an offer, in accord
261 with subsection 6b.
262
263 d) Convey the object code by offering access from a designated
264 place (gratis or for a charge), and offer equivalent access to the
265 Corresponding Source in the same way through the same place at no
266 further charge. You need not require recipients to copy the
267 Corresponding Source along with the object code. If the place to
268 copy the object code is a network server, the Corresponding Source
269 may be on a different server (operated by you or a third party)
270 that supports equivalent copying facilities, provided you maintain
271 clear directions next to the object code saying where to find the
272 Corresponding Source. Regardless of what server hosts the
273 Corresponding Source, you remain obligated to ensure that it is
274 available for as long as needed to satisfy these requirements.
275
276 e) Convey the object code using peer-to-peer transmission, provided
277 you inform other peers where the object code and Corresponding
278 Source of the work are being offered to the general public at no
279 charge under subsection 6d.
280
281 A separable portion of the object code, whose source code is excluded
282from the Corresponding Source as a System Library, need not be
283included in conveying the object code work.
284
285 A "User Product" is either (1) a "consumer product", which means any
286tangible personal property which is normally used for personal, family,
287or household purposes, or (2) anything designed or sold for incorporation
288into a dwelling. In determining whether a product is a consumer product,
289doubtful cases shall be resolved in favor of coverage. For a particular
290product received by a particular user, "normally used" refers to a
291typical or common use of that class of product, regardless of the status
292of the particular user or of the way in which the particular user
293actually uses, or expects or is expected to use, the product. A product
294is a consumer product regardless of whether the product has substantial
295commercial, industrial or non-consumer uses, unless such uses represent
296the only significant mode of use of the product.
297
298 "Installation Information" for a User Product means any methods,
299procedures, authorization keys, or other information required to install
300and execute modified versions of a covered work in that User Product from
301a modified version of its Corresponding Source. The information must
302suffice to ensure that the continued functioning of the modified object
303code is in no case prevented or interfered with solely because
304modification has been made.
305
306 If you convey an object code work under this section in, or with, or
307specifically for use in, a User Product, and the conveying occurs as
308part of a transaction in which the right of possession and use of the
309User Product is transferred to the recipient in perpetuity or for a
310fixed term (regardless of how the transaction is characterized), the
311Corresponding Source conveyed under this section must be accompanied
312by the Installation Information. But this requirement does not apply
313if neither you nor any third party retains the ability to install
314modified object code on the User Product (for example, the work has
315been installed in ROM).
316
317 The requirement to provide Installation Information does not include a
318requirement to continue to provide support service, warranty, or updates
319for a work that has been modified or installed by the recipient, or for
320the User Product in which it has been modified or installed. Access to a
321network may be denied when the modification itself materially and
322adversely affects the operation of the network or violates the rules and
323protocols for communication across the network.
324
325 Corresponding Source conveyed, and Installation Information provided,
326in accord with this section must be in a format that is publicly
327documented (and with an implementation available to the public in
328source code form), and must require no special password or key for
329unpacking, reading or copying.
330
331 7. Additional Terms.
332
333 "Additional permissions" are terms that supplement the terms of this
334License by making exceptions from one or more of its conditions.
335Additional permissions that are applicable to the entire Program shall
336be treated as though they were included in this License, to the extent
337that they are valid under applicable law. If additional permissions
338apply only to part of the Program, that part may be used separately
339under those permissions, but the entire Program remains governed by
340this License without regard to the additional permissions.
341
342 When you convey a copy of a covered work, you may at your option
343remove any additional permissions from that copy, or from any part of
344it. (Additional permissions may be written to require their own
345removal in certain cases when you modify the work.) You may place
346additional permissions on material, added by you to a covered work,
347for which you have or can give appropriate copyright permission.
348
349 Notwithstanding any other provision of this License, for material you
350add to a covered work, you may (if authorized by the copyright holders of
351that material) supplement the terms of this License with terms:
352
353 a) Disclaiming warranty or limiting liability differently from the
354 terms of sections 15 and 16 of this License; or
355
356 b) Requiring preservation of specified reasonable legal notices or
357 author attributions in that material or in the Appropriate Legal
358 Notices displayed by works containing it; or
359
360 c) Prohibiting misrepresentation of the origin of that material, or
361 requiring that modified versions of such material be marked in
362 reasonable ways as different from the original version; or
363
364 d) Limiting the use for publicity purposes of names of licensors or
365 authors of the material; or
366
367 e) Declining to grant rights under trademark law for use of some
368 trade names, trademarks, or service marks; or
369
370 f) Requiring indemnification of licensors and authors of that
371 material by anyone who conveys the material (or modified versions of
372 it) with contractual assumptions of liability to the recipient, for
373 any liability that these contractual assumptions directly impose on
374 those licensors and authors.
375
376 All other non-permissive additional terms are considered "further
377restrictions" within the meaning of section 10. If the Program as you
378received it, or any part of it, contains a notice stating that it is
379governed by this License along with a term that is a further
380restriction, you may remove that term. If a license document contains
381a further restriction but permits relicensing or conveying under this
382License, you may add to a covered work material governed by the terms
383of that license document, provided that the further restriction does
384not survive such relicensing or conveying.
385
386 If you add terms to a covered work in accord with this section, you
387must place, in the relevant source files, a statement of the
388additional terms that apply to those files, or a notice indicating
389where to find the applicable terms.
390
391 Additional terms, permissive or non-permissive, may be stated in the
392form of a separately written license, or stated as exceptions;
393the above requirements apply either way.
394
395 8. Termination.
396
397 You may not propagate or modify a covered work except as expressly
398provided under this License. Any attempt otherwise to propagate or
399modify it is void, and will automatically terminate your rights under
400this License (including any patent licenses granted under the third
401paragraph of section 11).
402
403 However, if you cease all violation of this License, then your
404license from a particular copyright holder is reinstated (a)
405provisionally, unless and until the copyright holder explicitly and
406finally terminates your license, and (b) permanently, if the copyright
407holder fails to notify you of the violation by some reasonable means
408prior to 60 days after the cessation.
409
410 Moreover, your license from a particular copyright holder is
411reinstated permanently if the copyright holder notifies you of the
412violation by some reasonable means, this is the first time you have
413received notice of violation of this License (for any work) from that
414copyright holder, and you cure the violation prior to 30 days after
415your receipt of the notice.
416
417 Termination of your rights under this section does not terminate the
418licenses of parties who have received copies or rights from you under
419this License. If your rights have been terminated and not permanently
420reinstated, you do not qualify to receive new licenses for the same
421material under section 10.
422
423 9. Acceptance Not Required for Having Copies.
424
425 You are not required to accept this License in order to receive or
426run a copy of the Program. Ancillary propagation of a covered work
427occurring solely as a consequence of using peer-to-peer transmission
428to receive a copy likewise does not require acceptance. However,
429nothing other than this License grants you permission to propagate or
430modify any covered work. These actions infringe copyright if you do
431not accept this License. Therefore, by modifying or propagating a
432covered work, you indicate your acceptance of this License to do so.
433
434 10. Automatic Licensing of Downstream Recipients.
435
436 Each time you convey a covered work, the recipient automatically
437receives a license from the original licensors, to run, modify and
438propagate that work, subject to this License. You are not responsible
439for enforcing compliance by third parties with this License.
440
441 An "entity transaction" is a transaction transferring control of an
442organization, or substantially all assets of one, or subdividing an
443organization, or merging organizations. If propagation of a covered
444work results from an entity transaction, each party to that
445transaction who receives a copy of the work also receives whatever
446licenses to the work the party's predecessor in interest had or could
447give under the previous paragraph, plus a right to possession of the
448Corresponding Source of the work from the predecessor in interest, if
449the predecessor has it or can get it with reasonable efforts.
450
451 You may not impose any further restrictions on the exercise of the
452rights granted or affirmed under this License. For example, you may
453not impose a license fee, royalty, or other charge for exercise of
454rights granted under this License, and you may not initiate litigation
455(including a cross-claim or counterclaim in a lawsuit) alleging that
456any patent claim is infringed by making, using, selling, offering for
457sale, or importing the Program or any portion of it.
458
459 11. Patents.
460
461 A "contributor" is a copyright holder who authorizes use under this
462License of the Program or a work on which the Program is based. The
463work thus licensed is called the contributor's "contributor version".
464
465 A contributor's "essential patent claims" are all patent claims
466owned or controlled by the contributor, whether already acquired or
467hereafter acquired, that would be infringed by some manner, permitted
468by this License, of making, using, or selling its contributor version,
469but do not include claims that would be infringed only as a
470consequence of further modification of the contributor version. For
471purposes of this definition, "control" includes the right to grant
472patent sublicenses in a manner consistent with the requirements of
473this License.
474
475 Each contributor grants you a non-exclusive, worldwide, royalty-free
476patent license under the contributor's essential patent claims, to
477make, use, sell, offer for sale, import and otherwise run, modify and
478propagate the contents of its contributor version.
479
480 In the following three paragraphs, a "patent license" is any express
481agreement or commitment, however denominated, not to enforce a patent
482(such as an express permission to practice a patent or covenant not to
483sue for patent infringement). To "grant" such a patent license to a
484party means to make such an agreement or commitment not to enforce a
485patent against the party.
486
487 If you convey a covered work, knowingly relying on a patent license,
488and the Corresponding Source of the work is not available for anyone
489to copy, free of charge and under the terms of this License, through a
490publicly available network server or other readily accessible means,
491then you must either (1) cause the Corresponding Source to be so
492available, or (2) arrange to deprive yourself of the benefit of the
493patent license for this particular work, or (3) arrange, in a manner
494consistent with the requirements of this License, to extend the patent
495license to downstream recipients. "Knowingly relying" means you have
496actual knowledge that, but for the patent license, your conveying the
497covered work in a country, or your recipient's use of the covered work
498in a country, would infringe one or more identifiable patents in that
499country that you have reason to believe are valid.
500
501 If, pursuant to or in connection with a single transaction or
502arrangement, you convey, or propagate by procuring conveyance of, a
503covered work, and grant a patent license to some of the parties
504receiving the covered work authorizing them to use, propagate, modify
505or convey a specific copy of the covered work, then the patent license
506you grant is automatically extended to all recipients of the covered
507work and works based on it.
508
509 A patent license is "discriminatory" if it does not include within
510the scope of its coverage, prohibits the exercise of, or is
511conditioned on the non-exercise of one or more of the rights that are
512specifically granted under this License. You may not convey a covered
513work if you are a party to an arrangement with a third party that is
514in the business of distributing software, under which you make payment
515to the third party based on the extent of your activity of conveying
516the work, and under which the third party grants, to any of the
517parties who would receive the covered work from you, a discriminatory
518patent license (a) in connection with copies of the covered work
519conveyed by you (or copies made from those copies), or (b) primarily
520for and in connection with specific products or compilations that
521contain the covered work, unless you entered into that arrangement,
522or that patent license was granted, prior to 28 March 2007.
523
524 Nothing in this License shall be construed as excluding or limiting
525any implied license or other defenses to infringement that may
526otherwise be available to you under applicable patent law.
527
528 12. No Surrender of Others' Freedom.
529
530 If conditions are imposed on you (whether by court order, agreement or
531otherwise) that contradict the conditions of this License, they do not
532excuse you from the conditions of this License. If you cannot convey a
533covered work so as to satisfy simultaneously your obligations under this
534License and any other pertinent obligations, then as a consequence you may
535not convey it at all. For example, if you agree to terms that obligate you
536to collect a royalty for further conveying from those to whom you convey
537the Program, the only way you could satisfy both those terms and this
538License would be to refrain entirely from conveying the Program.
539
540 13. Remote Network Interaction; Use with the GNU General Public License.
541
542 Notwithstanding any other provision of this License, if you modify the
543Program, your modified version must prominently offer all users
544interacting with it remotely through a computer network (if your version
545supports such interaction) an opportunity to receive the Corresponding
546Source of your version by providing access to the Corresponding Source
547from a network server at no charge, through some standard or customary
548means of facilitating copying of software. This Corresponding Source
549shall include the Corresponding Source for any work covered by version 3
550of the GNU General Public License that is incorporated pursuant to the
551following paragraph.
552
553 Notwithstanding any other provision of this License, you have
554permission to link or combine any covered work with a work licensed
555under version 3 of the GNU General Public License into a single
556combined work, and to convey the resulting work. The terms of this
557License will continue to apply to the part which is the covered work,
558but the work with which it is combined will remain governed by version
5593 of the GNU General Public License.
560
561 14. Revised Versions of this License.
562
563 The Free Software Foundation may publish revised and/or new versions of
564the GNU Affero General Public License from time to time. Such new versions
565will be similar in spirit to the present version, but may differ in detail to
566address new problems or concerns.
567
568 Each version is given a distinguishing version number. If the
569Program specifies that a certain numbered version of the GNU Affero General
570Public License "or any later version" applies to it, you have the
571option of following the terms and conditions either of that numbered
572version or of any later version published by the Free Software
573Foundation. If the Program does not specify a version number of the
574GNU Affero General Public License, you may choose any version ever published
575by the Free Software Foundation.
576
577 If the Program specifies that a proxy can decide which future
578versions of the GNU Affero General Public License can be used, that proxy's
579public statement of acceptance of a version permanently authorizes you
580to choose that version for the Program.
581
582 Later license versions may give you additional or different
583permissions. However, no additional obligations are imposed on any
584author or copyright holder as a result of your choosing to follow a
585later version.
586
587 15. Disclaimer of Warranty.
588
589 THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597
598 16. Limitation of Liability.
599
600 IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608SUCH DAMAGES.
609
610 17. Interpretation of Sections 15 and 16.
611
612 If the disclaimer of warranty and limitation of liability provided
613above cannot be given local legal effect according to their terms,
614reviewing courts shall apply local law that most closely approximates
615an absolute waiver of all civil liability in connection with the
616Program, unless a warranty or assumption of liability accompanies a
617copy of the Program in return for a fee.
618
619 END OF TERMS AND CONDITIONS
620
621 How to Apply These Terms to Your New Programs
622
623 If you develop a new program, and you want it to be of the greatest
624possible use to the public, the best way to achieve this is to make it
625free software which everyone can redistribute and change under these terms.
626
627 To do so, attach the following notices to the program. It is safest
628to attach them to the start of each source file to most effectively
629state the exclusion of warranty; and each file should have at least
630the "copyright" line and a pointer to where the full notice is found.
631
632 <one line to give the program's name and a brief idea of what it does.>
633 Copyright (C) <year> <name of author>
634
635 This program is free software: you can redistribute it and/or modify
636 it under the terms of the GNU Affero General Public License as published
637 by the Free Software Foundation, either version 3 of the License, or
638 (at your option) any later version.
639
640 This program is distributed in the hope that it will be useful,
641 but WITHOUT ANY WARRANTY; without even the implied warranty of
642 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643 GNU Affero General Public License for more details.
644
645 You should have received a copy of the GNU Affero General Public License
646 along with this program. If not, see <https://www.gnu.org/licenses/>.
647
648Also add information on how to contact you by electronic and paper mail.
649
650 If your software can interact with users remotely through a computer
651network, you should also make sure that it provides a way for users to
652get its source. For example, if your program is a web application, its
653interface could display a "Source" link that leads users to an archive
654of the code. There are many ways you could offer source, and different
655solutions will be better for different programs; see section 13 for the
656specific requirements.
657
658 You should also get your employer (if you work as a programmer) or school,
659if any, to sign a "copyright disclaimer" for the program, if necessary.
660For more information on this, and how to apply and follow the GNU AGPL, see
661<https://www.gnu.org/licenses/>.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..32a5d00
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,2 @@
1include LICENSE
2include tap_google_sheets/schemas/*.json
diff --git a/README.md b/README.md
index db00e68..6fcb0b9 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,172 @@
1tap-google-sheets 1# tap-google-sheets
2
3This is a [Singer](https://singer.io) tap that produces JSON-formatted data
4following the [Singer
5spec](https://github.com/singer-io/getting-started/blob/master/SPEC.md).
6
7This tap:
8
9- Pulls raw data from the [Google Sheets v4 API](https://developers.google.com/sheets/api)
10- Extracts the following endpoints:
11 - [Spreadsheet File](https://developers.google.com/drive/api/v3/reference/files/get)
12 - [Spreadsheet Metadata, Sheets, Headers](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/get)
13 - [Spreadsheet Values](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets.values/get)
14- For each Sheet:
15 - Outputs the schema for each resource (based on the column header and datatypes of first row of data)
16 - Outputs a record for all columns with column headers, and for each row of data until it reaches an empty row
17
18## API Endpoints
19[**file (GET)**](https://developers.google.com/drive/api/v3/reference/files/get)
20- Endpoint: https://www.googleapis.com/drive/v3/files/${spreadsheet_id}?fields=id,name,createdTime,modifiedTime,version
21- Primary keys: id
22- Replication strategy: Full (GET file audit data for spreadsheet_id in config)
23- Process/Transformations: Replicate Data if Modified
24
25[**metadata (GET)**](https://developers.google.com/drive/api/v3/reference/files/get)
26- Endpoint: https://sheets.googleapis.com/v4/spreadsheets/${spreadsheet_id}?includeGridData=true&ranges=1:2
27- This endpoint eturns spreadsheet metadata, sheet metadata, and value metadata (data type information)
28- Primary keys: spreadsheetId, title, field_name
29- Foreign keys: None
30- Replication strategy: Full (get and replace file metadata for spreadshee_id in config)
31- Process/Transformations:
32 - Verify Sheets: Check sheets exist (compared to catalog) and check gridProperties (available area)
33 - sheetId, title, index, gridProperties (rowCount, columnCount)
34 - Verify Field Headers (1st row): Check field headers exist (compared to catalog), missing headers (columns to skip), column order/position, and column uniqueness
35 - Header's field_name, position: data.rowData[0].values[i].formattedValue
36 - Create/Verify Datatypes (2nd row):
37 - Row 2's datatype, format: data.rowData[1].values[i]
38 - First check:
39 - [effectiveValue: key](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/other#ExtendedValue)
40 - Valid types: numberValue, stringValue, boolValue
41 - Invalid types: formulaValue, errorValue
42 - Then check:
43 - [effectiveFormat.numberFormat.type](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType)
44 - Valid types: UNEPECIFIED, TEXT, NUMBER, PERCENT, CURRENCY, DATE, TIME, DATE_TIME, SCIENTIFIC
45 - If DATE or DATE_TIME, set JSON schema datatype = string and format = date-time
46 - [effectiveFormat.numberFormat.pattern](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormat)
47
48[**values (GET)**](https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets.values/get)
49- Endpoint: https://sheets.googleapis.com/v4/spreadsheets/${spreadsheet_id}/values/'${sheet_name}'!${row_range}?dateTimeRenderOption=SERIAL_NUMBER&valueRenderOption=UNFORMATTED_VALUE&majorDimension=ROWS
50- This endpoint loops through sheets and row ranges to get the [unformatted values](https://developers.google.com/sheets/api/reference/rest/v4/ValueRenderOption) (effective values only), dates and datetimes as [serial numbers](https://developers.google.com/sheets/api/reference/rest/v4/DateTimeRenderOption)
51- Primary keys: row
52- Replication strategy: Full (GET file audit data for spreadsheet_id in config)
53- Process/Transformations:
54 - Loop through sheets (compared to catalog selection)
55 - Send metadata for sheet
56 - Loop through ranges of rows until reaching empty row or area max row (from sheet metadata)
57 - Transform values, if necessary (dates, date-times, boolean, integer, numers)
58 - Process/send records
59
60## Authentication
61The [**Google Sheets Setup & Authentication**](https://drive.google.com/open?id=1FojlvtLwS0-BzGS37R0jEXtwSHqSiO1Uw-7RKQQO-C4) Google Doc provides instructions show how to configure the Google Cloud API credentials to enable Google Drive and Google Sheets APIs, configure Google Cloud to authorize/verify your domain ownership, generate an API key (client_id, client_secret), authenticate and generate a refresh_token, and prepare your tap config.json with the necessary parameters.
62- Enable Googe Drive APIs and Authorization Scope: https://www.googleapis.com/auth/drive.metadata.readonly
63- Enable Google Sheets API and Authorization Scope: https://www.googleapis.com/auth/spreadsheets.readonly
64- Tap config.json parameters:
65 - client_id: identifies your application
66 - client_secret: authenticates your application
67 - refresh_token: generates an access token to authorize your session
68 - spreadsheet_id: unique identifier for each spreadsheet in Google Drive
69 - start_date: absolute minimum start date to check file modified
70 - user_agent: tap-name and email address; identifies your application in the Remote API server logs
71
72## Quick Start
73
741. Install
75
76 Clone this repository, and then install using setup.py. We recommend using a virtualenv:
77
78 ```bash
79 > virtualenv -p python3 venv
80 > source venv/bin/activate
81 > python setup.py install
82 OR
83 > cd .../tap-google-sheets
84 > pip install .
85 ```
862. Dependent libraries
87 The following dependent libraries were installed.
88 ```bash
89 > pip install target-json
90 > pip install target-stitch
91 > pip install singer-tools
92 > pip install singer-python
93 ```
94 - [singer-tools](https://github.com/singer-io/singer-tools)
95 - [target-stitch](https://github.com/singer-io/target-stitch)
96
973. Create your tap's `config.json` file. Include the client_id, client_secret, refresh_token, site_urls (website URL properties in a comma delimited list; do not include the domain-level property in the list), start_date (UTC format), and user_agent (tap name with the api user email address).
98
99 ```json
100 {
101 "client_id": "YOUR_CLIENT_ID",
102 "client_secret": "YOUR_CLIENT_SECRET",
103 "refresh_token": "YOUR_REFRESH_TOKEN",
104 "spreadsheet_id": "YOUR_GOOGLE_SPREADSHEET_ID",
105 "start_date": "2019-01-01T00:00:00Z",
106 "user_agent": "tap-google-sheets <api_user_email@example.com>"
107 }
108 ```
109
110 Optionally, also create a `state.json` file. `currently_syncing` is an optional attribute used for identifying the last object to be synced in case the job is interrupted mid-stream. The next run would begin where the last job left off.
111 Only the `performance_reports` uses a bookmark. The date-time bookmark is stored in a nested structure based on the endpoint, site, and sub_type.
112
113 ```json
114 {
115 "currently_syncing": "file_metadata",
116 "bookmarks": {
117 "file_metadata": "2019-09-27T22:34:39.000000Z",
118 "sheets_loaded": "2019-09-27T21:55:42.000000Z"
119 }
120 }
121 ```
122
1234. Run the Tap in Discovery Mode
124 This creates a catalog.json for selecting objects/fields to integrate:
125 ```bash
126 tap-google-sheets --config config.json --discover > catalog.json
127 ```
128 See the Singer docs on discovery mode
129 [here](https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#discovery-mode).
130
1315. Run the Tap in Sync Mode (with catalog) and [write out to state file](https://github.com/singer-io/getting-started/blob/master/docs/RUNNING_AND_DEVELOPING.md#running-a-singer-tap-with-a-singer-target)
132
133 For Sync mode:
134 ```bash
135 > tap-google-sheets --config tap_config.json --catalog catalog.json > state.json
136 > tail -1 state.json > state.json.tmp && mv state.json.tmp state.json
137 ```
138 To load to json files to verify outputs:
139 ```bash
140 > tap-google-sheets --config tap_config.json --catalog catalog.json | target-json > state.json
141 > tail -1 state.json > state.json.tmp && mv state.json.tmp state.json
142 ```
143 To pseudo-load to [Stitch Import API](https://github.com/singer-io/target-stitch) with dry run:
144 ```bash
145 > tap-google-sheets --config tap_config.json --catalog catalog.json | target-stitch --config target_config.json --dry-run > state.json
146 > tail -1 state.json > state.json.tmp && mv state.json.tmp state.json
147 ```
148
1496. Test the Tap
150
151 While developing the Google Search Console tap, the following utilities were run in accordance with Singer.io best practices:
152 Pylint to improve [code quality](https://github.com/singer-io/getting-started/blob/master/docs/BEST_PRACTICES.md#code-quality):
153 ```bash
154 > pylint tap_google_sheets -d missing-docstring -d logging-format-interpolation -d too-many-locals -d too-many-arguments
155 ```
156 Pylint test resulted in the following score:
157 ```bash
158 TBD
159 ```
160
161 To [check the tap](https://github.com/singer-io/singer-tools#singer-check-tap) and verify working:
162 ```bash
163 > tap-google-sheets --config tap_config.json --catalog catalog.json | singer-check-tap > state.json
164 > tail -1 state.json > state.json.tmp && mv state.json.tmp state.json
165 ```
166 Check tap resulted in the following:
167 ```bash
168 TBD
169 ```
170---
171
172Copyright &copy; 2019 Stitch
diff --git a/config.json.example b/config.json.example
new file mode 100644
index 0000000..159aafb
--- /dev/null
+++ b/config.json.example
@@ -0,0 +1,8 @@
1{
2 "client_id": "YOUR_CLIENT_ID",
3 "client_secret": "YOUR_CLIENT_SECRET",
4 "refresh_token": "YOUR_REFRESH_TOKEN",
5 "spreadsheet_id": "YOUR_GOOGLE_SPREADSHEET_ID",
6 "start_date": "2019-01-01T00:00:00Z",
7 "user_agent": "tap-google-search-console <api_user_email@example.com>"
8}
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..e3c4f3e
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,25 @@
1#!/usr/bin/env python
2
3from setuptools import setup, find_packages
4
5setup(name='tap-google-sheets',
6 version='0.0.1',
7 description='Singer.io tap for extracting data from the Google Sheets v4 API',
8 author='jeff.huth@bytecode.io',
9 classifiers=['Programming Language :: Python :: 3 :: Only'],
10 py_modules=['tap_google_sheets'],
11 install_requires=[
12 'backoff==1.8.0',
13 'requests==2.22.0',
14 'singer-python==5.8.1'
15 ],
16 entry_points='''
17 [console_scripts]
18 tap-google-sheets=tap_google_sheets:main
19 ''',
20 packages=find_packages(),
21 package_data={
22 'tap_google_sheets': [
23 'schemas/*.json'
24 ]
25 })
diff --git a/state.json.example b/state.json.example
new file mode 100644
index 0000000..c33cc8f
--- /dev/null
+++ b/state.json.example
@@ -0,0 +1,6 @@
1{
2 "currently_syncing": "spreadsheets",
3 "bookmarks": {
4 "file": "2019-09-27T22:34:39.000000Z"
5 }
6}
diff --git a/tap_google_sheets/__init__.py b/tap_google_sheets/__init__.py
new file mode 100644
index 0000000..f97d4b8
--- /dev/null
+++ b/tap_google_sheets/__init__.py
@@ -0,0 +1,57 @@
1#!/usr/bin/env python3
2
3import sys
4import json
5import argparse
6import singer
7from singer import metadata, utils
8from tap_google_sheets.client import GoogleClient
9from tap_google_sheets.discover import discover
10from tap_google_sheets.sync import sync
11
12LOGGER = singer.get_logger()
13
14REQUIRED_CONFIG_KEYS = [
15 'client_id',
16 'client_secret',
17 'refresh_token',
18 'spreadsheet_id',
19 'start_date',
20 'user_agent'
21]
22
23def do_discover(client, spreadsheet_id):
24
25 LOGGER.info('Starting discover')
26 catalog = discover(client, spreadsheet_id)
27 json.dump(catalog.to_dict(), sys.stdout, indent=2)
28 LOGGER.info('Finished discover')
29
30
31@singer.utils.handle_top_exception(LOGGER)
32def main():
33
34 parsed_args = singer.utils.parse_args(REQUIRED_CONFIG_KEYS)
35
36 with GoogleClient(parsed_args.config['client_id'],
37 parsed_args.config['client_secret'],
38 parsed_args.config['refresh_token'],
39 parsed_args.config['user_agent']) as client:
40
41 state = {}
42 if parsed_args.state:
43 state = parsed_args.state
44
45 config = parsed_args.config
46 spreadsheet_id = config.get('spreadsheet_id')
47
48 if parsed_args.discover:
49 do_discover(client, spreadsheet_id)
50 elif parsed_args.catalog:
51 sync(client=client,
52 config=config,
53 catalog=parsed_args.catalog,
54 state=state)
55
56if __name__ == '__main__':
57 main()
diff --git a/tap_google_sheets/client.py b/tap_google_sheets/client.py
new file mode 100644
index 0000000..12f0811
--- /dev/null
+++ b/tap_google_sheets/client.py
@@ -0,0 +1,247 @@
1from datetime import datetime, timedelta
2import backoff
3import requests
4from collections import OrderedDict
5
6import singer
7from singer import metrics
8from singer import utils
9
10BASE_URL = 'https://www.googleapis.com'
11GOOGLE_TOKEN_URI = 'https://oauth2.googleapis.com/token'
12LOGGER = singer.get_logger()
13
14
15class Server5xxError(Exception):
16 pass
17
18
19class Server429Error(Exception):
20 pass
21
22
23class GoogleError(Exception):
24 pass
25
26
27class GoogleBadRequestError(GoogleError):
28 pass
29
30
31class GoogleUnauthorizedError(GoogleError):
32 pass
33
34
35class GooglePaymentRequiredError(GoogleError):
36 pass
37
38
39class GoogleNotFoundError(GoogleError):
40 pass
41
42
43class GoogleMethodNotAllowedError(GoogleError):
44 pass
45
46
47class GoogleConflictError(GoogleError):
48 pass
49
50
51class GoogleGoneError(GoogleError):
52 pass
53
54
55class GooglePreconditionFailedError(GoogleError):
56 pass
57
58
59class GoogleRequestEntityTooLargeError(GoogleError):
60 pass
61
62
63class GoogleRequestedRangeNotSatisfiableError(GoogleError):
64 pass
65
66
67class GoogleExpectationFailedError(GoogleError):
68 pass
69
70
71class GoogleForbiddenError(GoogleError):
72 pass
73
74
75class GoogleUnprocessableEntityError(GoogleError):
76 pass
77
78
79class GooglePreconditionRequiredError(GoogleError):
80 pass
81
82
83class GoogleInternalServiceError(GoogleError):
84 pass
85
86
87# Error Codes: https://developers.google.com/webmaster-tools/search-console-api-original/v3/errors
88ERROR_CODE_EXCEPTION_MAPPING = {
89 400: GoogleBadRequestError,
90 401: GoogleUnauthorizedError,
91 402: GooglePaymentRequiredError,
92 403: GoogleForbiddenError,
93 404: GoogleNotFoundError,
94 405: GoogleMethodNotAllowedError,
95 409: GoogleConflictError,
96 410: GoogleGoneError,
97 412: GooglePreconditionFailedError,
98 413: GoogleRequestEntityTooLargeError,
99 416: GoogleRequestedRangeNotSatisfiableError,
100 417: GoogleExpectationFailedError,
101 422: GoogleUnprocessableEntityError,
102 428: GooglePreconditionRequiredError,
103 500: GoogleInternalServiceError}
104
105
106def get_exception_for_error_code(error_code):
107 return ERROR_CODE_EXCEPTION_MAPPING.get(error_code, GoogleError)
108
109def raise_for_error(response):
110 try:
111 response.raise_for_status()
112 except (requests.HTTPError, requests.ConnectionError) as error:
113 try:
114 content_length = len(response.content)
115 if content_length == 0:
116 # There is nothing we can do here since Google has neither sent
117 # us a 2xx response nor a response content.
118 return
119 response = response.json()
120 if ('error' in response) or ('errorCode' in response):
121 message = '%s: %s' % (response.get('error', str(error)),
122 response.get('message', 'Unknown Error'))
123 error_code = response.get('error', {}).get('code')
124 ex = get_exception_for_error_code(error_code)
125 raise ex(message)
126 else:
127 raise GoogleError(error)
128 except (ValueError, TypeError):
129 raise GoogleError(error)
130
131class GoogleClient: # pylint: disable=too-many-instance-attributes
132 def __init__(self,
133 client_id,
134 client_secret,
135 refresh_token,
136 user_agent=None):
137 self.__client_id = client_id
138 self.__client_secret = client_secret
139 self.__refresh_token = refresh_token
140 self.__user_agent = user_agent
141 self.__access_token = None
142 self.__expires = None
143 self.__session = requests.Session()
144 self.base_url = None
145
146
147 def __enter__(self):
148 self.get_access_token()
149 return self
150
151 def __exit__(self, exception_type, exception_value, traceback):
152 self.__session.close()
153
154 @backoff.on_exception(backoff.expo,
155 Server5xxError,
156 max_tries=5,
157 factor=2)
158 def get_access_token(self):
159 # The refresh_token never expires and may be used many times to generate each access_token
160 # Since the refresh_token does not expire, it is not included in get access_token response
161 if self.__access_token is not None and self.__expires > datetime.utcnow():
162 return
163
164 headers = {}
165 if self.__user_agent:
166 headers['User-Agent'] = self.__user_agent
167
168 response = self.__session.post(
169 url=GOOGLE_TOKEN_URI,
170 headers=headers,
171 data={
172 'grant_type': 'refresh_token',
173 'client_id': self.__client_id,
174 'client_secret': self.__client_secret,
175 'refresh_token': self.__refresh_token,
176 })
177
178 if response.status_code >= 500:
179 raise Server5xxError()
180
181 if response.status_code != 200:
182 raise_for_error(response)
183
184 data = response.json()
185 self.__access_token = data['access_token']
186 self.__expires = datetime.utcnow() + timedelta(seconds=data['expires_in'])
187 LOGGER.info('Authorized, token expires = {}'.format(self.__expires))
188
189
190 @backoff.on_exception(backoff.expo,
191 (Server5xxError, ConnectionError, Server429Error),
192 max_tries=7,
193 factor=3)
194 # Rate Limit:
195 # https://developers.google.com/webmaster-tools/search-console-api-original/v3/limits
196 @utils.ratelimit(1200, 60)
197 def request(self, method, path=None, url=None, api=None, **kwargs):
198
199 self.get_access_token()
200
201 self.base_url = 'https://sheets.googleapis.com/v4'
202 if api == 'files':
203 self.base_url = 'https://www.googleapis.com/drive/v3'
204
205 if not url and path:
206 url = '{}/{}'.format(self.base_url, path)
207
208 # endpoint = stream_name (from sync.py API call)
209 if 'endpoint' in kwargs:
210 endpoint = kwargs['endpoint']
211 del kwargs['endpoint']
212 else:
213 endpoint = None
214
215 if 'headers' not in kwargs:
216 kwargs['headers'] = {}
217 kwargs['headers']['Authorization'] = 'Bearer {}'.format(self.__access_token)
218
219 if self.__user_agent:
220 kwargs['headers']['User-Agent'] = self.__user_agent
221
222 if method == 'POST':
223 kwargs['headers']['Content-Type'] = 'application/json'
224
225 with metrics.http_request_timer(endpoint) as timer:
226 response = self.__session.request(method, url, **kwargs)
227 timer.tags[metrics.Tag.http_status_code] = response.status_code
228
229 if response.status_code >= 500:
230 raise Server5xxError()
231
232 #Use retry functionality in backoff to wait and retry if
233 #response code equals 429 because rate limit has been exceeded
234 if response.status_code == 429:
235 raise Server429Error()
236
237 if response.status_code != 200:
238 raise_for_error(response)
239
240 # Ensure keys and rows are ordered as received from API
241 return response.json(object_pairs_hook=OrderedDict)
242
243 def get(self, path, api, **kwargs):
244 return self.request(method='GET', path=path, api=api, **kwargs)
245
246 def post(self, path, api, **kwargs):
247 return self.request(method='POST', path=path, api=api, **kwargs)
diff --git a/tap_google_sheets/discover.py b/tap_google_sheets/discover.py
new file mode 100644
index 0000000..6477a5f
--- /dev/null
+++ b/tap_google_sheets/discover.py
@@ -0,0 +1,26 @@
1from singer.catalog import Catalog, CatalogEntry, Schema
2from tap_google_sheets.schema import get_schemas, STREAMS
3
4
5def discover(client, spreadsheet_id):
6 schemas, field_metadata = get_schemas(client, spreadsheet_id)
7 catalog = Catalog([])
8
9 for stream_name, schema_dict in schemas.items():
10 schema = Schema.from_dict(schema_dict)
11 mdata = field_metadata[stream_name]
12 key_properties = None
13 for md in mdata:
14 table_key_properties = md.get('metadata', {}).get('table-key-properties')
15 if table_key_properties:
16 key_properties = table_key_properties
17
18 catalog.streams.append(CatalogEntry(
19 stream=stream_name,
20 tap_stream_id=stream_name,
21 key_properties=STREAMS.get(stream_name, {}).get('key_properties', key_properties),
22 schema=schema,
23 metadata=mdata
24 ))
25
26 return catalog
diff --git a/tap_google_sheets/schema.py b/tap_google_sheets/schema.py
new file mode 100644
index 0000000..237ab06
--- /dev/null
+++ b/tap_google_sheets/schema.py
@@ -0,0 +1,228 @@
1import os
2import json
3from collections import OrderedDict
4import singer
5from singer import metadata
6from tap_google_sheets.streams import STREAMS
7
8LOGGER = singer.get_logger()
9
10# Reference:
11# https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#Metadata
12
13# Convert column index to column letter
14def colnum_string(n):
15 string = ""
16 while n > 0:
17 n, remainder = divmod(n - 1, 26)
18 string = chr(65 + remainder) + string
19 return string
20
21
22# Create sheet_metadata_json with columns from sheet
23def get_sheet_schema_columns(sheet, spreadsheet_id, client):
24 sheet_json_schema = OrderedDict()
25 data = next(iter(sheet.get('data', [])), {})
26 row_data = data.get('rowData',[])
27 # spreadsheet is an OrderedDict, with orderd sheets and rows in the repsonse
28
29 headers = row_data[0].get('values', [])
30 first_values = row_data[1].get('values', [])
31 # LOGGER.info('first_values = {}'.format(json.dumps(first_values, indent=2, sort_keys=True)))
32
33 sheet_json_schema['type'] = 'object'
34 sheet_json_schema['additionalProperties'] = False
35 sheet_json_schema = {
36 'type': 'object',
37 'additionalProperties': False,
38 'properties': {
39 '__sdc_spreadsheet_id': {
40 'type': ['null', 'string']
41 },
42 '__sdc_sheet_id': {
43 'type': ['null', 'integer']
44 },
45 '__sdc_row': {
46 'type': ['null', 'integer']
47 }
48 }
49 }
50
51 header_list = [] # used for checking uniqueness
52 columns = []
53 prior_header = None
54 i = 0
55 skipped = 0
56 # Read column headers until end or 2 consecutive skipped headers
57 for header in headers:
58 # LOGGER.info('header = {}'.format(json.dumps(header, indent=2, sort_keys=True)))
59 column_index = i + 1
60 column_letter = colnum_string(column_index)
61 header_value = header.get('formattedValue')
62 if header_value: # NOT skipped
63 column_is_skipped = False
64 skipped = 0
65 column_name = '{}'.format(header_value)
66 if column_name in header_list:
67 raise Exception('DUPLICATE HEADER ERROR: {}'.format(column_name))
68 else:
69 header_list.append(column_name)
70
71 first_value = first_values[i]
72 # LOGGER.info('first_value[{}] = {}'.format(i, json.dumps(first_value, indent=2, sort_keys=True)))
73
74 column_effective_value = first_value.get('effectiveValue', {})
75 for key in column_effective_value.keys():
76 if key in ('numberValue', 'stringValue', 'boolValue', 'errorType', 'formulaType'):
77 column_effective_value_type = key
78
79 column_number_format = first_values[i].get('effectiveFormat', {}).get('numberFormat', {})
80 column_number_format_type = column_number_format.get('type')
81
82 # Determine datatype for sheet_json_schema
83 #
84 # column_effective_value_type = numberValue, stringValue, boolValue; INVALID: errorType, formulaType
85 # Reference: https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/other#ExtendedValue
86 #
87 # column_number_format_type = UNEPECIFIED, TEXT, NUMBER, PERCENT, CURRENCY, DATE, TIME, DATE_TIME, SCIENTIFIC
88 # Reference: https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType
89 #
90 column_format = None # Default
91 # column_multiple_of = None # Default
92 if column_effective_value_type in ('formulaValue', 'errorValue'):
93 raise Exception('INVALID DATA TYPE ERROR: {}, value: {}'.format(column_name))
94 elif column_effective_value_type == 'stringValue':
95 column_type = ['null', 'string']
96 column_gs_type = 'stringValue'
97 elif column_effective_value_type == 'boolValue':
98 column_type = ['null', 'boolean', 'string']
99 column_gs_type = 'boolValue'
100 elif column_effective_value_type == 'numberValue':
101 if column_number_format_type == 'DATE_TIME':
102 column_type = ['null', 'string']
103 column_format = 'date-time'
104 column_gs_type = 'numberType.DATE_TIME'
105 elif column_number_format_type == 'DATE':
106 column_type = ['null', 'string']
107 column_format = 'date'
108 column_gs_type = 'numberType.DATE'
109 elif column_number_format_type == 'TIME':
110 column_type = ['null', 'string']
111 column_format = 'time'
112 column_gs_type = 'numberType.TIME'
113 elif column_number_format_type == 'TEXT':
114 column_type = ['null', 'string']
115 column_gs_type = 'stringValue'
116 else:
117 column_type = ['null', 'number', 'string']
118 column_gs_type = 'numberType'
119
120 else: # skipped
121 column_is_skipped = True
122 skipped = skipped + 1
123 column_index_str = str(column_index).zfill(2)
124 column_name = '__sdc_skip_col_{}'.format(column_index_str)
125 column_type = ['null', 'string']
126 column_format = None
127 column_gs_type = 'stringValue'
128
129 if skipped >= 2:
130 # skipped = 2 consecutive skipped headers
131 # Remove prior_header column_name
132 sheet_json_schema['properties'].pop(prior_header, None)
133 column_count = i - 1
134 break
135
136 else:
137 column = {}
138 column = {
139 'columnIndex': column_index,
140 'columnLetter': column_letter,
141 'columnName': column_name,
142 'columnType': column_gs_type,
143 'columnSkipped': column_is_skipped
144 }
145 columns.append(column)
146
147 sheet_json_schema['properties'][column_name] = column
148 sheet_json_schema['properties'][column_name]['type'] = column_type
149 if column_format:
150 sheet_json_schema['properties'][column_name]['format'] = column_format
151
152 prior_header = column_name
153 i = i + 1
154
155 return sheet_json_schema, columns
156
157
158def get_sheet_metadata(sheet, spreadsheet_id, client):
159 sheet_id = sheet.get('properties', {}).get('sheetId')
160 sheet_title = sheet.get('properties', {}).get('title')
161 LOGGER.info('sheet_id = {}, sheet_title = {}'.format(sheet_id, sheet_title))
162
163 stream_name = 'sheet_metadata'
164 stream_metadata = STREAMS.get(stream_name)
165 api = stream_metadata.get('api', 'sheets')
166 params = stream_metadata.get('params', {})
167 querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()]).replace('{sheet_title}', sheet_title)
168 path = '{}?{}'.format(stream_metadata.get('path').replace('{spreadsheet_id}', spreadsheet_id), querystring)
169
170 sheet_md_results = client.get(path=path, api=api, endpoint=stream_name)
171 sheet_cols = sheet_md_results.get('sheets')[0]
172 sheet_schema, columns = get_sheet_schema_columns(sheet_cols, spreadsheet_id, client)
173
174 return sheet_schema, columns
175
176
177def get_abs_path(path):
178 return os.path.join(os.path.dirname(os.path.realpath(__file__)), path)
179
180def get_schemas(client, spreadsheet_id):
181 schemas = {}
182 field_metadata = {}
183
184 for stream_name, stream_metadata in STREAMS.items():
185 schema_path = get_abs_path('schemas/{}.json'.format(stream_name))
186 with open(schema_path) as file:
187 schema = json.load(file)
188 schemas[stream_name] = schema
189 mdata = metadata.new()
190
191 # Documentation:
192 # https://github.com/singer-io/getting-started/blob/master/docs/DISCOVERY_MODE.md#singer-python-helper-functions
193 # Reference:
194 # https://github.com/singer-io/singer-python/blob/master/singer/metadata.py#L25-L44
195 mdata = metadata.get_standard_metadata(
196 schema=schema,
197 key_properties=stream_metadata.get('key_properties', None),
198 valid_replication_keys=stream_metadata.get('replication_keys', None),
199 replication_method=stream_metadata.get('replication_method', None)
200 )
201 field_metadata[stream_name] = mdata
202
203 if stream_name == 'spreadsheet_metadata':
204 api = stream_metadata.get('api', 'sheets')
205 params = stream_metadata.get('params', {})
206 querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()])
207 path = '{}?{}'.format(stream_metadata.get('path').replace('{spreadsheet_id}', spreadsheet_id), querystring)
208
209 spreadsheet_md_results = client.get(path=path, params=querystring, api=api, endpoint=stream_name)
210
211 sheets = spreadsheet_md_results.get('sheets')
212 if sheets:
213 for sheet in sheets:
214 sheet_schema, columns = get_sheet_metadata(sheet, spreadsheet_id, client)
215 # LOGGER.info('sheet_schema = {}'.format(json.dumps(sheet_schema, indent=2, sort_keys=True)))
216
217 sheet_title = sheet.get('properties', {}).get('title')
218 schemas[sheet_title] = sheet_schema
219 sheet_mdata = metadata.new()
220 sheet_mdata = metadata.get_standard_metadata(
221 schema=sheet_schema,
222 key_properties=['__sdc_row'],
223 valid_replication_keys=None,
224 replication_method='FULL_TABLE'
225 )
226 field_metadata[sheet_title] = sheet_mdata
227
228 return schemas, field_metadata
diff --git a/tap_google_sheets/schemas/file_metadata.json b/tap_google_sheets/schemas/file_metadata.json
new file mode 100644
index 0000000..25c19c4
--- /dev/null
+++ b/tap_google_sheets/schemas/file_metadata.json
@@ -0,0 +1,44 @@
1{
2 "type": "object",
3 "additionalProperties": false,
4 "properties": {
5 "id": {
6 "type": ["null", "string"]
7 },
8 "name": {
9 "type": ["null", "string"]
10 },
11 "version": {
12 "type": ["null", "integer"]
13 },
14 "createdTime": {
15 "type": ["null", "string"],
16 "format": "date-time"
17 },
18 "modifiedTime": {
19 "type": ["null", "string"],
20 "format": "date-time"
21 },
22 "teamDriveId": {
23 "type": ["null", "string"]
24 },
25 "driveId": {
26 "type": ["null", "string"]
27 },
28 "lastModifyingUser": {
29 "type": ["null", "object"],
30 "additionalProperties": false,
31 "properties": {
32 "kind": {
33 "type": ["null", "integer"]
34 },
35 "displayName": {
36 "type": ["null", "string"]
37 },
38 "emailAdress": {
39 "type": ["null", "string"]
40 }
41 }
42 }
43 }
44}
diff --git a/tap_google_sheets/schemas/sheet_metadata.json b/tap_google_sheets/schemas/sheet_metadata.json
new file mode 100644
index 0000000..c3f2ac2
--- /dev/null
+++ b/tap_google_sheets/schemas/sheet_metadata.json
@@ -0,0 +1,89 @@
1{
2 "type": "object",
3 "additionalProperties": false,
4 "properties": {
5 "spreadsheetId": {
6 "type": ["null", "string"]
7 },
8 "sheetId": {
9 "type": ["null", "integer"]
10 },
11 "title": {
12 "type": ["null", "string"]
13 },
14 "index": {
15 "type": ["null", "integer"]
16 },
17 "sheetType": {
18 "type": ["null", "string"]
19 },
20 "sheetUrl": {
21 "type": ["null", "string"]
22 },
23 "gridProperties": {
24 "type": ["null", "object"],
25 "additionalProperties": false,
26 "properties": {
27 "rowCount": {
28 "type": ["null", "integer"]
29 },
30 "columnCount": {
31 "type": ["null", "integer"]
32 },
33 "frozenRowCount": {
34 "type": ["null", "integer"]
35 },
36 "frozenColumnCount": {
37 "type": ["null", "integer"]
38 }
39 }
40 },
41 "columns": {
42 "anyOf": [
43 {
44 "type": "array",
45 "items": {
46 "type": ["null", "object"],
47 "additionalProperties": false,
48 "properties": {
49 "columnIndex": {
50 "type": ["null", "integer"]
51 },
52 "columnLetter": {
53 "type": ["null", "string"]
54 },
55 "columnName": {
56 "type": ["null", "string"]
57 },
58 "columnType": {
59 "type": ["null", "string"]
60 },
61 "columnSkipped": {
62 "type": ["null", "boolean"]
63 },
64 "type": {
65 "anyOf": [
66 {
67 "type": "array",
68 "items": {
69 "type": "string"
70 }
71 },
72 {
73 "type": "null"
74 }
75 ]
76 },
77 "format": {
78 "type": ["null", "string"]
79 }
80 }
81 }
82 },
83 {
84 "type": "null"
85 }
86 ]
87 }
88 }
89}
diff --git a/tap_google_sheets/schemas/sheets_loaded.json b/tap_google_sheets/schemas/sheets_loaded.json
new file mode 100644
index 0000000..12f967a
--- /dev/null
+++ b/tap_google_sheets/schemas/sheets_loaded.json
@@ -0,0 +1,22 @@
1{
2 "type": "object",
3 "additionalProperties": false,
4 "properties": {
5 "spreadsheetId": {
6 "type": ["null", "string"]
7 },
8 "sheetId": {
9 "type": ["null", "integer"]
10 },
11 "sheetTitle": {
12 "type": ["null", "string"]
13 },
14 "loadDate": {
15 "type": ["null", "string"],
16 "format": "date-time"
17 },
18 "lastRowNumber": {
19 "type": ["null", "integer"]
20 }
21 }
22}
diff --git a/tap_google_sheets/schemas/spreadsheet_metadata.json b/tap_google_sheets/schemas/spreadsheet_metadata.json
new file mode 100644
index 0000000..852cb76
--- /dev/null
+++ b/tap_google_sheets/schemas/spreadsheet_metadata.json
@@ -0,0 +1,30 @@
1{
2 "type": "object",
3 "additionalProperties": false,
4 "properties": {
5 "spreadsheetId": {
6 "type": ["null", "string"]
7 },
8 "properties": {
9 "type": ["null", "object"],
10 "additionalProperties": false,
11 "properties": {
12 "title": {
13 "type": ["null", "string"]
14 },
15 "locale": {
16 "type": ["null", "string"]
17 },
18 "autoRecalc": {
19 "type": ["null", "string"]
20 },
21 "timeZone": {
22 "type": ["null", "string"]
23 }
24 }
25 },
26 "spreadsheetUrl": {
27 "type": ["null", "string"]
28 }
29 }
30}
diff --git a/tap_google_sheets/streams.py b/tap_google_sheets/streams.py
new file mode 100644
index 0000000..299326a
--- /dev/null
+++ b/tap_google_sheets/streams.py
@@ -0,0 +1,66 @@
1from collections import OrderedDict
2
3# streams: API URL endpoints to be called
4# properties:
5# <root node>: Plural stream name for the endpoint
6# path: API endpoint relative path, when added to the base URL, creates the full path,
7# default = stream_name
8# key_properties: Primary key fields for identifying an endpoint record.
9# replication_method: INCREMENTAL or FULL_TABLE
10# replication_keys: bookmark_field(s), typically a date-time, used for filtering the results
11# and setting the state
12# params: Query, sort, and other endpoint specific parameters; default = {}
13# data_key: JSON element containing the results list for the endpoint; default = root (no data_key)
14# bookmark_query_field: From date-time field used for filtering the query
15# bookmark_type: Data type for bookmark, integer or datetime
16
17FILE_METADATA = {
18 "api": "files",
19 "path": "files/{spreadsheet_id}",
20 "key_properties": ["id"],
21 "replication_method": "FULL_TABLE",
22 "params": {
23 "fields": "id,name,createdTime,modifiedTime,version,teamDriveId,driveId,lastModifyingUser"
24 }
25}
26
27SPREADSHEET_METADATA = {
28 "api": "sheets",
29 "path": "spreadsheets/{spreadsheet_id}",
30 "key_properties": ["spreadsheetId"],
31 "replication_method": "FULL_TABLE",
32 "params": {
33 "includeGridData": "false"
34 }
35}
36
37SHEET_METADATA = {
38 "api": "sheets",
39 "path": "spreadsheets/{spreadsheet_id}",
40 "key_properties": ["sheetId"],
41 "replication_method": "FULL_TABLE",
42 "params": {
43 "includeGridData": "true",
44 "ranges": "'{sheet_title}'!1:2"
45 }
46}
47
48SHEETS_LOADED = {
49 "api": "sheets",
50 "path": "spreadsheets/{spreadsheet_id}/values/'{sheet_title}'!{range_rows}",
51 "data_key": "values",
52 "key_properties": ["spreadsheetId", "sheetId", "loadDate"],
53 "replication_method": "FULL_TABLE",
54 "params": {
55 "dateTimeRenderOption": "SERIAL_NUMBER",
56 "valueRenderOption": "UNFORMATTED_VALUE",
57 "majorDimension": "ROWS"
58 }
59}
60
61# Ensure streams are ordered logically
62STREAMS = OrderedDict()
63STREAMS['file_metadata'] = FILE_METADATA
64STREAMS['spreadsheet_metadata'] = SPREADSHEET_METADATA
65STREAMS['sheet_metadata'] = SHEET_METADATA
66STREAMS['sheets_loaded'] = SHEETS_LOADED
diff --git a/tap_google_sheets/sync.py b/tap_google_sheets/sync.py
new file mode 100644
index 0000000..a8b02d0
--- /dev/null
+++ b/tap_google_sheets/sync.py
@@ -0,0 +1,281 @@
1import time
2import math
3import singer
4import json
5from collections import OrderedDict
6from singer import metrics, metadata, Transformer, utils
7from singer.utils import strptime_to_utc, strftime
8from tap_google_sheets.transform import transform_json
9from tap_google_sheets.streams import STREAMS
10from tap_google_sheets.schema import get_sheet_metadata
11
12LOGGER = singer.get_logger()
13
14
15def write_schema(catalog, stream_name):
16 stream = catalog.get_stream(stream_name)
17 schema = stream.schema.to_dict()
18 try:
19 singer.write_schema(stream_name, schema, stream.key_properties)
20 except OSError as err:
21 LOGGER.info('OS Error writing schema for: {}'.format(stream_name))
22 raise err
23
24
25def write_record(stream_name, record, time_extracted):
26 try:
27 singer.messages.write_record(stream_name, record, time_extracted=time_extracted)
28 except OSError as err:
29 LOGGER.info('OS Error writing record for: {}'.format(stream_name))
30 LOGGER.info('record: {}'.format(record))
31 raise err
32
33
34def get_bookmark(state, stream, default):
35 if (state is None) or ('bookmarks' not in state):
36 return default
37 return (
38 state
39 .get('bookmarks', {})
40 .get(stream, default)
41 )
42
43
44def write_bookmark(state, stream, value):
45 if 'bookmarks' not in state:
46 state['bookmarks'] = {}
47 state['bookmarks'][stream] = value
48 LOGGER.info('Write state for stream: {}, value: {}'.format(stream, value))
49 singer.write_state(state)
50
51
52# def transform_datetime(this_dttm):
53def transform_datetime(this_dttm):
54 with Transformer() as transformer:
55 new_dttm = transformer._transform_datetime(this_dttm)
56 return new_dttm
57
58
59def process_records(catalog, #pylint: disable=too-many-branches
60 stream_name,
61 records,
62 time_extracted,
63 bookmark_field=None,
64 bookmark_type=None,
65 max_bookmark_value=None,
66 last_datetime=None,
67 last_integer=None,
68 parent=None,
69 parent_id=None):
70 stream = catalog.get_stream(stream_name)
71 schema = stream.schema.to_dict()
72 stream_metadata = metadata.to_map(stream.metadata)
73
74 with metrics.record_counter(stream_name) as counter:
75 for record in records:
76 # If child object, add parent_id to record
77 if parent_id and parent:
78 record[parent + '_id'] = parent_id
79
80 # Transform record for Singer.io
81 with Transformer() as transformer:
82 transformed_record = transformer.transform(
83 record,
84 schema,
85 stream_metadata)
86 # Reset max_bookmark_value to new value if higher
87 if transformed_record.get(bookmark_field):
88 if max_bookmark_value is None or \
89 transformed_record[bookmark_field] > transform_datetime(max_bookmark_value):
90 max_bookmark_value = transformed_record[bookmark_field]
91
92 if bookmark_field and (bookmark_field in transformed_record):
93 if bookmark_type == 'integer':
94 # Keep only records whose bookmark is after the last_integer
95 if transformed_record[bookmark_field] >= last_integer:
96 write_record(stream_name, transformed_record, \
97 time_extracted=time_extracted)
98 counter.increment()
99 elif bookmark_type == 'datetime':
100 last_dttm = transform_datetime(last_datetime)
101 bookmark_dttm = transform_datetime(transformed_record[bookmark_field])
102 # Keep only records whose bookmark is after the last_datetime
103 if bookmark_dttm >= last_dttm:
104 write_record(stream_name, transformed_record, \
105 time_extracted=time_extracted)
106 counter.increment()
107 else:
108 write_record(stream_name, transformed_record, time_extracted=time_extracted)
109 counter.increment()
110
111 return max_bookmark_value, counter.value
112
113
114# Currently syncing sets the stream currently being delivered in the state.
115# If the integration is interrupted, this state property is used to identify
116# the starting point to continue from.
117# Reference: https://github.com/singer-io/singer-python/blob/master/singer/bookmarks.py#L41-L46
118def update_currently_syncing(state, stream_name):
119 if (stream_name is None) and ('currently_syncing' in state):
120 del state['currently_syncing']
121 else:
122 singer.set_currently_syncing(state, stream_name)
123 singer.write_state(state)
124
125
126# List selected fields from stream catalog
127def get_selected_fields(catalog, stream_name):
128 stream = catalog.get_stream(stream_name)
129 mdata = metadata.to_map(stream.metadata)
130 mdata_list = singer.metadata.to_list(mdata)
131 selected_fields = []
132 for entry in mdata_list:
133 field = None
134 try:
135 field = entry['breadcrumb'][1]
136 if entry.get('metadata', {}).get('selected', False):
137 selected_fields.append(field)
138 except IndexError:
139 pass
140 return selected_fields
141
142
143def get_data(stream_name,
144 endpoint_config,
145 client,
146 spreadsheet_id,
147 range_rows=None):
148 if not range_rows:
149 range_rows = ''
150 path = endpoint_config.get('path', stream_name).replace(
151 '{spreadsheet_id}', spreadsheet_id).replace('{sheet_title}', stream_name).replace(
152 '{range_rows}', range_rows)
153 params = endpoint_config.get('params', {})
154 api = endpoint_config.get('api', 'sheets')
155 querystring = '&'.join(['%s=%s' % (key, value) for (key, value) in params.items()]).replace(
156 '{sheet_title}', stream_name)
157 data = {}
158 data = client.get(
159 path=path,
160 api=api,
161 params=querystring,
162 endpoint=stream_name)
163 return data
164
165
166def transform_file_metadata(file_metadata):
167 # Convert to dict
168 file_metadata_tf = json.loads(json.dumps(file_metadata))
169 # Remove keys
170 if file_metadata_tf.get('lastModifyingUser'):
171 file_metadata_tf['lastModifyingUser'].pop('photoLink', None)
172 file_metadata_tf['lastModifyingUser'].pop('me', None)
173 file_metadata_tf['lastModifyingUser'].pop('permissionId', None)
174 # Add record to an array of 1
175 file_metadata_arr = []
176 file_metadata_arr.append(file_metadata_tf)
177 return file_metadata_arr
178
179
180def transform_spreadsheet_metadata(spreadsheet_metadata):
181 # Convert to dict
182 spreadsheet_metadata_tf = json.loads(json.dumps(spreadsheet_metadata))
183 # Remove keys
184 if spreadsheet_metadata_tf.get('properties'):
185 spreadsheet_metadata_tf['properties'].pop('defaultFormat', None)
186 spreadsheet_metadata_tf.pop('sheets', None)
187 # Add record to an array of 1
188 spreadsheet_metadata_arr = []
189 spreadsheet_metadata_arr.append(spreadsheet_metadata_tf)
190 return spreadsheet_metadata_arr
191
192
193def transform_sheet_metadata(spreadsheet_id, sheet, columns):
194 # Convert to properties to dict
195 sheet_metadata = sheet.get('properties')
196 sheet_metadata_tf = json.loads(json.dumps(sheet_metadata))
197 sheet_id = sheet_metadata_tf.get('sheetId')
198 sheet_url = 'https://docs.google.com/spreadsheets/d/{}/edit#gid={}'.format(
199 spreadsheet_id, sheet_id)
200 sheet_metadata_tf['spreadsheetId'] = spreadsheet_id
201 sheet_metadata_tf['sheetUrl'] = sheet_url
202 sheet_metadata_tf['columns'] = columns
203 return sheet_metadata_tf
204
205
206def sync(client, config, catalog, state):
207 start_date = config.get('start_date')
208 spreadsheet_id = config.get('spreadsheet_id')
209
210 # Get selected_streams from catalog, based on state last_stream
211 # last_stream = Previous currently synced stream, if the load was interrupted
212 last_stream = singer.get_currently_syncing(state)
213 LOGGER.info('last/currently syncing stream: {}'.format(last_stream))
214 selected_streams = []
215 for stream in catalog.get_selected_streams(state):
216 selected_streams.append(stream.stream)
217 LOGGER.info('selected_streams: {}'.format(selected_streams))
218
219 if not selected_streams:
220 return
221
222 # Get file_metadata
223 file_metadata = {}
224 file_metadata_config = STREAMS.get('file_metadata')
225 file_metadata = get_data('file_metadata', file_metadata_config, client, spreadsheet_id)
226 file_metadata_tf = transform_file_metadata(file_metadata)
227 # LOGGER.info('file_metadata_tf = {}'.format(file_metadata_tf))
228 last_datetime = strptime_to_utc(get_bookmark(state, 'file_metadata', start_date))
229 this_datetime = strptime_to_utc(file_metadata.get('modifiedTime'))
230 LOGGER.info('last_datetime = {}, this_datetime = {}'.format(last_datetime, this_datetime))
231 if this_datetime <= last_datetime:
232 LOGGER.info('this_datetime <= last_datetime, FILE NOT CHANGED. EXITING.')
233 return 0
234
235 # Get spreadsheet_metadata
236 spreadsheet_metadata = {}
237 spreadsheet_metadata_config = STREAMS.get('spreadsheet_metadata')
238 spreadsheet_metadata = get_data('spreadsheet_metadata', spreadsheet_metadata_config, client, spreadsheet_id)
239 spreadsheet_metadata_tf = transform_spreadsheet_metadata(spreadsheet_metadata)
240 # LOGGER.info('spreadsheet_metadata_tf = {}'.format(spreadsheet_metadata_tf))
241
242 # Get sheet_metadata
243 sheets = spreadsheet_metadata.get('sheets')
244 sheet_metadata = []
245 sheets_loaded = []
246 sheets_loaded_config = STREAMS['sheets_loaded']
247 if sheets:
248 for sheet in sheets:
249 sheet_title = sheet.get('properties', {}).get('title')
250 sheet_schema, columns = get_sheet_metadata(sheet, spreadsheet_id, client)
251 sheet_metadata_tf = transform_sheet_metadata(spreadsheet_id, sheet, columns)
252 # LOGGER.info('sheet_metadata_tf = {}'.format(sheet_metadata_tf))
253 sheet_metadata.append(sheet_metadata_tf)
254
255 # Determine range of rows and columns for "paging" through batch rows of data
256 sheet_last_col_index = 1
257 sheet_last_col_letter = 'A'
258 for col in columns:
259 col_index = col.get('columnIndex')
260 col_letter = col.get('columnLetter')
261 if col_index > sheet_last_col_index:
262 sheet_last_col_index = col_index
263 sheet_last_col_letter = col_letter
264 sheet_max_row = sheet.get('gridProperties', {}).get('rowCount')
265 is_empty_row = False
266 batch_rows = 200
267 from_row = 2
268 if sheet_max_row < batch_rows:
269 to_row = sheet_max_row
270 else:
271 to_row = batch_rows
272
273 while not is_empty_row and to_row <= sheet_max_row:
274 range_rows = 'A2:{}{}'.format(sheet_last_col_letter, to_row)
275
276 sheet_data = get_data(
277 stream_name=sheet_title,
278 endpoint_config=sheets_loaded_config,
279 client=client,
280 spreadsheet_id=spreadsheet_id,
281 range_rows=range_rows)