diff options
-rw-r--r-- | docs/__init__.html | 225 | ||||
-rw-r--r-- | docs/client.html | 1201 | ||||
-rw-r--r-- | docs/discover.html | 127 | ||||
-rw-r--r-- | docs/pycco.css | 191 | ||||
-rw-r--r-- | docs/schema.html | 949 | ||||
-rw-r--r-- | docs/streams.html | 185 | ||||
-rw-r--r-- | docs/sync.html | 1680 |
7 files changed, 4558 insertions, 0 deletions
diff --git a/docs/__init__.html b/docs/__init__.html new file mode 100644 index 0000000..eb5d3b3 --- /dev/null +++ b/docs/__init__.html | |||
@@ -0,0 +1,225 @@ | |||
1 | <!DOCTYPE html> | ||
2 | <html> | ||
3 | <head> | ||
4 | <meta http-equiv="content-type" content="text/html;charset=utf-8"> | ||
5 | <title>__init__.py</title> | ||
6 | <link rel="stylesheet" href="pycco.css"> | ||
7 | </head> | ||
8 | <body> | ||
9 | <div id='container'> | ||
10 | <div id="background"></div> | ||
11 | <div class='section'> | ||
12 | <div class='docs'><h1>__init__.py</h1></div> | ||
13 | </div> | ||
14 | <div class='clearall'> | ||
15 | <div class='section' id='section-0'> | ||
16 | <div class='docs'> | ||
17 | <div class='octowrap'> | ||
18 | <a class='octothorpe' href='#section-0'>#</a> | ||
19 | </div> | ||
20 | <p>This project syncs data from the v4 Google Sheets API.</p> | ||
21 | <h1>Discovery Mode</h1> | ||
22 | <p>There are a few static streams (<code>"file_metadata"</code>, <code>"spreadsheet_metadata"</code>, <code>"sheet_metadata"</code>, | ||
23 | <code>"sheets_loaded"</code>) and any number of dynamic streams. There’s one dynamic stream per sheet in the | ||
24 | one Google Sheets Doc.</p> | ||
25 | <h1>Sync Mode</h1> | ||
26 | </div> | ||
27 | <div class='code'> | ||
28 | <div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">sys</span> | ||
29 | <span class="kn">import</span> <span class="nn">json</span> | ||
30 | <span class="kn">import</span> <span class="nn">argparse</span> <span class="c1"># unused import</span> | ||
31 | <span class="kn">import</span> <span class="nn">singer</span> | ||
32 | <span class="kn">from</span> <span class="nn">singer</span> <span class="kn">import</span> <span class="n">metadata</span><span class="p">,</span> <span class="n">utils</span> | ||
33 | <span class="kn">from</span> <span class="nn">tap_google_sheets.client</span> <span class="kn">import</span> <span class="n">GoogleClient</span> | ||
34 | <span class="kn">from</span> <span class="nn">tap_google_sheets.discover</span> <span class="kn">import</span> <span class="n">discover</span> | ||
35 | <span class="kn">from</span> <span class="nn">tap_google_sheets.sync</span> <span class="kn">import</span> <span class="n">sync</span> | ||
36 | |||
37 | <span class="n">LOGGER</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">get_logger</span><span class="p">()</span></pre></div> | ||
38 | </div> | ||
39 | </div> | ||
40 | <div class='clearall'></div> | ||
41 | <div class='section' id='section-1'> | ||
42 | <div class='docs'> | ||
43 | <div class='octowrap'> | ||
44 | <a class='octothorpe' href='#section-1'>#</a> | ||
45 | </div> | ||
46 | <h1>Configuration</h1> | ||
47 | </div> | ||
48 | <div class='code'> | ||
49 | <div class="highlight"><pre></pre></div> | ||
50 | </div> | ||
51 | </div> | ||
52 | <div class='clearall'></div> | ||
53 | <div class='section' id='section-2'> | ||
54 | <div class='docs'> | ||
55 | <div class='octowrap'> | ||
56 | <a class='octothorpe' href='#section-2'>#</a> | ||
57 | </div> | ||
58 | <p>This is a typical OAuth2 tap. So in a config file we expect the following keys.</p> | ||
59 | <ul> | ||
60 | <li> | ||
61 | <p>OAuth Related:</p> | ||
62 | <ul> | ||
63 | <li><code>client_id</code></li> | ||
64 | <li><code>client_secret</code></li> | ||
65 | <li><code>refresh_token</code></li> | ||
66 | </ul> | ||
67 | </li> | ||
68 | <li> | ||
69 | <p>Tap related:</p> | ||
70 | <ul> | ||
71 | <li><code>spreadsheet_id</code></li> | ||
72 | <li><code>start_date</code></li> | ||
73 | <li><code>user_agent</code></li> | ||
74 | </ul> | ||
75 | </li> | ||
76 | </ul> | ||
77 | </div> | ||
78 | <div class='code'> | ||
79 | <div class="highlight"><pre><span class="n">REQUIRED_CONFIG_KEYS</span> <span class="o">=</span> <span class="p">[</span> | ||
80 | <span class="s1">'client_id'</span><span class="p">,</span> | ||
81 | <span class="s1">'client_secret'</span><span class="p">,</span> | ||
82 | <span class="s1">'refresh_token'</span><span class="p">,</span> | ||
83 | <span class="s1">'spreadsheet_id'</span><span class="p">,</span> | ||
84 | <span class="s1">'start_date'</span><span class="p">,</span> | ||
85 | <span class="s1">'user_agent'</span> | ||
86 | <span class="p">]</span></pre></div> | ||
87 | </div> | ||
88 | </div> | ||
89 | <div class='clearall'></div> | ||
90 | <div class='section' id='section-3'> | ||
91 | <div class='docs'> | ||
92 | <div class='octowrap'> | ||
93 | <a class='octothorpe' href='#section-3'>#</a> | ||
94 | </div> | ||
95 | <h1>Discovery Mode</h1> | ||
96 | </div> | ||
97 | <div class='code'> | ||
98 | <div class="highlight"><pre></pre></div> | ||
99 | </div> | ||
100 | </div> | ||
101 | <div class='clearall'></div> | ||
102 | <div class='section' id='section-4'> | ||
103 | <div class='docs'> | ||
104 | <div class='octowrap'> | ||
105 | <a class='octothorpe' href='#section-4'>#</a> | ||
106 | </div> | ||
107 | <p>Creates a Singer Catalog and writes it to STDOUT</p> | ||
108 | </div> | ||
109 | <div class='code'> | ||
110 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">do_discover</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">):</span></pre></div> | ||
111 | </div> | ||
112 | </div> | ||
113 | <div class='clearall'></div> | ||
114 | <div class='section' id='section-5'> | ||
115 | <div class='docs'> | ||
116 | <div class='octowrap'> | ||
117 | <a class='octothorpe' href='#section-5'>#</a> | ||
118 | </div> | ||
119 | <p>Inputs:</p> | ||
120 | <ul> | ||
121 | <li><code>client</code></li> | ||
122 | <li>An instance of the GoogleClient class</li> | ||
123 | <li><code>spreadsheet_id</code></li> | ||
124 | <li>The id of the Google Sheet</li> | ||
125 | </ul> | ||
126 | <p>Returns:</p> | ||
127 | <ul> | ||
128 | <li>None</li> | ||
129 | </ul> | ||
130 | <p>Side Effects:</p> | ||
131 | <ul> | ||
132 | <li>Writes to STDOUT</li> | ||
133 | </ul> | ||
134 | </div> | ||
135 | <div class='code'> | ||
136 | <div class="highlight"><pre> <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Starting discover'</span><span class="p">)</span> | ||
137 | <span class="n">catalog</span> <span class="o">=</span> <span class="n">discover</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">)</span> | ||
138 | <span class="n">json</span><span class="o">.</span><span class="n">dump</span><span class="p">(</span><span class="n">catalog</span><span class="o">.</span><span class="n">to_dict</span><span class="p">(),</span> <span class="n">sys</span><span class="o">.</span><span class="n">stdout</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span> | ||
139 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Finished discover'</span><span class="p">)</span></pre></div> | ||
140 | </div> | ||
141 | </div> | ||
142 | <div class='clearall'></div> | ||
143 | <div class='section' id='section-6'> | ||
144 | <div class='docs'> | ||
145 | <div class='octowrap'> | ||
146 | <a class='octothorpe' href='#section-6'>#</a> | ||
147 | </div> | ||
148 | <h1>Entrypoint</h1> | ||
149 | </div> | ||
150 | <div class='code'> | ||
151 | <div class="highlight"><pre></pre></div> | ||
152 | </div> | ||
153 | </div> | ||
154 | <div class='clearall'></div> | ||
155 | <div class='section' id='section-7'> | ||
156 | <div class='docs'> | ||
157 | <div class='octowrap'> | ||
158 | <a class='octothorpe' href='#section-7'>#</a> | ||
159 | </div> | ||
160 | <p>Read a config, then run discovery mode or sync mode</p> | ||
161 | </div> | ||
162 | <div class='code'> | ||
163 | <div class="highlight"><pre><span class="nd">@singer</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">handle_top_exception</span><span class="p">(</span><span class="n">LOGGER</span><span class="p">)</span> | ||
164 | <span class="k">def</span> <span class="nf">main</span><span class="p">():</span></pre></div> | ||
165 | </div> | ||
166 | </div> | ||
167 | <div class='clearall'></div> | ||
168 | <div class='section' id='section-8'> | ||
169 | <div class='docs'> | ||
170 | <div class='octowrap'> | ||
171 | <a class='octothorpe' href='#section-8'>#</a> | ||
172 | </div> | ||
173 | <p>Inputs:</p> | ||
174 | <ul> | ||
175 | <li>None</li> | ||
176 | </ul> | ||
177 | <p>Returns:</p> | ||
178 | <ul> | ||
179 | <li>None</li> | ||
180 | </ul> | ||
181 | <p>Side Effects:</p> | ||
182 | <ul> | ||
183 | <li>Writes to STDOUT</li> | ||
184 | </ul> | ||
185 | </div> | ||
186 | <div class='code'> | ||
187 | <div class="highlight"><pre> <span class="n">parsed_args</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">parse_args</span><span class="p">(</span><span class="n">REQUIRED_CONFIG_KEYS</span><span class="p">)</span> | ||
188 | |||
189 | <span class="k">with</span> <span class="n">GoogleClient</span><span class="p">(</span><span class="n">parsed_args</span><span class="o">.</span><span class="n">config</span><span class="p">[</span><span class="s1">'access_token'</span><span class="p">],</span> | ||
190 | <span class="n">parsed_args</span><span class="o">.</span><span class="n">config</span><span class="p">[</span><span class="s1">'user_agent'</span><span class="p">])</span> <span class="k">as</span> <span class="n">client</span><span class="p">:</span> | ||
191 | |||
192 | <span class="n">state</span> <span class="o">=</span> <span class="p">{}</span> | ||
193 | <span class="k">if</span> <span class="n">parsed_args</span><span class="o">.</span><span class="n">state</span><span class="p">:</span> | ||
194 | <span class="n">state</span> <span class="o">=</span> <span class="n">parsed_args</span><span class="o">.</span><span class="n">state</span> | ||
195 | |||
196 | <span class="n">config</span> <span class="o">=</span> <span class="n">parsed_args</span><span class="o">.</span><span class="n">config</span> | ||
197 | <span class="n">spreadsheet_id</span> <span class="o">=</span> <span class="n">config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'spreadsheet_id'</span><span class="p">)</span> | ||
198 | |||
199 | <span class="k">if</span> <span class="n">parsed_args</span><span class="o">.</span><span class="n">discover</span><span class="p">:</span> | ||
200 | <span class="n">do_discover</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">)</span> | ||
201 | <span class="k">elif</span> <span class="n">parsed_args</span><span class="o">.</span><span class="n">catalog</span><span class="p">:</span> | ||
202 | <span class="n">sync</span><span class="p">(</span><span class="n">client</span><span class="o">=</span><span class="n">client</span><span class="p">,</span> | ||
203 | <span class="n">config</span><span class="o">=</span><span class="n">config</span><span class="p">,</span> | ||
204 | <span class="n">catalog</span><span class="o">=</span><span class="n">parsed_args</span><span class="o">.</span><span class="n">catalog</span><span class="p">,</span> | ||
205 | <span class="n">state</span><span class="o">=</span><span class="n">state</span><span class="p">)</span></pre></div> | ||
206 | </div> | ||
207 | </div> | ||
208 | <div class='clearall'></div> | ||
209 | <div class='section' id='section-9'> | ||
210 | <div class='docs'> | ||
211 | <div class='octowrap'> | ||
212 | <a class='octothorpe' href='#section-9'>#</a> | ||
213 | </div> | ||
214 | <p>Unused</p> | ||
215 | </div> | ||
216 | <div class='code'> | ||
217 | <div class="highlight"><pre><span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s1">'__main__'</span><span class="p">:</span> | ||
218 | <span class="n">main</span><span class="p">()</span> | ||
219 | |||
220 | </pre></div> | ||
221 | </div> | ||
222 | </div> | ||
223 | <div class='clearall'></div> | ||
224 | </div> | ||
225 | </body> | ||
diff --git a/docs/client.html b/docs/client.html new file mode 100644 index 0000000..4e69b89 --- /dev/null +++ b/docs/client.html | |||
@@ -0,0 +1,1201 @@ | |||
1 | <!DOCTYPE html> | ||
2 | <html> | ||
3 | <head> | ||
4 | <meta http-equiv="content-type" content="text/html;charset=utf-8"> | ||
5 | <title>client.py</title> | ||
6 | <link rel="stylesheet" href="pycco.css"> | ||
7 | </head> | ||
8 | <body> | ||
9 | <div id='container'> | ||
10 | <div id="background"></div> | ||
11 | <div class='section'> | ||
12 | <div class='docs'><h1>client.py</h1></div> | ||
13 | </div> | ||
14 | <div class='clearall'> | ||
15 | <div class='section' id='section-0'> | ||
16 | <div class='docs'> | ||
17 | <div class='octowrap'> | ||
18 | <a class='octothorpe' href='#section-0'>#</a> | ||
19 | </div> | ||
20 | |||
21 | </div> | ||
22 | <div class='code'> | ||
23 | <div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span> | ||
24 | <span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">OrderedDict</span> | ||
25 | <span class="kn">import</span> <span class="nn">backoff</span> | ||
26 | <span class="kn">import</span> <span class="nn">requests</span> | ||
27 | <span class="kn">import</span> <span class="nn">singer</span> | ||
28 | <span class="kn">from</span> <span class="nn">singer</span> <span class="kn">import</span> <span class="n">metrics</span> | ||
29 | <span class="kn">from</span> <span class="nn">singer</span> <span class="kn">import</span> <span class="n">utils</span> | ||
30 | |||
31 | <span class="n">BASE_URL</span> <span class="o">=</span> <span class="s1">'https://www.googleapis.com'</span> | ||
32 | <span class="n">GOOGLE_TOKEN_URI</span> <span class="o">=</span> <span class="s1">'https://oauth2.googleapis.com/token'</span> | ||
33 | <span class="n">LOGGER</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">get_logger</span><span class="p">()</span></pre></div> | ||
34 | </div> | ||
35 | </div> | ||
36 | <div class='clearall'></div> | ||
37 | <div class='section' id='section-1'> | ||
38 | <div class='docs'> | ||
39 | <div class='octowrap'> | ||
40 | <a class='octothorpe' href='#section-1'>#</a> | ||
41 | </div> | ||
42 | |||
43 | </div> | ||
44 | <div class='code'> | ||
45 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">Server5xxError</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span> | ||
46 | <span class="k">pass</span></pre></div> | ||
47 | </div> | ||
48 | </div> | ||
49 | <div class='clearall'></div> | ||
50 | <div class='section' id='section-2'> | ||
51 | <div class='docs'> | ||
52 | <div class='octowrap'> | ||
53 | <a class='octothorpe' href='#section-2'>#</a> | ||
54 | </div> | ||
55 | |||
56 | </div> | ||
57 | <div class='code'> | ||
58 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">Server429Error</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span> | ||
59 | <span class="k">pass</span></pre></div> | ||
60 | </div> | ||
61 | </div> | ||
62 | <div class='clearall'></div> | ||
63 | <div class='section' id='section-3'> | ||
64 | <div class='docs'> | ||
65 | <div class='octowrap'> | ||
66 | <a class='octothorpe' href='#section-3'>#</a> | ||
67 | </div> | ||
68 | |||
69 | </div> | ||
70 | <div class='code'> | ||
71 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleError</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span> | ||
72 | <span class="k">pass</span></pre></div> | ||
73 | </div> | ||
74 | </div> | ||
75 | <div class='clearall'></div> | ||
76 | <div class='section' id='section-4'> | ||
77 | <div class='docs'> | ||
78 | <div class='octowrap'> | ||
79 | <a class='octothorpe' href='#section-4'>#</a> | ||
80 | </div> | ||
81 | |||
82 | </div> | ||
83 | <div class='code'> | ||
84 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleBadRequestError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
85 | <span class="k">pass</span></pre></div> | ||
86 | </div> | ||
87 | </div> | ||
88 | <div class='clearall'></div> | ||
89 | <div class='section' id='section-5'> | ||
90 | <div class='docs'> | ||
91 | <div class='octowrap'> | ||
92 | <a class='octothorpe' href='#section-5'>#</a> | ||
93 | </div> | ||
94 | |||
95 | </div> | ||
96 | <div class='code'> | ||
97 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleUnauthorizedError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
98 | <span class="k">pass</span></pre></div> | ||
99 | </div> | ||
100 | </div> | ||
101 | <div class='clearall'></div> | ||
102 | <div class='section' id='section-6'> | ||
103 | <div class='docs'> | ||
104 | <div class='octowrap'> | ||
105 | <a class='octothorpe' href='#section-6'>#</a> | ||
106 | </div> | ||
107 | |||
108 | </div> | ||
109 | <div class='code'> | ||
110 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GooglePaymentRequiredError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
111 | <span class="k">pass</span></pre></div> | ||
112 | </div> | ||
113 | </div> | ||
114 | <div class='clearall'></div> | ||
115 | <div class='section' id='section-7'> | ||
116 | <div class='docs'> | ||
117 | <div class='octowrap'> | ||
118 | <a class='octothorpe' href='#section-7'>#</a> | ||
119 | </div> | ||
120 | |||
121 | </div> | ||
122 | <div class='code'> | ||
123 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleNotFoundError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
124 | <span class="k">pass</span></pre></div> | ||
125 | </div> | ||
126 | </div> | ||
127 | <div class='clearall'></div> | ||
128 | <div class='section' id='section-8'> | ||
129 | <div class='docs'> | ||
130 | <div class='octowrap'> | ||
131 | <a class='octothorpe' href='#section-8'>#</a> | ||
132 | </div> | ||
133 | |||
134 | </div> | ||
135 | <div class='code'> | ||
136 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleMethodNotAllowedError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
137 | <span class="k">pass</span></pre></div> | ||
138 | </div> | ||
139 | </div> | ||
140 | <div class='clearall'></div> | ||
141 | <div class='section' id='section-9'> | ||
142 | <div class='docs'> | ||
143 | <div class='octowrap'> | ||
144 | <a class='octothorpe' href='#section-9'>#</a> | ||
145 | </div> | ||
146 | |||
147 | </div> | ||
148 | <div class='code'> | ||
149 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleConflictError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
150 | <span class="k">pass</span></pre></div> | ||
151 | </div> | ||
152 | </div> | ||
153 | <div class='clearall'></div> | ||
154 | <div class='section' id='section-10'> | ||
155 | <div class='docs'> | ||
156 | <div class='octowrap'> | ||
157 | <a class='octothorpe' href='#section-10'>#</a> | ||
158 | </div> | ||
159 | |||
160 | </div> | ||
161 | <div class='code'> | ||
162 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleGoneError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
163 | <span class="k">pass</span></pre></div> | ||
164 | </div> | ||
165 | </div> | ||
166 | <div class='clearall'></div> | ||
167 | <div class='section' id='section-11'> | ||
168 | <div class='docs'> | ||
169 | <div class='octowrap'> | ||
170 | <a class='octothorpe' href='#section-11'>#</a> | ||
171 | </div> | ||
172 | |||
173 | </div> | ||
174 | <div class='code'> | ||
175 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GooglePreconditionFailedError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
176 | <span class="k">pass</span></pre></div> | ||
177 | </div> | ||
178 | </div> | ||
179 | <div class='clearall'></div> | ||
180 | <div class='section' id='section-12'> | ||
181 | <div class='docs'> | ||
182 | <div class='octowrap'> | ||
183 | <a class='octothorpe' href='#section-12'>#</a> | ||
184 | </div> | ||
185 | |||
186 | </div> | ||
187 | <div class='code'> | ||
188 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleRequestEntityTooLargeError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
189 | <span class="k">pass</span></pre></div> | ||
190 | </div> | ||
191 | </div> | ||
192 | <div class='clearall'></div> | ||
193 | <div class='section' id='section-13'> | ||
194 | <div class='docs'> | ||
195 | <div class='octowrap'> | ||
196 | <a class='octothorpe' href='#section-13'>#</a> | ||
197 | </div> | ||
198 | |||
199 | </div> | ||
200 | <div class='code'> | ||
201 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleRequestedRangeNotSatisfiableError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
202 | <span class="k">pass</span></pre></div> | ||
203 | </div> | ||
204 | </div> | ||
205 | <div class='clearall'></div> | ||
206 | <div class='section' id='section-14'> | ||
207 | <div class='docs'> | ||
208 | <div class='octowrap'> | ||
209 | <a class='octothorpe' href='#section-14'>#</a> | ||
210 | </div> | ||
211 | |||
212 | </div> | ||
213 | <div class='code'> | ||
214 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleExpectationFailedError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
215 | <span class="k">pass</span></pre></div> | ||
216 | </div> | ||
217 | </div> | ||
218 | <div class='clearall'></div> | ||
219 | <div class='section' id='section-15'> | ||
220 | <div class='docs'> | ||
221 | <div class='octowrap'> | ||
222 | <a class='octothorpe' href='#section-15'>#</a> | ||
223 | </div> | ||
224 | |||
225 | </div> | ||
226 | <div class='code'> | ||
227 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleForbiddenError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
228 | <span class="k">pass</span></pre></div> | ||
229 | </div> | ||
230 | </div> | ||
231 | <div class='clearall'></div> | ||
232 | <div class='section' id='section-16'> | ||
233 | <div class='docs'> | ||
234 | <div class='octowrap'> | ||
235 | <a class='octothorpe' href='#section-16'>#</a> | ||
236 | </div> | ||
237 | |||
238 | </div> | ||
239 | <div class='code'> | ||
240 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleUnprocessableEntityError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
241 | <span class="k">pass</span></pre></div> | ||
242 | </div> | ||
243 | </div> | ||
244 | <div class='clearall'></div> | ||
245 | <div class='section' id='section-17'> | ||
246 | <div class='docs'> | ||
247 | <div class='octowrap'> | ||
248 | <a class='octothorpe' href='#section-17'>#</a> | ||
249 | </div> | ||
250 | |||
251 | </div> | ||
252 | <div class='code'> | ||
253 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GooglePreconditionRequiredError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
254 | <span class="k">pass</span></pre></div> | ||
255 | </div> | ||
256 | </div> | ||
257 | <div class='clearall'></div> | ||
258 | <div class='section' id='section-18'> | ||
259 | <div class='docs'> | ||
260 | <div class='octowrap'> | ||
261 | <a class='octothorpe' href='#section-18'>#</a> | ||
262 | </div> | ||
263 | |||
264 | </div> | ||
265 | <div class='code'> | ||
266 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleInternalServiceError</span><span class="p">(</span><span class="n">GoogleError</span><span class="p">):</span> | ||
267 | <span class="k">pass</span></pre></div> | ||
268 | </div> | ||
269 | </div> | ||
270 | <div class='clearall'></div> | ||
271 | <div class='section' id='section-19'> | ||
272 | <div class='docs'> | ||
273 | <div class='octowrap'> | ||
274 | <a class='octothorpe' href='#section-19'>#</a> | ||
275 | </div> | ||
276 | <p>Error Codes: https://developers.google.com/webmaster-tools/search-console-api-original/v3/errors</p> | ||
277 | </div> | ||
278 | <div class='code'> | ||
279 | <div class="highlight"><pre><span class="n">ERROR_CODE_EXCEPTION_MAPPING</span> <span class="o">=</span> <span class="p">{</span> | ||
280 | <span class="mi">400</span><span class="p">:</span> <span class="n">GoogleBadRequestError</span><span class="p">,</span> | ||
281 | <span class="mi">401</span><span class="p">:</span> <span class="n">GoogleUnauthorizedError</span><span class="p">,</span> | ||
282 | <span class="mi">402</span><span class="p">:</span> <span class="n">GooglePaymentRequiredError</span><span class="p">,</span> | ||
283 | <span class="mi">403</span><span class="p">:</span> <span class="n">GoogleForbiddenError</span><span class="p">,</span> | ||
284 | <span class="mi">404</span><span class="p">:</span> <span class="n">GoogleNotFoundError</span><span class="p">,</span> | ||
285 | <span class="mi">405</span><span class="p">:</span> <span class="n">GoogleMethodNotAllowedError</span><span class="p">,</span> | ||
286 | <span class="mi">409</span><span class="p">:</span> <span class="n">GoogleConflictError</span><span class="p">,</span> | ||
287 | <span class="mi">410</span><span class="p">:</span> <span class="n">GoogleGoneError</span><span class="p">,</span> | ||
288 | <span class="mi">412</span><span class="p">:</span> <span class="n">GooglePreconditionFailedError</span><span class="p">,</span> | ||
289 | <span class="mi">413</span><span class="p">:</span> <span class="n">GoogleRequestEntityTooLargeError</span><span class="p">,</span> | ||
290 | <span class="mi">416</span><span class="p">:</span> <span class="n">GoogleRequestedRangeNotSatisfiableError</span><span class="p">,</span> | ||
291 | <span class="mi">417</span><span class="p">:</span> <span class="n">GoogleExpectationFailedError</span><span class="p">,</span> | ||
292 | <span class="mi">422</span><span class="p">:</span> <span class="n">GoogleUnprocessableEntityError</span><span class="p">,</span> | ||
293 | <span class="mi">428</span><span class="p">:</span> <span class="n">GooglePreconditionRequiredError</span><span class="p">,</span> | ||
294 | <span class="mi">500</span><span class="p">:</span> <span class="n">GoogleInternalServiceError</span><span class="p">}</span></pre></div> | ||
295 | </div> | ||
296 | </div> | ||
297 | <div class='clearall'></div> | ||
298 | <div class='section' id='section-20'> | ||
299 | <div class='docs'> | ||
300 | <div class='octowrap'> | ||
301 | <a class='octothorpe' href='#section-20'>#</a> | ||
302 | </div> | ||
303 | |||
304 | </div> | ||
305 | <div class='code'> | ||
306 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">get_exception_for_error_code</span><span class="p">(</span><span class="n">error_code</span><span class="p">):</span> | ||
307 | <span class="k">return</span> <span class="n">ERROR_CODE_EXCEPTION_MAPPING</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">error_code</span><span class="p">,</span> <span class="n">GoogleError</span><span class="p">)</span></pre></div> | ||
308 | </div> | ||
309 | </div> | ||
310 | <div class='clearall'></div> | ||
311 | <div class='section' id='section-21'> | ||
312 | <div class='docs'> | ||
313 | <div class='octowrap'> | ||
314 | <a class='octothorpe' href='#section-21'>#</a> | ||
315 | </div> | ||
316 | <hr /> | ||
317 | </div> | ||
318 | <div class='code'> | ||
319 | <div class="highlight"><pre></pre></div> | ||
320 | </div> | ||
321 | </div> | ||
322 | <div class='clearall'></div> | ||
323 | <div class='section' id='section-22'> | ||
324 | <div class='docs'> | ||
325 | <div class='octowrap'> | ||
326 | <a class='octothorpe' href='#section-22'>#</a> | ||
327 | </div> | ||
328 | <p><code>client.py:raise_for_error()</code> calls the <code>raise_for_status()</code> function from the <code>requests</code> library | ||
329 | and catches all <code>requests.HTTPError</code> and <code>requests.ConnectionError</code>. Note the name difference.</p> | ||
330 | </div> | ||
331 | <div class='code'> | ||
332 | <div class="highlight"><pre></pre></div> | ||
333 | </div> | ||
334 | </div> | ||
335 | <div class='clearall'></div> | ||
336 | <div class='section' id='section-23'> | ||
337 | <div class='docs'> | ||
338 | <div class='octowrap'> | ||
339 | <a class='octothorpe' href='#section-23'>#</a> | ||
340 | </div> | ||
341 | <h5>Thoughts</h5> | ||
342 | <p>I believe there are 5 ways to leave this function. It’s worth skimming this just to understand the | ||
343 | structure. I’ll note them below, but I think there are just two ways to actually leave this function.</p> | ||
344 | </div> | ||
345 | <div class='code'> | ||
346 | <div class="highlight"><pre></pre></div> | ||
347 | </div> | ||
348 | </div> | ||
349 | <div class='clearall'></div> | ||
350 | <div class='section' id='section-24'> | ||
351 | <div class='docs'> | ||
352 | <div class='octowrap'> | ||
353 | <a class='octothorpe' href='#section-24'>#</a> | ||
354 | </div> | ||
355 | <ol> | ||
356 | <li>If the length of the response content is 0, then we just leave<ul> | ||
357 | <li>I believe this results in us swallowing the <code>requests.HTTPError</code> and successfully returning to | ||
358 | the calling function</li> | ||
359 | <li>I believe it’s possible to leave the function this way</li> | ||
360 | </ul> | ||
361 | </li> | ||
362 | <li>If you can call <code>response.json()</code>, then we attempt to create a specific error message via | ||
363 | <code>client.py:get_exception_for_error_code()</code>, which just looks up a code found in | ||
364 | <code>response.json()</code><ul> | ||
365 | <li>I am not convinced this ever works for this tap because my understanding of | ||
366 | <code>raise_for_status()</code> is that if <code>raise_for_status()</code> is unsuccessful then <code>response.json()</code> | ||
367 | will also be unsuccessful. So, because we are in the exception handling for | ||
368 | <code>raise_for_status()</code> I think we never make it past <code>response = response.json()</code> on | ||
369 | <code>client.py:118</code></li> | ||
370 | <li>I believe it’s possible to leave the function this way</li> | ||
371 | </ul> | ||
372 | </li> | ||
373 | <li>Assuming <code>response.json()</code> does fail, then that function will raise a | ||
374 | <code>simplejson.scanner.JSONDecodeError</code> with an error message like <code>"Expecting value: line 1 | ||
375 | column 1 (char 0)"</code></li> | ||
376 | <li>Assuming <code>response.json()</code> worked, but it lacks an <code>error</code> key and lacks an <code>errorCode</code> key, | ||
377 | we re-raise whatever was caught from <code>raise_for_status()</code></li> | ||
378 | <li>We also re-raise whatever was caught from <code>raise_for_status()</code> if a <code>ValueError</code> or <code>TypeError</code> | ||
379 | occurs in trying to handle the <code>raise_for_status()</code> error</li> | ||
380 | </ol> | ||
381 | </div> | ||
382 | <div class='code'> | ||
383 | <div class="highlight"><pre></pre></div> | ||
384 | </div> | ||
385 | </div> | ||
386 | <div class='clearall'></div> | ||
387 | <div class='section' id='section-25'> | ||
388 | <div class='docs'> | ||
389 | <div class='octowrap'> | ||
390 | <a class='octothorpe' href='#section-25'>#</a> | ||
391 | </div> | ||
392 | <hr /> | ||
393 | </div> | ||
394 | <div class='code'> | ||
395 | <div class="highlight"><pre></pre></div> | ||
396 | </div> | ||
397 | </div> | ||
398 | <div class='clearall'></div> | ||
399 | <div class='section' id='section-26'> | ||
400 | <div class='docs'> | ||
401 | <div class='octowrap'> | ||
402 | <a class='octothorpe' href='#section-26'>#</a> | ||
403 | </div> | ||
404 | <p>Try to catch API errors to rethrow as tap specific errors</p> | ||
405 | </div> | ||
406 | <div class='code'> | ||
407 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">raise_for_error</span><span class="p">(</span><span class="n">response</span><span class="p">):</span></pre></div> | ||
408 | </div> | ||
409 | </div> | ||
410 | <div class='clearall'></div> | ||
411 | <div class='section' id='section-27'> | ||
412 | <div class='docs'> | ||
413 | <div class='octowrap'> | ||
414 | <a class='octothorpe' href='#section-27'>#</a> | ||
415 | </div> | ||
416 | <p>Inputs:</p> | ||
417 | <ul> | ||
418 | <li><code>response</code>: A requests.Response object</li> | ||
419 | </ul> | ||
420 | <p>Returns:</p> | ||
421 | <ul> | ||
422 | <li>None</li> | ||
423 | </ul> | ||
424 | <p>Side Effects:</p> | ||
425 | <ul> | ||
426 | <li>Raises a GoogleError</li> | ||
427 | </ul> | ||
428 | </div> | ||
429 | <div class='code'> | ||
430 | <div class="highlight"><pre> <span class="k">try</span><span class="p">:</span> | ||
431 | <span class="n">response</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span> | ||
432 | <span class="k">except</span> <span class="p">(</span><span class="n">requests</span><span class="o">.</span><span class="n">HTTPError</span><span class="p">,</span> <span class="n">requests</span><span class="o">.</span><span class="n">ConnectionError</span><span class="p">)</span> <span class="k">as</span> <span class="n">error</span><span class="p">:</span> | ||
433 | <span class="k">try</span><span class="p">:</span> | ||
434 | <span class="n">content_length</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">content</span><span class="p">)</span> | ||
435 | <span class="k">if</span> <span class="n">content_length</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | ||
436 | <span class="k">return</span> | ||
437 | <span class="n">response</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> | ||
438 | <span class="k">if</span> <span class="p">(</span><span class="s1">'error'</span> <span class="ow">in</span> <span class="n">response</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span><span class="s1">'errorCode'</span> <span class="ow">in</span> <span class="n">response</span><span class="p">):</span> | ||
439 | <span class="n">message</span> <span class="o">=</span> <span class="s1">'</span><span class="si">%s</span><span class="s1">: </span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'error'</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">error</span><span class="p">)),</span> | ||
440 | <span class="n">response</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'message'</span><span class="p">,</span> <span class="s1">'Unknown Error'</span><span class="p">))</span> | ||
441 | <span class="n">error_code</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'error'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'code'</span><span class="p">)</span> | ||
442 | <span class="n">ex</span> <span class="o">=</span> <span class="n">get_exception_for_error_code</span><span class="p">(</span><span class="n">error_code</span><span class="p">)</span> | ||
443 | <span class="k">raise</span> <span class="n">ex</span><span class="p">(</span><span class="n">message</span><span class="p">)</span> | ||
444 | <span class="k">raise</span> <span class="n">GoogleError</span><span class="p">(</span><span class="n">error</span><span class="p">)</span> | ||
445 | <span class="k">except</span> <span class="p">(</span><span class="ne">ValueError</span><span class="p">,</span> <span class="ne">TypeError</span><span class="p">):</span> | ||
446 | <span class="k">raise</span> <span class="n">GoogleError</span><span class="p">(</span><span class="n">error</span><span class="p">)</span></pre></div> | ||
447 | </div> | ||
448 | </div> | ||
449 | <div class='clearall'></div> | ||
450 | <div class='section' id='section-28'> | ||
451 | <div class='docs'> | ||
452 | <div class='octowrap'> | ||
453 | <a class='octothorpe' href='#section-28'>#</a> | ||
454 | </div> | ||
455 | <hr /> | ||
456 | </div> | ||
457 | <div class='code'> | ||
458 | <div class="highlight"><pre></pre></div> | ||
459 | </div> | ||
460 | </div> | ||
461 | <div class='clearall'></div> | ||
462 | <div class='section' id='section-29'> | ||
463 | <div class='docs'> | ||
464 | <div class='octowrap'> | ||
465 | <a class='octothorpe' href='#section-29'>#</a> | ||
466 | </div> | ||
467 | <h3>Handling a successful response</h3> | ||
468 | </div> | ||
469 | <div class='code'> | ||
470 | <div class="highlight"><pre></pre></div> | ||
471 | </div> | ||
472 | </div> | ||
473 | <div class='clearall'></div> | ||
474 | <div class='section' id='section-30'> | ||
475 | <div class='docs'> | ||
476 | <div class='octowrap'> | ||
477 | <a class='octothorpe' href='#section-30'>#</a> | ||
478 | </div> | ||
479 | <p>A successful response is defined as anything that returns an <code>HTTP 200</code>.</p> | ||
480 | </div> | ||
481 | <div class='code'> | ||
482 | <div class="highlight"><pre></pre></div> | ||
483 | </div> | ||
484 | </div> | ||
485 | <div class='clearall'></div> | ||
486 | <div class='section' id='section-31'> | ||
487 | <div class='docs'> | ||
488 | <div class='octowrap'> | ||
489 | <a class='octothorpe' href='#section-31'>#</a> | ||
490 | </div> | ||
491 | <p>On a successful response, we store the <code>access_token</code> returned on a private field, | ||
492 | <code>GoogleClient.__access_token</code>, and we update <code>GoogleClient.__expires</code> to be the time this | ||
493 | <code>access_token</code> expires. <code>GoogleClient.__expires</code> is a <code>datetime</code> object in UTC.</p> | ||
494 | </div> | ||
495 | <div class='code'> | ||
496 | <div class="highlight"><pre></pre></div> | ||
497 | </div> | ||
498 | </div> | ||
499 | <div class='clearall'></div> | ||
500 | <div class='section' id='section-32'> | ||
501 | <div class='docs'> | ||
502 | <div class='octowrap'> | ||
503 | <a class='octothorpe' href='#section-32'>#</a> | ||
504 | </div> | ||
505 | <h3>Handling an unsuccessful response</h3> | ||
506 | </div> | ||
507 | <div class='code'> | ||
508 | <div class="highlight"><pre></pre></div> | ||
509 | </div> | ||
510 | </div> | ||
511 | <div class='clearall'></div> | ||
512 | <div class='section' id='section-33'> | ||
513 | <div class='docs'> | ||
514 | <div class='octowrap'> | ||
515 | <a class='octothorpe' href='#section-33'>#</a> | ||
516 | </div> | ||
517 | <p>To handle unsuccessful requests, the tap has the following pattern</p> | ||
518 | </div> | ||
519 | <div class='code'> | ||
520 | <div class="highlight"><pre></pre></div> | ||
521 | </div> | ||
522 | </div> | ||
523 | <div class='clearall'></div> | ||
524 | <div class='section' id='section-34'> | ||
525 | <div class='docs'> | ||
526 | <div class='octowrap'> | ||
527 | <a class='octothorpe' href='#section-34'>#</a> | ||
528 | </div> | ||
529 | <pre><code class="language-Python">if response.status_code >= 500: | ||
530 | raise Server5xxError() | ||
531 | |||
532 | if response.status_code != 200: | ||
533 | raise_for_error(response) | ||
534 | </code></pre> | ||
535 | </div> | ||
536 | <div class='code'> | ||
537 | <div class="highlight"><pre></pre></div> | ||
538 | </div> | ||
539 | </div> | ||
540 | <div class='clearall'></div> | ||
541 | <div class='section' id='section-35'> | ||
542 | <div class='docs'> | ||
543 | <div class='octowrap'> | ||
544 | <a class='octothorpe' href='#section-35'>#</a> | ||
545 | </div> | ||
546 | <p>The <code>client.py:Server5xxError</code> is caught by <code>backoff</code> and we retry the request with exponential backoff.</p> | ||
547 | </div> | ||
548 | <div class='code'> | ||
549 | <div class="highlight"><pre></pre></div> | ||
550 | </div> | ||
551 | </div> | ||
552 | <div class='clearall'></div> | ||
553 | <div class='section' id='section-36'> | ||
554 | <div class='docs'> | ||
555 | <div class='octowrap'> | ||
556 | <a class='octothorpe' href='#section-36'>#</a> | ||
557 | </div> | ||
558 | <hr /> | ||
559 | </div> | ||
560 | <div class='code'> | ||
561 | <div class="highlight"><pre></pre></div> | ||
562 | </div> | ||
563 | </div> | ||
564 | <div class='clearall'></div> | ||
565 | <div class='section' id='section-37'> | ||
566 | <div class='docs'> | ||
567 | <div class='octowrap'> | ||
568 | <a class='octothorpe' href='#section-37'>#</a> | ||
569 | </div> | ||
570 | <p>This is a class implemented in the tap in <code>client.py</code>. We initialize it once in <code>__init__.py</code> as | ||
571 | a context manager in <code>__init__.py:main()</code></p> | ||
572 | </div> | ||
573 | <div class='code'> | ||
574 | <div class="highlight"><pre><span class="k">class</span> <span class="nc">GoogleClient</span><span class="p">:</span> <span class="c1"># pylint: disable=too-many-instance-attributes</span></pre></div> | ||
575 | </div> | ||
576 | </div> | ||
577 | <div class='clearall'></div> | ||
578 | <div class='section' id='section-38'> | ||
579 | <div class='docs'> | ||
580 | <div class='octowrap'> | ||
581 | <a class='octothorpe' href='#section-38'>#</a> | ||
582 | </div> | ||
583 | |||
584 | </div> | ||
585 | <div class='code'> | ||
586 | <div class="highlight"><pre></pre></div> | ||
587 | </div> | ||
588 | </div> | ||
589 | <div class='clearall'></div> | ||
590 | <div class='section' id='section-39'> | ||
591 | <div class='docs'> | ||
592 | <div class='octowrap'> | ||
593 | <a class='octothorpe' href='#section-39'>#</a> | ||
594 | </div> | ||
595 | <p>To create the <code>GoogleClient</code> object, we have to pass in the three OAuth2 variables. Optionally we | ||
596 | can include the <code>user_agent</code>.</p> | ||
597 | <p>Side Effects:</p> | ||
598 | <ul> | ||
599 | <li>All of this gets stored in private fields by the constructor.</li> | ||
600 | <li>The constructor also initializes a <code>requests.Session</code>.</li> | ||
601 | </ul> | ||
602 | </div> | ||
603 | <div class='code'> | ||
604 | <div class="highlight"><pre> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">client_id</span><span class="p">,</span> <span class="n">client_secret</span><span class="p">,</span> <span class="n">refresh_token</span><span class="p">,</span> <span class="n">access_token</span><span class="p">,</span> <span class="n">user_agent</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span></pre></div> | ||
605 | </div> | ||
606 | </div> | ||
607 | <div class='clearall'></div> | ||
608 | <div class='section' id='section-40'> | ||
609 | <div class='docs'> | ||
610 | <div class='octowrap'> | ||
611 | <a class='octothorpe' href='#section-40'>#</a> | ||
612 | </div> | ||
613 | |||
614 | </div> | ||
615 | <div class='code'> | ||
616 | <div class="highlight"><pre> <span class="bp">self</span><span class="o">.</span><span class="n">__client_id</span> <span class="o">=</span> <span class="n">client_id</span> | ||
617 | <span class="bp">self</span><span class="o">.</span><span class="n">__client_secret</span> <span class="o">=</span> <span class="n">client_secret</span> | ||
618 | <span class="bp">self</span><span class="o">.</span><span class="n">__refresh_token</span> <span class="o">=</span> <span class="n">refresh_token</span> | ||
619 | <span class="bp">self</span><span class="o">.</span><span class="n">__user_agent</span> <span class="o">=</span> <span class="n">user_agent</span> | ||
620 | <span class="bp">self</span><span class="o">.</span><span class="n">__access_token</span> <span class="o">=</span> <span class="n">access_token</span> | ||
621 | <span class="bp">self</span><span class="o">.</span><span class="n">__session</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">Session</span><span class="p">()</span> | ||
622 | <span class="bp">self</span><span class="o">.</span><span class="n">base_url</span> <span class="o">=</span> <span class="kc">None</span></pre></div> | ||
623 | </div> | ||
624 | </div> | ||
625 | <div class='clearall'></div> | ||
626 | <div class='section' id='section-41'> | ||
627 | <div class='docs'> | ||
628 | <div class='octowrap'> | ||
629 | <a class='octothorpe' href='#section-41'>#</a> | ||
630 | </div> | ||
631 | <p>On enter, get a new access token</p> | ||
632 | </div> | ||
633 | <div class='code'> | ||
634 | <div class="highlight"><pre> <span class="k">def</span> <span class="fm">__enter__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span></pre></div> | ||
635 | </div> | ||
636 | </div> | ||
637 | <div class='clearall'></div> | ||
638 | <div class='section' id='section-42'> | ||
639 | <div class='docs'> | ||
640 | <div class='octowrap'> | ||
641 | <a class='octothorpe' href='#section-42'>#</a> | ||
642 | </div> | ||
643 | |||
644 | </div> | ||
645 | <div class='code'> | ||
646 | <div class="highlight"><pre> <span class="bp">self</span><span class="o">.</span><span class="n">get_access_token</span><span class="p">()</span> | ||
647 | <span class="k">return</span> <span class="bp">self</span></pre></div> | ||
648 | </div> | ||
649 | </div> | ||
650 | <div class='clearall'></div> | ||
651 | <div class='section' id='section-43'> | ||
652 | <div class='docs'> | ||
653 | <div class='octowrap'> | ||
654 | <a class='octothorpe' href='#section-43'>#</a> | ||
655 | </div> | ||
656 | <p>On exit, close the Requests Session</p> | ||
657 | </div> | ||
658 | <div class='code'> | ||
659 | <div class="highlight"><pre> <span class="k">def</span> <span class="fm">__exit__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">exception_type</span><span class="p">,</span> <span class="n">exception_value</span><span class="p">,</span> <span class="n">traceback</span><span class="p">):</span></pre></div> | ||
660 | </div> | ||
661 | </div> | ||
662 | <div class='clearall'></div> | ||
663 | <div class='section' id='section-44'> | ||
664 | <div class='docs'> | ||
665 | <div class='octowrap'> | ||
666 | <a class='octothorpe' href='#section-44'>#</a> | ||
667 | </div> | ||
668 | |||
669 | </div> | ||
670 | <div class='code'> | ||
671 | <div class="highlight"><pre> <span class="bp">self</span><span class="o">.</span><span class="n">__session</span><span class="o">.</span><span class="n">close</span><span class="p">()</span></pre></div> | ||
672 | </div> | ||
673 | </div> | ||
674 | <div class='clearall'></div> | ||
675 | <div class='section' id='section-45'> | ||
676 | <div class='docs'> | ||
677 | <div class='octowrap'> | ||
678 | <a class='octothorpe' href='#section-45'>#</a> | ||
679 | </div> | ||
680 | <p><code>get_access_token()</code> will <code>POST</code> to <code>client.py:GOOGLE_TOKEN_URI</code> which is just | ||
681 | <code>https://oauth2.googleapis.com/token</code>. The body of the <code>POST</code> looks like</p> | ||
682 | <pre><code class="language-JSON">{ | ||
683 | "grant_type": "refresh_token", | ||
684 | "client_id": my_client_id, | ||
685 | "client_secret": my_client_secret, | ||
686 | "refresh_token": my_refresh_token | ||
687 | } | ||
688 | </code></pre> | ||
689 | <p>Side Effects:</p> | ||
690 | <ul> | ||
691 | <li>Store the access token and time it expires in private fields on the Client object</li> | ||
692 | </ul> | ||
693 | </div> | ||
694 | <div class='code'> | ||
695 | <div class="highlight"><pre> <span class="nd">@backoff</span><span class="o">.</span><span class="n">on_exception</span><span class="p">(</span><span class="n">backoff</span><span class="o">.</span><span class="n">expo</span><span class="p">,</span> | ||
696 | <span class="n">Server5xxError</span><span class="p">,</span> | ||
697 | <span class="n">max_tries</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> | ||
698 | <span class="n">factor</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span> | ||
699 | <span class="k">def</span> <span class="nf">get_access_token</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span></pre></div> | ||
700 | </div> | ||
701 | </div> | ||
702 | <div class='clearall'></div> | ||
703 | <div class='section' id='section-46'> | ||
704 | <div class='docs'> | ||
705 | <div class='octowrap'> | ||
706 | <a class='octothorpe' href='#section-46'>#</a> | ||
707 | </div> | ||
708 | |||
709 | </div> | ||
710 | <div class='code'> | ||
711 | <div class="highlight"><pre> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">__access_token</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> | ||
712 | <span class="k">return</span> | ||
713 | |||
714 | <span class="n">headers</span> <span class="o">=</span> <span class="p">{}</span> | ||
715 | <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">__user_agent</span><span class="p">:</span> | ||
716 | <span class="n">headers</span><span class="p">[</span><span class="s1">'User-Agent'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__user_agent</span> | ||
717 | |||
718 | <span class="n">response</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__session</span><span class="o">.</span><span class="n">post</span><span class="p">(</span> | ||
719 | <span class="n">url</span><span class="o">=</span><span class="n">GOOGLE_TOKEN_URI</span><span class="p">,</span> | ||
720 | <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">,</span> | ||
721 | <span class="n">data</span><span class="o">=</span><span class="p">{</span> | ||
722 | <span class="s1">'grant_type'</span><span class="p">:</span> <span class="s1">'refresh_token'</span><span class="p">,</span> | ||
723 | <span class="s1">'client_id'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">__client_id</span><span class="p">,</span> | ||
724 | <span class="s1">'client_secret'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">__client_secret</span><span class="p">,</span> | ||
725 | <span class="s1">'refresh_token'</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">__refresh_token</span><span class="p">,</span> | ||
726 | <span class="p">})</span> | ||
727 | |||
728 | <span class="k">if</span> <span class="n">response</span><span class="o">.</span><span class="n">status_code</span> <span class="o">>=</span> <span class="mi">500</span><span class="p">:</span> | ||
729 | <span class="k">raise</span> <span class="n">Server5xxError</span><span class="p">()</span> | ||
730 | |||
731 | <span class="k">if</span> <span class="n">response</span><span class="o">.</span><span class="n">status_code</span> <span class="o">!=</span> <span class="mi">200</span><span class="p">:</span> | ||
732 | <span class="n">raise_for_error</span><span class="p">(</span><span class="n">response</span><span class="p">)</span> | ||
733 | |||
734 | <span class="n">data</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">json</span><span class="p">()</span> | ||
735 | <span class="bp">self</span><span class="o">.</span><span class="n">__access_token</span> <span class="o">=</span> <span class="n">data</span><span class="p">[</span><span class="s1">'access_token'</span><span class="p">]</span> | ||
736 | <span class="bp">self</span><span class="o">.</span><span class="n">__expires</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">utcnow</span><span class="p">()</span> <span class="o">+</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">seconds</span><span class="o">=</span><span class="n">data</span><span class="p">[</span><span class="s1">'expires_in'</span><span class="p">])</span> | ||
737 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Authorized, token expires = </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__expires</span><span class="p">))</span></pre></div> | ||
738 | </div> | ||
739 | </div> | ||
740 | <div class='clearall'></div> | ||
741 | <div class='section' id='section-47'> | ||
742 | <div class='docs'> | ||
743 | <div class='octowrap'> | ||
744 | <a class='octothorpe' href='#section-47'>#</a> | ||
745 | </div> | ||
746 | <hr /> | ||
747 | </div> | ||
748 | <div class='code'> | ||
749 | <div class="highlight"><pre></pre></div> | ||
750 | </div> | ||
751 | </div> | ||
752 | <div class='clearall'></div> | ||
753 | <div class='section' id='section-48'> | ||
754 | <div class='docs'> | ||
755 | <div class='octowrap'> | ||
756 | <a class='octothorpe' href='#section-48'>#</a> | ||
757 | </div> | ||
758 | <p>This function starts with a call to <code>GoogleClient.get_access_token()</code> which likely returns | ||
759 | immediately most of the time.</p> | ||
760 | </div> | ||
761 | <div class='code'> | ||
762 | <div class="highlight"><pre></pre></div> | ||
763 | </div> | ||
764 | </div> | ||
765 | <div class='clearall'></div> | ||
766 | <div class='section' id='section-49'> | ||
767 | <div class='docs'> | ||
768 | <div class='octowrap'> | ||
769 | <a class='octothorpe' href='#section-49'>#</a> | ||
770 | </div> | ||
771 | <p>Then we decide what url we are sending the request to. Sometimes it’s | ||
772 | <code>https://sheets.googleapis.com/v4</code> and sometimes it’s <code>https://www.googleapis.com/drive/v3</code>.</p> | ||
773 | </div> | ||
774 | <div class='code'> | ||
775 | <div class="highlight"><pre></pre></div> | ||
776 | </div> | ||
777 | </div> | ||
778 | <div class='clearall'></div> | ||
779 | <div class='section' id='section-50'> | ||
780 | <div class='docs'> | ||
781 | <div class='octowrap'> | ||
782 | <a class='octothorpe' href='#section-50'>#</a> | ||
783 | </div> | ||
784 | <ul> | ||
785 | <li>It seems like a mistake to decide this so deep into the code. Why doesn’t the caller decide | ||
786 | where the request goes?</li> | ||
787 | </ul> | ||
788 | </div> | ||
789 | <div class='code'> | ||
790 | <div class="highlight"><pre></pre></div> | ||
791 | </div> | ||
792 | </div> | ||
793 | <div class='clearall'></div> | ||
794 | <div class='section' id='section-51'> | ||
795 | <div class='docs'> | ||
796 | <div class='octowrap'> | ||
797 | <a class='octothorpe' href='#section-51'>#</a> | ||
798 | </div> | ||
799 | <p>Then we set up the request headers. The <code>authorization</code>, <code>user-agent</code>, and <code>content-type</code> keys come | ||
800 | into play here</p> | ||
801 | </div> | ||
802 | <div class='code'> | ||
803 | <div class="highlight"><pre></pre></div> | ||
804 | </div> | ||
805 | </div> | ||
806 | <div class='clearall'></div> | ||
807 | <div class='section' id='section-52'> | ||
808 | <div class='docs'> | ||
809 | <div class='octowrap'> | ||
810 | <a class='octothorpe' href='#section-52'>#</a> | ||
811 | </div> | ||
812 | <ul> | ||
813 | <li>One benefit of a <code>requests.Session</code> is that you can set the headers for the session. I’m not | ||
814 | sure why we don’t do that here</li> | ||
815 | <li>If we did that, we wouldn’t have to think about the access_token making it into the headers | ||
816 | here. They would just already be there</li> | ||
817 | </ul> | ||
818 | </div> | ||
819 | <div class='code'> | ||
820 | <div class="highlight"><pre></pre></div> | ||
821 | </div> | ||
822 | </div> | ||
823 | <div class='clearall'></div> | ||
824 | <div class='section' id='section-53'> | ||
825 | <div class='docs'> | ||
826 | <div class='octowrap'> | ||
827 | <a class='octothorpe' href='#section-53'>#</a> | ||
828 | </div> | ||
829 | <p>Then we make the request, timing how long it takes with a <code>singer.metrics.http_request_timer</code>.</p> | ||
830 | </div> | ||
831 | <div class='code'> | ||
832 | <div class="highlight"><pre></pre></div> | ||
833 | </div> | ||
834 | </div> | ||
835 | <div class='clearall'></div> | ||
836 | <div class='section' id='section-54'> | ||
837 | <div class='docs'> | ||
838 | <div class='octowrap'> | ||
839 | <a class='octothorpe' href='#section-54'>#</a> | ||
840 | </div> | ||
841 | <p>The chunk of code after making the request handles an unsuccessful response. We will retry <code>HTTP | ||
842 | 500</code> and <code>HTTP 429</code> errors, and call <code>client.py:raise_for_error</code> for everything else.</p> | ||
843 | </div> | ||
844 | <div class='code'> | ||
845 | <div class="highlight"><pre></pre></div> | ||
846 | </div> | ||
847 | </div> | ||
848 | <div class='clearall'></div> | ||
849 | <div class='section' id='section-55'> | ||
850 | <div class='docs'> | ||
851 | <div class='octowrap'> | ||
852 | <a class='octothorpe' href='#section-55'>#</a> | ||
853 | </div> | ||
854 | <p>The most distinctive thing about this tap happens here: we return an <code>OrderedDict</code> of the response with this | ||
855 | line</p> | ||
856 | </div> | ||
857 | <div class='code'> | ||
858 | <div class="highlight"><pre></pre></div> | ||
859 | </div> | ||
860 | </div> | ||
861 | <div class='clearall'></div> | ||
862 | <div class='section' id='section-56'> | ||
863 | <div class='docs'> | ||
864 | <div class='octowrap'> | ||
865 | <a class='octothorpe' href='#section-56'>#</a> | ||
866 | </div> | ||
867 | <pre><code class="language-Python">return response.json(object_pairs_hook=OrderedDict) | ||
868 | </code></pre> | ||
869 | </div> | ||
870 | <div class='code'> | ||
871 | <div class="highlight"><pre></pre></div> | ||
872 | </div> | ||
873 | </div> | ||
874 | <div class='clearall'></div> | ||
875 | <div class='section' id='section-57'> | ||
876 | <div class='docs'> | ||
877 | <div class='octowrap'> | ||
878 | <a class='octothorpe' href='#section-57'>#</a> | ||
879 | </div> | ||
880 | <p>where <code>object_pairs_hook</code> is a <code>kwarg</code> passed to the JSON parser used by <code>requests</code>.</p> | ||
881 | </div> | ||
882 | <div class='code'> | ||
883 | <div class="highlight"><pre></pre></div> | ||
884 | </div> | ||
885 | </div> | ||
886 | <div class='clearall'></div> | ||
887 | <div class='section' id='section-58'> | ||
888 | <div class='docs'> | ||
889 | <div class='octowrap'> | ||
890 | <a class='octothorpe' href='#section-58'>#</a> | ||
891 | </div> | ||
892 | <p>This turns every JSON object (each set of key-value pairs) in the response into an <code>OrderedDict</code>.</p> | ||
893 | </div> | ||
894 | <div class='code'> | ||
895 | <div class="highlight"><pre></pre></div> | ||
896 | </div> | ||
897 | </div> | ||
898 | <div class='clearall'></div> | ||
899 | <div class='section' id='section-59'> | ||
900 | <div class='docs'> | ||
901 | <div class='octowrap'> | ||
902 | <a class='octothorpe' href='#section-59'>#</a> | ||
903 | </div> | ||
904 | <p>Why do we do this? I don’t know. See the footnote for code examples</p> | ||
905 | </div> | ||
906 | <div class='code'> | ||
907 | <div class="highlight"><pre></pre></div> | ||
908 | </div> | ||
909 | </div> | ||
910 | <div class='clearall'></div> | ||
911 | <div class='section' id='section-60'> | ||
912 | <div class='docs'> | ||
913 | <div class='octowrap'> | ||
914 | <a class='octothorpe' href='#section-60'>#</a> | ||
915 | </div> | ||
916 | <hr /> | ||
917 | </div> | ||
918 | <div class='code'> | ||
919 | <div class="highlight"><pre></pre></div> | ||
920 | </div> | ||
921 | </div> | ||
922 | <div class='clearall'></div> | ||
923 | <div class='section' id='section-61'> | ||
924 | <div class='docs'> | ||
925 | <div class='octowrap'> | ||
926 | <a class='octothorpe' href='#section-61'>#</a> | ||
927 | </div> | ||
928 | <p>Rate Limit: https://developers.google.com/sheets/api/limits | ||
929 | 100 requests per 100 seconds per user</p> | ||
930 | </div> | ||
931 | <div class='code'> | ||
932 | <div class="highlight"><pre> <span class="nd">@backoff</span><span class="o">.</span><span class="n">on_exception</span><span class="p">(</span><span class="n">backoff</span><span class="o">.</span><span class="n">expo</span><span class="p">,</span> | ||
933 | <span class="p">(</span><span class="n">Server5xxError</span><span class="p">,</span> <span class="ne">ConnectionError</span><span class="p">,</span> <span class="n">Server429Error</span><span class="p">),</span> | ||
934 | <span class="n">max_tries</span><span class="o">=</span><span class="mi">7</span><span class="p">,</span> | ||
935 | <span class="n">factor</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span> | ||
936 | <span class="nd">@utils</span><span class="o">.</span><span class="n">ratelimit</span><span class="p">(</span><span class="mi">100</span><span class="p">,</span> <span class="mi">100</span><span class="p">)</span> | ||
937 | <span class="k">def</span> <span class="nf">request</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">method</span><span class="p">,</span> <span class="n">path</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">url</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">api</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span></pre></div> | ||
938 | </div> | ||
939 | </div> | ||
940 | <div class='clearall'></div> | ||
941 | <div class='section' id='section-62'> | ||
942 | <div class='docs'> | ||
943 | <div class='octowrap'> | ||
944 | <a class='octothorpe' href='#section-62'>#</a> | ||
945 | </div> | ||
946 | <p>Make a request to the API</p> | ||
947 | <p>Inputs:</p> | ||
948 | <ul> | ||
949 | <li>method: “GET” or “POST”</li> | ||
950 | <li>url: The start of the url to make the request to</li> | ||
951 | <li>path: The path appended to the API base URL when <code>url</code> is not given</li> | ||
952 | </ul> | ||
953 | <p>Returns:</p> | ||
954 | <ul> | ||
955 | <li>The JSON-decoded body of the <code>requests.Response</code>, with objects decoded as <code>OrderedDict</code></li> | ||
956 | </ul> | ||
957 | <p>Side Effects:</p> | ||
958 | <ul> | ||
959 | <li>Might store a new access token</li> | ||
960 | </ul> | ||
961 | </div> | ||
962 | <div class='code'> | ||
963 | <div class="highlight"><pre> <span class="bp">self</span><span class="o">.</span><span class="n">get_access_token</span><span class="p">()</span></pre></div> | ||
964 | </div> | ||
965 | </div> | ||
966 | <div class='clearall'></div> | ||
967 | <div class='section' id='section-63'> | ||
968 | <div class='docs'> | ||
969 | <div class='octowrap'> | ||
970 | <a class='octothorpe' href='#section-63'>#</a> | ||
971 | </div> | ||
972 | <p>Construct the URL to make a request to</p> | ||
973 | </div> | ||
974 | <div class='code'> | ||
975 | <div class="highlight"><pre> <span class="bp">self</span><span class="o">.</span><span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://sheets.googleapis.com/v4'</span> | ||
976 | <span class="k">if</span> <span class="n">api</span> <span class="o">==</span> <span class="s1">'files'</span><span class="p">:</span> | ||
977 | <span class="bp">self</span><span class="o">.</span><span class="n">base_url</span> <span class="o">=</span> <span class="s1">'https://www.googleapis.com/drive/v3'</span> | ||
978 | |||
979 | <span class="k">if</span> <span class="ow">not</span> <span class="n">url</span> <span class="ow">and</span> <span class="n">path</span><span class="p">:</span> | ||
980 | <span class="n">url</span> <span class="o">=</span> <span class="s1">'</span><span class="si">{}</span><span class="s1">/</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">base_url</span><span class="p">,</span> <span class="n">path</span><span class="p">)</span> | ||
981 | |||
982 | <span class="k">if</span> <span class="s1">'endpoint'</span> <span class="ow">in</span> <span class="n">kwargs</span><span class="p">:</span> | ||
983 | <span class="n">endpoint</span> <span class="o">=</span> <span class="n">kwargs</span><span class="p">[</span><span class="s1">'endpoint'</span><span class="p">]</span> | ||
984 | <span class="k">del</span> <span class="n">kwargs</span><span class="p">[</span><span class="s1">'endpoint'</span><span class="p">]</span> | ||
985 | <span class="k">else</span><span class="p">:</span> | ||
986 | <span class="n">endpoint</span> <span class="o">=</span> <span class="kc">None</span> | ||
987 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'</span><span class="si">{}</span><span class="s1"> URL = </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">endpoint</span><span class="p">,</span> <span class="n">url</span><span class="p">))</span></pre></div> | ||
988 | </div> | ||
989 | </div> | ||
990 | <div class='clearall'></div> | ||
991 | <div class='section' id='section-64'> | ||
992 | <div class='docs'> | ||
993 | <div class='octowrap'> | ||
994 | <a class='octothorpe' href='#section-64'>#</a> | ||
995 | </div> | ||
996 | <p>Construct the <code>headers</code> arg for <code>requests.request()</code></p> | ||
997 | </div> | ||
998 | <div class='code'> | ||
999 | <div class="highlight"><pre> <span class="k">if</span> <span class="s1">'headers'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">kwargs</span><span class="p">:</span> | ||
1000 | <span class="n">kwargs</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> | ||
1001 | <span class="n">kwargs</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Authorization'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'Bearer </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__access_token</span><span class="p">)</span> | ||
1002 | |||
1003 | <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">__user_agent</span><span class="p">:</span> | ||
1004 | <span class="n">kwargs</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'User-Agent'</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__user_agent</span> | ||
1005 | |||
1006 | <span class="k">if</span> <span class="n">method</span> <span class="o">==</span> <span class="s1">'POST'</span><span class="p">:</span> | ||
1007 | <span class="n">kwargs</span><span class="p">[</span><span class="s1">'headers'</span><span class="p">][</span><span class="s1">'Content-Type'</span><span class="p">]</span> <span class="o">=</span> <span class="s1">'application/json'</span></pre></div> | ||
1008 | </div> | ||
1009 | </div> | ||
1010 | <div class='clearall'></div> | ||
1011 | <div class='section' id='section-65'> | ||
1012 | <div class='docs'> | ||
1013 | <div class='octowrap'> | ||
1014 | <a class='octothorpe' href='#section-65'>#</a> | ||
1015 | </div> | ||
1016 | <p>Make request</p> | ||
1017 | </div> | ||
1018 | <div class='code'> | ||
1019 | <div class="highlight"><pre> <span class="k">with</span> <span class="n">metrics</span><span class="o">.</span><span class="n">http_request_timer</span><span class="p">(</span><span class="n">endpoint</span><span class="p">)</span> <span class="k">as</span> <span class="n">timer</span><span class="p">:</span> | ||
1020 | <span class="n">response</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">__session</span><span class="o">.</span><span class="n">request</span><span class="p">(</span><span class="n">method</span><span class="p">,</span> <span class="n">url</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | ||
1021 | <span class="n">timer</span><span class="o">.</span><span class="n">tags</span><span class="p">[</span><span class="n">metrics</span><span class="o">.</span><span class="n">Tag</span><span class="o">.</span><span class="n">http_status_code</span><span class="p">]</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">status_code</span></pre></div> | ||
1022 | </div> | ||
1023 | </div> | ||
1024 | <div class='clearall'></div> | ||
1025 | <div class='section' id='section-66'> | ||
1026 | <div class='docs'> | ||
1027 | <div class='octowrap'> | ||
1028 | <a class='octothorpe' href='#section-66'>#</a> | ||
1029 | </div> | ||
1030 | <p>Start backoff logic</p> | ||
1031 | </div> | ||
1032 | <div class='code'> | ||
1033 | <div class="highlight"><pre> <span class="k">if</span> <span class="n">response</span><span class="o">.</span><span class="n">status_code</span> <span class="o">>=</span> <span class="mi">500</span><span class="p">:</span> | ||
1034 | <span class="k">raise</span> <span class="n">Server5xxError</span><span class="p">()</span> | ||
1035 | |||
1036 | <span class="k">if</span> <span class="n">response</span><span class="o">.</span><span class="n">status_code</span> <span class="o">==</span> <span class="mi">429</span><span class="p">:</span> | ||
1037 | <span class="k">raise</span> <span class="n">Server429Error</span><span class="p">()</span> | ||
1038 | |||
1039 | <span class="k">if</span> <span class="n">response</span><span class="o">.</span><span class="n">status_code</span> <span class="o">!=</span> <span class="mi">200</span><span class="p">:</span> | ||
1040 | <span class="n">raise_for_error</span><span class="p">(</span><span class="n">response</span><span class="p">)</span></pre></div> | ||
1041 | </div> | ||
1042 | </div> | ||
1043 | <div class='clearall'></div> | ||
1044 | <div class='section' id='section-67'> | ||
1045 | <div class='docs'> | ||
1046 | <div class='octowrap'> | ||
1047 | <a class='octothorpe' href='#section-67'>#</a> | ||
1048 | </div> | ||
1049 | <p>Ensure keys and rows are ordered as received from API. | ||
1050 | QUESTION: But why??</p> | ||
1051 | </div> | ||
1052 | <div class='code'> | ||
1053 | <div class="highlight"><pre> <span class="k">return</span> <span class="n">response</span><span class="o">.</span><span class="n">json</span><span class="p">(</span><span class="n">object_pairs_hook</span><span class="o">=</span><span class="n">OrderedDict</span><span class="p">)</span></pre></div> | ||
1054 | </div> | ||
1055 | </div> | ||
1056 | <div class='clearall'></div> | ||
1057 | <div class='section' id='section-68'> | ||
1058 | <div class='docs'> | ||
1059 | <div class='octowrap'> | ||
1060 | <a class='octothorpe' href='#section-68'>#</a> | ||
1061 | </div> | ||
1062 | <h3>Syntactic Sugar</h3> | ||
1063 | </div> | ||
1064 | <div class='code'> | ||
1065 | <div class="highlight"><pre> <span class="k">def</span> <span class="nf">get</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">api</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> | ||
1066 | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">request</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="s1">'GET'</span><span class="p">,</span> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> <span class="n">api</span><span class="o">=</span><span class="n">api</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></pre></div> | ||
1067 | </div> | ||
1068 | </div> | ||
1069 | <div class='clearall'></div> | ||
1070 | <div class='section' id='section-69'> | ||
1071 | <div class='docs'> | ||
1072 | <div class='octowrap'> | ||
1073 | <a class='octothorpe' href='#section-69'>#</a> | ||
1074 | </div> | ||
1075 | |||
1076 | </div> | ||
1077 | <div class='code'> | ||
1078 | <div class="highlight"><pre> <span class="k">def</span> <span class="nf">post</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">api</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span> | ||
1079 | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">request</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="s1">'POST'</span><span class="p">,</span> <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> <span class="n">api</span><span class="o">=</span><span class="n">api</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></pre></div> | ||
1080 | </div> | ||
1081 | </div> | ||
1082 | <div class='clearall'></div> | ||
1083 | <div class='section' id='section-70'> | ||
1084 | <div class='docs'> | ||
1085 | <div class='octowrap'> | ||
1086 | <a class='octothorpe' href='#section-70'>#</a> | ||
1087 | </div> | ||
1088 | <hr /> | ||
1089 | </div> | ||
1090 | <div class='code'> | ||
1091 | <div class="highlight"><pre></pre></div> | ||
1092 | </div> | ||
1093 | </div> | ||
1094 | <div class='clearall'></div> | ||
1095 | <div class='section' id='section-71'> | ||
1096 | <div class='docs'> | ||
1097 | <div class='octowrap'> | ||
1098 | <a class='octothorpe' href='#section-71'>#</a> | ||
1099 | </div> | ||
1100 | <h1>Footnotes</h1> | ||
1101 | </div> | ||
1102 | <div class='code'> | ||
1103 | <div class="highlight"><pre></pre></div> | ||
1104 | </div> | ||
1105 | </div> | ||
1106 | <div class='clearall'></div> | ||
1107 | <div class='section' id='section-72'> | ||
1108 | <div class='docs'> | ||
1109 | <div class='octowrap'> | ||
1110 | <a class='octothorpe' href='#section-72'>#</a> | ||
1111 | </div> | ||
1112 | <p>Here’s a normal <code>.json()</code>’s output</p> | ||
1113 | <pre><code class="language-Python">{"file": "this is my file"} | ||
1114 | </code></pre> | ||
1115 | </div> | ||
1116 | <div class='code'> | ||
1117 | <div class="highlight"><pre></pre></div> | ||
1118 | </div> | ||
1119 | </div> | ||
1120 | <div class='clearall'></div> | ||
1121 | <div class='section' id='section-73'> | ||
1122 | <div class='docs'> | ||
1123 | <div class='octowrap'> | ||
1124 | <a class='octothorpe' href='#section-73'>#</a> | ||
1125 | </div> | ||
1126 | <p>Here’s the weird one’s <code>.json(object_pairs_hook=OrderedDict)</code> output</p> | ||
1127 | <pre><code class="language-Python">OrderedDict([('file', 'this is my file')]) | ||
1128 | </code></pre> | ||
1129 | </div> | ||
1130 | <div class='code'> | ||
1131 | <div class="highlight"><pre></pre></div> | ||
1132 | </div> | ||
1133 | </div> | ||
1134 | <div class='clearall'></div> | ||
1135 | <div class='section' id='section-74'> | ||
1136 | <div class='docs'> | ||
1137 | <div class='octowrap'> | ||
1138 | <a class='octothorpe' href='#section-74'>#</a> | ||
1139 | </div> | ||
1140 | <p>Here’s a more complex example:</p> | ||
1141 | </div> | ||
1142 | <div class='code'> | ||
1143 | <div class="highlight"><pre></pre></div> | ||
1144 | </div> | ||
1145 | </div> | ||
1146 | <div class='clearall'></div> | ||
1147 | <div class='section' id='section-75'> | ||
1148 | <div class='docs'> | ||
1149 | <div class='octowrap'> | ||
1150 | <a class='octothorpe' href='#section-75'>#</a> | ||
1151 | </div> | ||
1152 | <pre><code class="language-python">{ "deleted": false, | ||
1153 | "__v": 0, | ||
1154 | "_id": "5887e1d85c873e0011036889", | ||
1155 | "text": "Cats make about 100 different sounds. Dogs make only about 10.", | ||
1156 | "createdAt": "2018-01-15T21:20:00.003Z", | ||
1157 | "updatedAt": "2020-09-03T16:39:39.578Z", | ||
1158 | "used": true, | ||
1159 | "status": { | ||
1160 | "sentCount": 1, | ||
1161 | "feedback": "", | ||
1162 | "verified": true | ||
1163 | }, | ||
1164 | "type": "cat", | ||
1165 | "user": "5a9ac18c7478810ea6c06381", | ||
1166 | "source": "user"} | ||
1167 | </code></pre> | ||
1168 | </div> | ||
1169 | <div class='code'> | ||
1170 | <div class="highlight"><pre></pre></div> | ||
1171 | </div> | ||
1172 | </div> | ||
1173 | <div class='clearall'></div> | ||
1174 | <div class='section' id='section-76'> | ||
1175 | <div class='docs'> | ||
1176 | <div class='octowrap'> | ||
1177 | <a class='octothorpe' href='#section-76'>#</a> | ||
1178 | </div> | ||
1179 | <p>Versus <code>.json(object_pairs_hook=OrderedDict)</code></p> | ||
1180 | <pre><code class="language-python">OrderedDict([('status', OrderedDict([('verified', True), | ||
1181 | ('sentCount', 1), | ||
1182 | ('feedback', '')])), | ||
1183 | ('type', 'cat'), | ||
1184 | ('deleted', False), | ||
1185 | ('_id', '5887e1d85c873e0011036889'), | ||
1186 | ('user', '5a9ac18c7478810ea6c06381'), | ||
1187 | ('text', 'Cats make about 100 different sounds. Dogs make only about 10.'), | ||
1188 | ('__v', 0), | ||
1189 | ('source', 'user'), | ||
1190 | ('updatedAt', '2020-09-03T16:39:39.578Z'), | ||
1191 | ('createdAt', '2018-01-15T21:20:00.003Z'), | ||
1192 | ('used', True)]) | ||
1193 | </code></pre> | ||
1194 | </div> | ||
1195 | <div class='code'> | ||
1196 | <div class="highlight"><pre></pre></div> | ||
1197 | </div> | ||
1198 | </div> | ||
1199 | <div class='clearall'></div> | ||
1200 | </div> | ||
1201 | </body> | ||
diff --git a/docs/discover.html b/docs/discover.html new file mode 100644 index 0000000..aecc2b6 --- /dev/null +++ b/docs/discover.html | |||
@@ -0,0 +1,127 @@ | |||
1 | <!DOCTYPE html> | ||
2 | <html> | ||
3 | <head> | ||
4 | <meta http-equiv="content-type" content="text/html;charset=utf-8"> | ||
5 | <title>discover.py</title> | ||
6 | <link rel="stylesheet" href="pycco.css"> | ||
7 | </head> | ||
8 | <body> | ||
9 | <div id='container'> | ||
10 | <div id="background"></div> | ||
11 | <div class='section'> | ||
12 | <div class='docs'><h1>discover.py</h1></div> | ||
13 | </div> | ||
14 | <div class='clearall'> | ||
15 | <div class='section' id='section-0'> | ||
16 | <div class='docs'> | ||
17 | <div class='octowrap'> | ||
18 | <a class='octothorpe' href='#section-0'>#</a> | ||
19 | </div> | ||
20 | |||
21 | </div> | ||
22 | <div class='code'> | ||
23 | <div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">singer.catalog</span> <span class="kn">import</span> <span class="n">Catalog</span><span class="p">,</span> <span class="n">CatalogEntry</span><span class="p">,</span> <span class="n">Schema</span> | ||
24 | <span class="kn">from</span> <span class="nn">tap_google_sheets.schema</span> <span class="kn">import</span> <span class="n">get_schemas</span><span class="p">,</span> <span class="n">STREAMS</span></pre></div> | ||
25 | </div> | ||
26 | </div> | ||
27 | <div class='clearall'></div> | ||
28 | <div class='section' id='section-1'> | ||
29 | <div class='docs'> | ||
30 | <div class='octowrap'> | ||
31 | <a class='octothorpe' href='#section-1'>#</a> | ||
32 | </div> | ||
33 | <p>Construct a Catalog Entry for each stream</p> | ||
34 | <p>Inputs:</p> | ||
35 | <ul> | ||
36 | <li>client: A <code>GoogleClient</code> object</li> | ||
37 | <li>spreadsheet_id: the ID of a Google Sheet Doc</li> | ||
38 | </ul> | ||
39 | <p>Returns:</p> | ||
40 | <ul> | ||
41 | <li>A singer.Catalog object</li> | ||
42 | </ul> | ||
43 | </div> | ||
44 | <div class='code'> | ||
45 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">discover</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">):</span></pre></div> | ||
46 | </div> | ||
47 | </div> | ||
48 | <div class='clearall'></div> | ||
49 | <div class='section' id='section-2'> | ||
50 | <div class='docs'> | ||
51 | <div class='octowrap'> | ||
52 | <a class='octothorpe' href='#section-2'>#</a> | ||
53 | </div> | ||
54 | <p>It’s typical for taps in this style to call <code>schema.py:get_schemas()</code> to get <code>schemas</code> and | ||
55 | <code>field_metadata</code>.</p> | ||
56 | </div> | ||
57 | <div class='code'> | ||
58 | <div class="highlight"><pre></pre></div> | ||
59 | </div> | ||
60 | </div> | ||
61 | <div class='clearall'></div> | ||
62 | <div class='section' id='section-3'> | ||
63 | <div class='docs'> | ||
64 | <div class='octowrap'> | ||
65 | <a class='octothorpe' href='#section-3'>#</a> | ||
66 | </div> | ||
67 | <p>Here <code>schemas</code> is a dictionary of stream name to JSON schema and <code>field_metadata</code> is a dictionary | ||
68 | of stream name to another dictionary of stuff. In this tap, it seems that <code>discover.py:discover()</code> | ||
69 | only cares about sometimes getting <code>table-key-properties</code> from <code>field_metadata</code>.</p> | ||
70 | </div> | ||
71 | <div class='code'> | ||
72 | <div class="highlight"><pre></pre></div> | ||
73 | </div> | ||
74 | </div> | ||
75 | <div class='clearall'></div> | ||
76 | <div class='section' id='section-4'> | ||
77 | <div class='docs'> | ||
78 | <div class='octowrap'> | ||
79 | <a class='octothorpe' href='#section-4'>#</a> | ||
80 | </div> | ||
81 | <ul> | ||
82 | <li>This could be a point of confusion because <code>table-key-properties</code> is a stream / table level | ||
83 | metadata, which you may or may not expect to be returned and stored in <code>field_metadata</code>.</li> | ||
84 | </ul> | ||
85 | </div> | ||
86 | <div class='code'> | ||
87 | <div class="highlight"><pre> <span class="n">schemas</span><span class="p">,</span> <span class="n">field_metadata</span> <span class="o">=</span> <span class="n">get_schemas</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">)</span> | ||
88 | <span class="n">catalog</span> <span class="o">=</span> <span class="n">Catalog</span><span class="p">([])</span> | ||
89 | |||
90 | <span class="k">for</span> <span class="n">stream_name</span><span class="p">,</span> <span class="n">schema_dict</span> <span class="ow">in</span> <span class="n">schemas</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> | ||
91 | <span class="n">schema</span> <span class="o">=</span> <span class="n">Schema</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">schema_dict</span><span class="p">)</span> | ||
92 | <span class="n">mdata</span> <span class="o">=</span> <span class="n">field_metadata</span><span class="p">[</span><span class="n">stream_name</span><span class="p">]</span> | ||
93 | <span class="n">key_properties</span> <span class="o">=</span> <span class="kc">None</span> | ||
94 | <span class="k">for</span> <span class="n">mdt</span> <span class="ow">in</span> <span class="n">mdata</span><span class="p">:</span> | ||
95 | <span class="n">table_key_properties</span> <span class="o">=</span> <span class="n">mdt</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'metadata'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'table-key-properties'</span><span class="p">)</span> | ||
96 | <span class="k">if</span> <span class="n">table_key_properties</span><span class="p">:</span> | ||
97 | <span class="n">key_properties</span> <span class="o">=</span> <span class="n">table_key_properties</span></pre></div> | ||
98 | </div> | ||
99 | </div> | ||
100 | <div class='clearall'></div> | ||
101 | <div class='section' id='section-5'> | ||
102 | <div class='docs'> | ||
103 | <div class='octowrap'> | ||
104 | <a class='octothorpe' href='#section-5'>#</a> | ||
105 | </div> | ||
106 | <p>Once you have the <code>stream_name</code>, value of <code>table-key-properties</code>, the schema, and the | ||
107 | metadata for a given stream, we pass all of that to the <code>singer.CatalogEntry</code> constructor | ||
108 | and append that to the <code>singer.Catalog</code> object initialized at the start of | ||
109 | <code>discover.py:discover()</code>.</p> | ||
110 | </div> | ||
111 | <div class='code'> | ||
112 | <div class="highlight"><pre> <span class="n">catalog</span><span class="o">.</span><span class="n">streams</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">CatalogEntry</span><span class="p">(</span> | ||
113 | <span class="n">stream</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
114 | <span class="n">tap_stream_id</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
115 | <span class="n">key_properties</span><span class="o">=</span><span class="n">STREAMS</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'key_properties'</span><span class="p">,</span> <span class="n">key_properties</span><span class="p">),</span> | ||
116 | <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span> | ||
117 | <span class="n">metadata</span><span class="o">=</span><span class="n">mdata</span> | ||
118 | <span class="p">))</span> | ||
119 | |||
120 | <span class="k">return</span> <span class="n">catalog</span> | ||
121 | |||
122 | </pre></div> | ||
123 | </div> | ||
124 | </div> | ||
125 | <div class='clearall'></div> | ||
126 | </div> | ||
127 | </body> | ||
diff --git a/docs/pycco.css b/docs/pycco.css new file mode 100644 index 0000000..a45d4c0 --- /dev/null +++ b/docs/pycco.css | |||
@@ -0,0 +1,191 @@ | |||
1 | /*--------------------- Layout and Typography ----------------------------*/ | ||
2 | body { | ||
3 | font-family: 'Palatino Linotype', 'Book Antiqua', Palatino, FreeSerif, serif; | ||
4 | font-size: 16px; | ||
5 | line-height: 24px; | ||
6 | color: #252519; | ||
7 | margin: 0; padding: 0; | ||
8 | background: #f5f5ff; | ||
9 | } | ||
10 | a { | ||
11 | color: #261a3b; | ||
12 | } | ||
13 | a:visited { | ||
14 | color: #261a3b; | ||
15 | } | ||
16 | p { | ||
17 | margin: 0 0 15px 0; | ||
18 | } | ||
19 | h1, h2, h3, h4, h5, h6 { | ||
20 | margin: 40px 0 15px 0; | ||
21 | } | ||
22 | h2, h3, h4, h5, h6 { | ||
23 | margin-top: 0; | ||
24 | } | ||
25 | #container { | ||
26 | background: white; | ||
27 | } | ||
28 | #container, div.section { | ||
29 | position: relative; | ||
30 | } | ||
31 | #background { | ||
32 | position: absolute; | ||
33 | top: 0; left: 580px; right: 0; bottom: 0; | ||
34 | background: #f5f5ff; | ||
35 | border-left: 1px solid #e5e5ee; | ||
36 | z-index: 0; | ||
37 | } | ||
38 | #jump_to, #jump_page { | ||
39 | background: white; | ||
40 | -webkit-box-shadow: 0 0 25px #777; -moz-box-shadow: 0 0 25px #777; | ||
41 | -webkit-border-bottom-left-radius: 5px; -moz-border-radius-bottomleft: 5px; | ||
42 | font: 10px Arial; | ||
43 | text-transform: uppercase; | ||
44 | cursor: pointer; | ||
45 | text-align: right; | ||
46 | } | ||
47 | #jump_to, #jump_wrapper { | ||
48 | position: fixed; | ||
49 | right: 0; top: 0; | ||
50 | padding: 5px 10px; | ||
51 | } | ||
52 | #jump_wrapper { | ||
53 | padding: 0; | ||
54 | display: none; | ||
55 | } | ||
56 | #jump_to:hover #jump_wrapper { | ||
57 | display: block; | ||
58 | } | ||
59 | #jump_page { | ||
60 | padding: 5px 0 3px; | ||
61 | margin: 0 0 25px 25px; | ||
62 | } | ||
63 | #jump_page .source { | ||
64 | display: block; | ||
65 | padding: 5px 10px; | ||
66 | text-decoration: none; | ||
67 | border-top: 1px solid #eee; | ||
68 | } | ||
69 | #jump_page .source:hover { | ||
70 | background: #f5f5ff; | ||
71 | } | ||
72 | #jump_page .source:first-child { | ||
73 | } | ||
74 | div.docs { | ||
75 | float: left; | ||
76 | max-width: 500px; | ||
77 | min-width: 500px; | ||
78 | min-height: 5px; | ||
79 | padding: 10px 25px 1px 50px; | ||
80 | vertical-align: top; | ||
81 | text-align: left; | ||
82 | } | ||
83 | .docs pre { | ||
84 | margin: 15px 0 15px; | ||
85 | padding-left: 15px; | ||
86 | overflow-y: scroll; | ||
87 | } | ||
88 | .docs p tt, .docs p code { | ||
89 | background: #f8f8ff; | ||
90 | border: 1px solid #dedede; | ||
91 | font-size: 12px; | ||
92 | padding: 0 0.2em; | ||
93 | } | ||
94 | .octowrap { | ||
95 | position: relative; | ||
96 | } | ||
97 | .octothorpe { | ||
98 | font: 12px Arial; | ||
99 | text-decoration: none; | ||
100 | color: #454545; | ||
101 | position: absolute; | ||
102 | top: 3px; left: -20px; | ||
103 | padding: 1px 2px; | ||
104 | opacity: 0; | ||
105 | -webkit-transition: opacity 0.2s linear; | ||
106 | } | ||
107 | div.docs:hover .octothorpe { | ||
108 | opacity: 1; | ||
109 | } | ||
110 | div.code { | ||
111 | margin-left: 580px; | ||
112 | padding: 14px 15px 16px 50px; | ||
113 | vertical-align: top; | ||
114 | } | ||
115 | .code pre, .docs p code { | ||
116 | font-size: 12px; | ||
117 | } | ||
118 | pre, tt, code { | ||
119 | line-height: 18px; | ||
120 | font-family: Monaco, Consolas, "Lucida Console", monospace; | ||
121 | margin: 0; padding: 0; | ||
122 | } | ||
123 | div.clearall { | ||
124 | clear: both; | ||
125 | } | ||
126 | |||
127 | |||
128 | /*---------------------- Syntax Highlighting -----------------------------*/ | ||
129 | td.linenos { background-color: #f0f0f0; padding-right: 10px; } | ||
130 | span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; } | ||
131 | body .hll { background-color: #ffffcc } | ||
132 | body .c { color: #408080; font-style: italic } /* Comment */ | ||
133 | body .err { border: 1px solid #FF0000 } /* Error */ | ||
134 | body .k { color: #954121 } /* Keyword */ | ||
135 | body .o { color: #666666 } /* Operator */ | ||
136 | body .cm { color: #408080; font-style: italic } /* Comment.Multiline */ | ||
137 | body .cp { color: #BC7A00 } /* Comment.Preproc */ | ||
138 | body .c1 { color: #408080; font-style: italic } /* Comment.Single */ | ||
139 | body .cs { color: #408080; font-style: italic } /* Comment.Special */ | ||
140 | body .gd { color: #A00000 } /* Generic.Deleted */ | ||
141 | body .ge { font-style: italic } /* Generic.Emph */ | ||
142 | body .gr { color: #FF0000 } /* Generic.Error */ | ||
143 | body .gh { color: #000080; font-weight: bold } /* Generic.Heading */ | ||
144 | body .gi { color: #00A000 } /* Generic.Inserted */ | ||
145 | body .go { color: #808080 } /* Generic.Output */ | ||
146 | body .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ | ||
147 | body .gs { font-weight: bold } /* Generic.Strong */ | ||
148 | body .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ | ||
149 | body .gt { color: #0040D0 } /* Generic.Traceback */ | ||
150 | body .kc { color: #954121 } /* Keyword.Constant */ | ||
151 | body .kd { color: #954121; font-weight: bold } /* Keyword.Declaration */ | ||
152 | body .kn { color: #954121; font-weight: bold } /* Keyword.Namespace */ | ||
153 | body .kp { color: #954121 } /* Keyword.Pseudo */ | ||
154 | body .kr { color: #954121; font-weight: bold } /* Keyword.Reserved */ | ||
155 | body .kt { color: #B00040 } /* Keyword.Type */ | ||
156 | body .m { color: #666666 } /* Literal.Number */ | ||
157 | body .s { color: #219161 } /* Literal.String */ | ||
158 | body .na { color: #7D9029 } /* Name.Attribute */ | ||
159 | body .nb { color: #954121 } /* Name.Builtin */ | ||
160 | body .nc { color: #0000FF; font-weight: bold } /* Name.Class */ | ||
161 | body .no { color: #880000 } /* Name.Constant */ | ||
162 | body .nd { color: #AA22FF } /* Name.Decorator */ | ||
163 | body .ni { color: #999999; font-weight: bold } /* Name.Entity */ | ||
164 | body .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ | ||
165 | body .nf { color: #0000FF } /* Name.Function */ | ||
166 | body .nl { color: #A0A000 } /* Name.Label */ | ||
167 | body .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ | ||
168 | body .nt { color: #954121; font-weight: bold } /* Name.Tag */ | ||
169 | body .nv { color: #19469D } /* Name.Variable */ | ||
170 | body .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ | ||
171 | body .w { color: #bbbbbb } /* Text.Whitespace */ | ||
172 | body .mf { color: #666666 } /* Literal.Number.Float */ | ||
173 | body .mh { color: #666666 } /* Literal.Number.Hex */ | ||
174 | body .mi { color: #666666 } /* Literal.Number.Integer */ | ||
175 | body .mo { color: #666666 } /* Literal.Number.Oct */ | ||
176 | body .sb { color: #219161 } /* Literal.String.Backtick */ | ||
177 | body .sc { color: #219161 } /* Literal.String.Char */ | ||
178 | body .sd { color: #219161; font-style: italic } /* Literal.String.Doc */ | ||
179 | body .s2 { color: #219161 } /* Literal.String.Double */ | ||
180 | body .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ | ||
181 | body .sh { color: #219161 } /* Literal.String.Heredoc */ | ||
182 | body .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ | ||
183 | body .sx { color: #954121 } /* Literal.String.Other */ | ||
184 | body .sr { color: #BB6688 } /* Literal.String.Regex */ | ||
185 | body .s1 { color: #219161 } /* Literal.String.Single */ | ||
186 | body .ss { color: #19469D } /* Literal.String.Symbol */ | ||
187 | body .bp { color: #954121 } /* Name.Builtin.Pseudo */ | ||
188 | body .vc { color: #19469D } /* Name.Variable.Class */ | ||
189 | body .vg { color: #19469D } /* Name.Variable.Global */ | ||
190 | body .vi { color: #19469D } /* Name.Variable.Instance */ | ||
191 | body .il { color: #666666 } /* Literal.Number.Integer.Long */ | ||
diff --git a/docs/schema.html b/docs/schema.html new file mode 100644 index 0000000..0564f26 --- /dev/null +++ b/docs/schema.html | |||
@@ -0,0 +1,949 @@ | |||
1 | <!DOCTYPE html> | ||
2 | <html> | ||
3 | <head> | ||
4 | <meta http-equiv="content-type" content="text/html;charset=utf-8"> | ||
5 | <title>schema.py</title> | ||
6 | <link rel="stylesheet" href="pycco.css"> | ||
7 | </head> | ||
8 | <body> | ||
9 | <div id='container'> | ||
10 | <div id="background"></div> | ||
11 | <div class='section'> | ||
12 | <div class='docs'><h1>schema.py</h1></div> | ||
13 | </div> | ||
14 | <div class='clearall'> | ||
15 | <div class='section' id='section-0'> | ||
16 | <div class='docs'> | ||
17 | <div class='octowrap'> | ||
18 | <a class='octothorpe' href='#section-0'>#</a> | ||
19 | </div> | ||
20 | |||
21 | </div> | ||
22 | <div class='code'> | ||
23 | <div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">os</span> | ||
24 | <span class="kn">import</span> <span class="nn">json</span> | ||
25 | <span class="kn">import</span> <span class="nn">re</span> | ||
26 | <span class="kn">import</span> <span class="nn">urllib.parse</span> | ||
27 | <span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">OrderedDict</span> | ||
28 | <span class="kn">import</span> <span class="nn">singer</span> | ||
29 | <span class="kn">from</span> <span class="nn">singer</span> <span class="kn">import</span> <span class="n">metadata</span> | ||
30 | <span class="kn">from</span> <span class="nn">tap_google_sheets.streams</span> <span class="kn">import</span> <span class="n">STREAMS</span> | ||
31 | |||
32 | <span class="n">LOGGER</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">get_logger</span><span class="p">()</span></pre></div> | ||
33 | </div> | ||
34 | </div> | ||
35 | <div class='clearall'></div> | ||
36 | <div class='section' id='section-1'> | ||
37 | <div class='docs'> | ||
38 | <div class='octowrap'> | ||
39 | <a class='octothorpe' href='#section-1'>#</a> | ||
40 | </div> | ||
41 | <p>Convert column index to column letter</p> | ||
42 | </div> | ||
43 | <div class='code'> | ||
44 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">colnum_string</span><span class="p">(</span><span class="n">num</span><span class="p">):</span></pre></div> | ||
45 | </div> | ||
46 | </div> | ||
47 | <div class='clearall'></div> | ||
48 | <div class='section' id='section-2'> | ||
49 | <div class='docs'> | ||
50 | <div class='octowrap'> | ||
51 | <a class='octothorpe' href='#section-2'>#</a> | ||
52 | </div> | ||
53 | |||
54 | </div> | ||
55 | <div class='code'> | ||
56 | <div class="highlight"><pre> <span class="n">string</span> <span class="o">=</span> <span class="s2">""</span> | ||
57 | <span class="k">while</span> <span class="n">num</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span> | ||
58 | <span class="n">num</span><span class="p">,</span> <span class="n">remainder</span> <span class="o">=</span> <span class="nb">divmod</span><span class="p">(</span><span class="n">num</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">26</span><span class="p">)</span> | ||
59 | <span class="n">string</span> <span class="o">=</span> <span class="nb">chr</span><span class="p">(</span><span class="mi">65</span> <span class="o">+</span> <span class="n">remainder</span><span class="p">)</span> <span class="o">+</span> <span class="n">string</span> | ||
60 | <span class="k">return</span> <span class="n">string</span></pre></div> | ||
61 | </div> | ||
62 | </div> | ||
63 | <div class='clearall'></div> | ||
64 | <div class='section' id='section-3'> | ||
65 | <div class='docs'> | ||
66 | <div class='octowrap'> | ||
67 | <a class='octothorpe' href='#section-3'>#</a> | ||
68 | </div> | ||
69 | <hr /> | ||
70 | </div> | ||
71 | <div class='code'> | ||
72 | <div class="highlight"><pre></pre></div> | ||
73 | </div> | ||
74 | </div> | ||
75 | <div class='clearall'></div> | ||
76 | <div class='section' id='section-4'> | ||
77 | <div class='docs'> | ||
78 | <div class='octowrap'> | ||
79 | <a class='octothorpe' href='#section-4'>#</a> | ||
80 | </div> | ||
81 | <p>The goal of this function is to get the JSON schema of the sheet you pass in. Our return values here | ||
82 | are <code>sheet_json_schema</code> and <code>columns</code>, an <code>OrderedDict</code> and a list respectively.</p> | ||
83 | </div> | ||
84 | <div class='code'> | ||
85 | <div class="highlight"><pre></pre></div> | ||
86 | </div> | ||
87 | </div> | ||
88 | <div class='clearall'></div> | ||
89 | <div class='section' id='section-5'> | ||
90 | <div class='docs'> | ||
91 | <div class='octowrap'> | ||
92 | <a class='octothorpe' href='#section-5'>#</a> | ||
93 | </div> | ||
94 | <p>This function is massive and we will discuss it in the following parts:</p> | ||
95 | </div> | ||
96 | <div class='code'> | ||
97 | <div class="highlight"><pre></pre></div> | ||
98 | </div> | ||
99 | </div> | ||
100 | <div class='clearall'></div> | ||
101 | <div class='section' id='section-6'> | ||
102 | <div class='docs'> | ||
103 | <div class='octowrap'> | ||
104 | <a class='octothorpe' href='#section-6'>#</a> | ||
105 | </div> | ||
106 | <ul> | ||
107 | <li>Part 1</li> | ||
108 | <li>Part 2<ul> | ||
109 | <li>Part 2A</li> | ||
110 | <li>Part 2B<ul> | ||
111 | <li>Part 3</li> | ||
112 | <li>Part 4</li> | ||
113 | </ul> | ||
114 | </li> | ||
115 | </ul> | ||
116 | </li> | ||
117 | </ul> | ||
118 | </div> | ||
119 | <div class='code'> | ||
120 | <div class="highlight"><pre></pre></div> | ||
121 | </div> | ||
122 | </div> | ||
123 | <div class='clearall'></div> | ||
124 | <div class='section' id='section-7'> | ||
125 | <div class='docs'> | ||
126 | <div class='octowrap'> | ||
127 | <a class='octothorpe' href='#section-7'>#</a> | ||
128 | </div> | ||
129 | <p>Part 1 is just setting up constants and variables. We can skim through this part.</p> | ||
130 | </div> | ||
131 | <div class='code'> | ||
132 | <div class="highlight"><pre></pre></div> | ||
133 | </div> | ||
134 | </div> | ||
135 | <div class='clearall'></div> | ||
136 | <div class='section' id='section-8'> | ||
137 | <div class='docs'> | ||
138 | <div class='octowrap'> | ||
139 | <a class='octothorpe' href='#section-8'>#</a> | ||
140 | </div> | ||
141 | <p>Part 2 is split into two parts because it’s a loop over the columns and there are two ways to handle a | ||
142 | column.</p> | ||
143 | </div> | ||
144 | <div class='code'> | ||
145 | <div class="highlight"><pre></pre></div> | ||
146 | </div> | ||
147 | </div> | ||
148 | <div class='clearall'></div> | ||
149 | <div class='section' id='section-9'> | ||
150 | <div class='docs'> | ||
151 | <div class='octowrap'> | ||
152 | <a class='octothorpe' href='#section-9'>#</a> | ||
153 | </div> | ||
154 | <p>We’ll consider 2A to be the “skip this column” case.</p> | ||
155 | </div> | ||
156 | <div class='code'> | ||
157 | <div class="highlight"><pre></pre></div> | ||
158 | </div> | ||
159 | </div> | ||
160 | <div class='clearall'></div> | ||
161 | <div class='section' id='section-10'> | ||
162 | <div class='docs'> | ||
163 | <div class='octowrap'> | ||
164 | <a class='octothorpe' href='#section-10'>#</a> | ||
165 | </div> | ||
166 | <p>We’ll consider 2B to be the “not skipped” case, in which we determine a field’s type (Part 3) and then | ||
167 | use the type to decide the JSON Schema (Part 4).</p> | ||
168 | </div> | ||
169 | <div class='code'> | ||
170 | <div class="highlight"><pre></pre></div> | ||
171 | </div> | ||
172 | </div> | ||
173 | <div class='clearall'></div> | ||
174 | <div class='section' id='section-11'> | ||
175 | <div class='docs'> | ||
176 | <div class='octowrap'> | ||
177 | <a class='octothorpe' href='#section-11'>#</a> | ||
178 | </div> | ||
179 | <hr /> | ||
180 | <p>Create sheet_metadata_json with columns from sheet</p> | ||
181 | </div> | ||
182 | <div class='code'> | ||
183 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">get_sheet_schema_columns</span><span class="p">(</span><span class="n">sheet</span><span class="p">):</span></pre></div> | ||
184 | </div> | ||
185 | </div> | ||
186 | <div class='clearall'></div> | ||
187 | <div class='section' id='section-12'> | ||
188 | <div class='docs'> | ||
189 | <div class='octowrap'> | ||
190 | <a class='octothorpe' href='#section-12'>#</a> | ||
191 | </div> | ||
192 | <p>The input to this function is shaped like</p> | ||
193 | <pre><code class="language-JSON">{ | ||
194 | "data" : [ | ||
195 | { | ||
196 | "rowData": [ | ||
197 | {"values": <thing 1>}, | ||
198 | {"values": <thing 2>} | ||
199 | ] | ||
200 | } | ||
201 | ] | ||
202 | } | ||
203 | </code></pre> | ||
204 | <p>Return Values</p> | ||
205 | <ul> | ||
206 | <li> | ||
207 | <p>columns</p> | ||
208 | <ul> | ||
209 | <li>A <code>column</code> that goes into <code>columns</code> is a dictionary with keys <code>"columnIndex"</code>, | ||
210 | <code>"columnLetter"</code>, <code>"columnName"</code>, <code>"columnType"</code>, and <code>"columnSkipped"</code>.</li> | ||
211 | </ul> | ||
212 | </li> | ||
213 | <li> | ||
214 | <p>sheet_json_schema</p> | ||
215 | <ul> | ||
216 | <li>A <code>col_properties</code> that goes into <code>sheet_json_schema['properties'][column_name]</code> is the JSON | ||
217 | schema of <code>column_name</code>.</li> | ||
218 | </ul> | ||
219 | </li> | ||
220 | </ul> | ||
221 | </div> | ||
222 | <div class='code'> | ||
223 | <div class="highlight"><pre> <span class="n">sheet_title</span> <span class="o">=</span> <span class="n">sheet</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'title'</span><span class="p">)</span> | ||
224 | <span class="n">sheet_json_schema</span> <span class="o">=</span> <span class="n">OrderedDict</span><span class="p">()</span> | ||
225 | <span class="n">data</span> <span class="o">=</span> <span class="nb">next</span><span class="p">(</span><span class="nb">iter</span><span class="p">(</span><span class="n">sheet</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'data'</span><span class="p">,</span> <span class="p">[])),</span> <span class="p">{})</span> | ||
226 | <span class="n">row_data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'rowData'</span><span class="p">,</span> <span class="p">[])</span> | ||
227 | <span class="k">if</span> <span class="n">row_data</span> <span class="o">==</span> <span class="p">[]:</span> | ||
228 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'SKIPPING Empty Sheet: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">))</span> | ||
229 | <span class="k">return</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span></pre></div> | ||
230 | </div> | ||
231 | </div> | ||
232 | <div class='clearall'></div> | ||
233 | <div class='section' id='section-13'> | ||
234 | <div class='docs'> | ||
235 | <div class='octowrap'> | ||
236 | <a class='octothorpe' href='#section-13'>#</a> | ||
237 | </div> | ||
238 | <p>This function starts by unpacking the input into two lists, <code>headers</code> and <code>first_values</code>, which are | ||
239 | “thing 1” and “thing 2” respectively.</p> | ||
240 | </div> | ||
241 | <div class='code'> | ||
242 | <div class="highlight"><pre> <span class="n">headers</span> <span class="o">=</span> <span class="n">row_data</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'values'</span><span class="p">,</span> <span class="p">[])</span> | ||
243 | <span class="n">first_values</span> <span class="o">=</span> <span class="n">row_data</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'values'</span><span class="p">,</span> <span class="p">[])</span></pre></div> | ||
244 | </div> | ||
245 | </div> | ||
246 | <div class='clearall'></div> | ||
247 | <div class='section' id='section-14'> | ||
248 | <div class='docs'> | ||
249 | <div class='octowrap'> | ||
250 | <a class='octothorpe' href='#section-14'>#</a> | ||
251 | </div> | ||
252 | <p>All of the objects in <code>headers</code> and <code>first_values</code> have the following shape:</p> | ||
253 | </div> | ||
254 | <div class='code'> | ||
255 | <div class="highlight"><pre></pre></div> | ||
256 | </div> | ||
257 | </div> | ||
258 | <div class='clearall'></div> | ||
259 | <div class='section' id='section-15'> | ||
260 | <div class='docs'> | ||
261 | <div class='octowrap'> | ||
262 | <a class='octothorpe' href='#section-15'>#</a> | ||
263 | </div> | ||
264 | <pre><code class="language-JSON">{ | ||
265 | "userEnteredValue": {"stringValue": "time1"}, | ||
266 | "effectiveValue": {"stringValue": "time1"}, | ||
267 | "formattedValue": "time1", | ||
268 | "userEnteredFormat": {...}, | ||
269 | "effectiveFormat": {} | ||
270 | } | ||
271 | </code></pre> | ||
272 | </div> | ||
273 | <div class='code'> | ||
274 | <div class="highlight"><pre></pre></div> | ||
275 | </div> | ||
276 | </div> | ||
277 | <div class='clearall'></div> | ||
278 | <div class='section' id='section-16'> | ||
279 | <div class='docs'> | ||
280 | <div class='octowrap'> | ||
281 | <a class='octothorpe' href='#section-16'>#</a> | ||
282 | </div> | ||
283 | <p>The base Sheet schema</p> | ||
284 | </div> | ||
285 | <div class='code'> | ||
286 | <div class="highlight"><pre> <span class="n">sheet_json_schema</span> <span class="o">=</span> <span class="p">{</span> | ||
287 | <span class="s1">'type'</span><span class="p">:</span> <span class="s1">'object'</span><span class="p">,</span> | ||
288 | <span class="s1">'additionalProperties'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> | ||
289 | <span class="s1">'properties'</span><span class="p">:</span> <span class="p">{</span> | ||
290 | <span class="s1">'__sdc_spreadsheet_id'</span><span class="p">:</span> <span class="p">{</span> | ||
291 | <span class="s1">'type'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'null'</span><span class="p">,</span> <span class="s1">'string'</span><span class="p">]</span> | ||
292 | <span class="p">},</span> | ||
293 | <span class="s1">'__sdc_sheet_id'</span><span class="p">:</span> <span class="p">{</span> | ||
294 | <span class="s1">'type'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'null'</span><span class="p">,</span> <span class="s1">'integer'</span><span class="p">]</span> | ||
295 | <span class="p">},</span> | ||
296 | <span class="s1">'__sdc_row'</span><span class="p">:</span> <span class="p">{</span> | ||
297 | <span class="s1">'type'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'null'</span><span class="p">,</span> <span class="s1">'integer'</span><span class="p">]</span> | ||
298 | <span class="p">}</span> | ||
299 | <span class="p">}</span> | ||
300 | <span class="p">}</span> | ||
301 | |||
302 | <span class="n">header_list</span> <span class="o">=</span> <span class="p">[]</span> <span class="c1"># used for checking uniqueness</span> | ||
303 | <span class="n">columns</span> <span class="o">=</span> <span class="p">[]</span> | ||
304 | <span class="n">prior_header</span> <span class="o">=</span> <span class="kc">None</span> | ||
305 | <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span> | ||
306 | <span class="n">skipped</span> <span class="o">=</span> <span class="mi">0</span></pre></div> | ||
307 | </div> | ||
308 | </div> | ||
309 | <div class='clearall'></div> | ||
310 | <div class='section' id='section-17'> | ||
311 | <div class='docs'> | ||
312 | <div class='octowrap'> | ||
313 | <a class='octothorpe' href='#section-17'>#</a> | ||
314 | </div> | ||
315 | <p>We loop over the columns in the <code>headers</code> list and accumulate an object in each return | ||
316 | variable.</p> | ||
317 | </div> | ||
318 | <div class='code'> | ||
319 | <div class="highlight"><pre> <span class="k">for</span> <span class="n">header</span> <span class="ow">in</span> <span class="n">headers</span><span class="p">:</span> | ||
320 | <span class="n">column_index</span> <span class="o">=</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span> | ||
321 | <span class="n">column_letter</span> <span class="o">=</span> <span class="n">colnum_string</span><span class="p">(</span><span class="n">column_index</span><span class="p">)</span> | ||
322 | <span class="n">header_value</span> <span class="o">=</span> <span class="n">header</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'formattedValue'</span><span class="p">)</span> | ||
323 | <span class="k">if</span> <span class="n">header_value</span><span class="p">:</span> <span class="c1"># NOT skipped</span></pre></div> | ||
324 | </div> | ||
325 | </div> | ||
326 | <div class='clearall'></div> | ||
327 | <div class='section' id='section-18'> | ||
328 | <div class='docs'> | ||
329 | <div class='octowrap'> | ||
330 | <a class='octothorpe' href='#section-18'>#</a> | ||
331 | </div> | ||
332 | <p>Assuming the column we are looking at does not get skipped, we have to figure out the | ||
333 | schema.</p> | ||
334 | </div> | ||
335 | <div class='code'> | ||
336 | <div class="highlight"><pre> <span class="n">column_is_skipped</span> <span class="o">=</span> <span class="kc">False</span></pre></div> | ||
337 | </div> | ||
338 | </div> | ||
339 | <div class='clearall'></div> | ||
340 | <div class='section' id='section-19'> | ||
341 | <div class='docs'> | ||
342 | <div class='octowrap'> | ||
343 | <a class='octothorpe' href='#section-19'>#</a> | ||
344 | </div> | ||
345 | <p>First we reset the counter for consecutive skipped columns.</p> | ||
346 | </div> | ||
347 | <div class='code'> | ||
348 | <div class="highlight"><pre> <span class="n">skipped</span> <span class="o">=</span> <span class="mi">0</span></pre></div> | ||
349 | </div> | ||
350 | </div> | ||
351 | <div class='clearall'></div> | ||
352 | <div class='section' id='section-20'> | ||
353 | <div class='docs'> | ||
354 | <div class='octowrap'> | ||
355 | <a class='octothorpe' href='#section-20'>#</a> | ||
356 | </div> | ||
357 | <p>Then we let the name of this column be the value of <code>formattedValue</code> from the <code>header</code> | ||
358 | object we are looking at. This seems to be the value rendered in Google Sheets in the | ||
359 | cell.</p> | ||
360 | </div> | ||
361 | <div class='code'> | ||
362 | <div class="highlight"><pre> <span class="n">column_name</span> <span class="o">=</span> <span class="s1">'</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">header_value</span><span class="p">)</span></pre></div> | ||
363 | </div> | ||
364 | </div> | ||
365 | <div class='clearall'></div> | ||
366 | <div class='section' id='section-21'> | ||
367 | <div class='docs'> | ||
368 | <div class='octowrap'> | ||
369 | <a class='octothorpe' href='#section-21'>#</a> | ||
370 | </div> | ||
371 | <p>We assert that this column name is unique or else we raise a “Duplicate Header Error”.</p> | ||
372 | </div> | ||
373 | <div class='code'> | ||
374 | <div class="highlight"><pre> <span class="k">if</span> <span class="n">column_name</span> <span class="ow">in</span> <span class="n">header_list</span><span class="p">:</span> | ||
375 | <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s1">'DUPLICATE HEADER ERROR: SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}</span><span class="s1">1'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
376 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">column_name</span><span class="p">,</span> <span class="n">column_letter</span><span class="p">))</span> | ||
377 | <span class="n">header_list</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">column_name</span><span class="p">)</span></pre></div> | ||
378 | </div> | ||
379 | </div> | ||
380 | <div class='clearall'></div> | ||
381 | <div class='section' id='section-22'> | ||
382 | <div class='docs'> | ||
383 | <div class='octowrap'> | ||
384 | <a class='octothorpe' href='#section-22'>#</a> | ||
385 | </div> | ||
386 | <p>We attempt to grab the value in the second row of the sheet (the first row of data) | ||
387 | associated with this column. Remember this row we are looking at is stored in | ||
388 | <code>first_values</code>. Note again that <code>headers</code> and <code>first_values</code> have the same shape.</p> | ||
389 | </div> | ||
390 | <div class='code'> | ||
391 | <div class="highlight"><pre> <span class="n">first_value</span> <span class="o">=</span> <span class="kc">None</span> | ||
392 | <span class="k">try</span><span class="p">:</span> | ||
393 | <span class="n">first_value</span> <span class="o">=</span> <span class="n">first_values</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> | ||
394 | <span class="k">except</span> <span class="ne">IndexError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span> | ||
395 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'NO VALUE IN 2ND ROW FOR HEADER. SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}</span><span class="s1">2. </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
396 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">column_name</span><span class="p">,</span> <span class="n">column_letter</span><span class="p">,</span> <span class="n">err</span><span class="p">))</span> | ||
397 | <span class="n">first_value</span> <span class="o">=</span> <span class="p">{}</span> | ||
398 | <span class="n">first_values</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">first_value</span><span class="p">)</span> | ||
399 | <span class="k">pass</span> | ||
400 | |||
401 | <span class="n">column_effective_value</span> <span class="o">=</span> <span class="n">first_value</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'effectiveValue'</span><span class="p">,</span> <span class="p">{})</span> | ||
402 | |||
403 | <span class="n">col_val</span> <span class="o">=</span> <span class="kc">None</span> | ||
404 | <span class="k">if</span> <span class="n">column_effective_value</span> <span class="o">==</span> <span class="p">{}:</span> | ||
405 | <span class="n">column_effective_value_type</span> <span class="o">=</span> <span class="s1">'stringValue'</span> | ||
406 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: NO VALUE IN 2ND ROW FOR HEADER. SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}</span><span class="s1">2.'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
407 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">column_name</span><span class="p">,</span> <span class="n">column_letter</span><span class="p">))</span> | ||
408 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">' Setting column datatype to STRING'</span><span class="p">)</span> | ||
409 | <span class="k">else</span><span class="p">:</span></pre></div> | ||
410 | </div> | ||
411 | </div> | ||
412 | <div class='clearall'></div> | ||
413 | <div class='section' id='section-23'> | ||
414 | <div class='docs'> | ||
415 | <div class='octowrap'> | ||
416 | <a class='octothorpe' href='#section-23'>#</a> | ||
417 | </div> | ||
418 | <p>The tap calls the value of <code>"effectiveValue"</code> the <code>column_effective_value</code>. This | ||
419 | dictionary can be empty or it can have a <code>key1</code> that looks like <code>"numberValue"</code>, | ||
420 | <code>"stringValue"</code>, or <code>"boolValue"</code>. If the dictionary is empty, we force <code>key1</code> to | ||
421 | be <code>"stringValue"</code>.</p> | ||
422 | </div> | ||
423 | <div class='code'> | ||
424 | <div class="highlight"><pre> <span class="k">for</span> <span class="n">key</span><span class="p">,</span> <span class="n">val</span> <span class="ow">in</span> <span class="n">column_effective_value</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> | ||
425 | <span class="k">if</span> <span class="n">key</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'numberValue'</span><span class="p">,</span> <span class="s1">'stringValue'</span><span class="p">,</span> <span class="s1">'boolValue'</span><span class="p">):</span> | ||
426 | <span class="n">column_effective_value_type</span> <span class="o">=</span> <span class="n">key</span> | ||
427 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">val</span><span class="p">)</span></pre></div> | ||
428 | </div> | ||
429 | </div> | ||
430 | <div class='clearall'></div> | ||
431 | <div class='section' id='section-24'> | ||
432 | <div class='docs'> | ||
433 | <div class='octowrap'> | ||
434 | <a class='octothorpe' href='#section-24'>#</a> | ||
435 | </div> | ||
436 | <p>Sometimes <code>key1</code> also looks like <code>"errorType"</code> or <code>"formulaType"</code>, but in | ||
437 | these cases, we raise a “Data Type Error” immediately.</p> | ||
438 | </div> | ||
439 | <div class='code'> | ||
440 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">key</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'errorType'</span><span class="p">,</span> <span class="s1">'formulaType'</span><span class="p">):</span> | ||
441 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">val</span><span class="p">)</span> | ||
442 | <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s1">'DATA TYPE ERROR 2ND ROW VALUE: SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}</span><span class="s1">2, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
443 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">column_name</span><span class="p">,</span> <span class="n">column_letter</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">col_val</span><span class="p">))</span> | ||
444 | |||
445 | <span class="n">column_number_format</span> <span class="o">=</span> <span class="n">first_values</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'effectiveFormat'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span> | ||
446 | <span class="s1">'numberFormat'</span><span class="p">,</span> <span class="p">{})</span></pre></div> | ||
447 | </div> | ||
448 | </div> | ||
449 | <div class='clearall'></div> | ||
450 | <div class='section' id='section-25'> | ||
451 | <div class='docs'> | ||
452 | <div class='octowrap'> | ||
453 | <a class='octothorpe' href='#section-25'>#</a> | ||
454 | </div> | ||
455 | <p>column_number_format_type = UNSPECIFIED, TEXT, NUMBER, PERCENT, CURRENCY, DATE</p> | ||
456 | <ul> | ||
457 | <li>TIME, DATE_TIME, SCIENTIFIC</li> | ||
458 | <li>https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#NumberFormatType</li> | ||
459 | </ul> | ||
460 | </div> | ||
461 | <div class='code'> | ||
462 | <div class="highlight"><pre> <span class="n">column_number_format_type</span> <span class="o">=</span> <span class="n">column_number_format</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'type'</span><span class="p">)</span></pre></div> | ||
463 | </div> | ||
464 | </div> | ||
465 | <div class='clearall'></div> | ||
466 | <div class='section' id='section-26'> | ||
467 | <div class='docs'> | ||
468 | <div class='octowrap'> | ||
469 | <a class='octothorpe' href='#section-26'>#</a> | ||
470 | </div> | ||
471 | <p>The giant if-elif-else block: all it does is set the variables <code>col_properties</code> and | ||
472 | <code>column_gs_type</code> based on the values of <code>column_effective_value_type</code> and | ||
473 | <code>column_number_format_type</code>.</p> | ||
474 | </div> | ||
475 | <div class='code'> | ||
476 | <div class="highlight"><pre> <span class="n">column_format</span> <span class="o">=</span> <span class="kc">None</span> | ||
477 | <span class="k">if</span> <span class="n">column_effective_value</span> <span class="o">==</span> <span class="p">{}:</span> | ||
478 | <span class="n">col_properties</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'null'</span><span class="p">,</span> <span class="s1">'string'</span><span class="p">]}</span> | ||
479 | <span class="n">column_gs_type</span> <span class="o">=</span> <span class="s1">'stringValue'</span> | ||
480 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: 2ND ROW VALUE IS BLANK: SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}</span><span class="s1">2'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
481 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">column_name</span><span class="p">,</span> <span class="n">column_letter</span><span class="p">))</span> | ||
482 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">' Setting column datatype to STRING'</span><span class="p">)</span></pre></div> | ||
483 | </div> | ||
484 | </div> | ||
485 | <div class='clearall'></div> | ||
486 | <div class='section' id='section-27'> | ||
487 | <div class='docs'> | ||
488 | <div class='octowrap'> | ||
489 | <a class='octothorpe' href='#section-27'>#</a> | ||
490 | </div> | ||
491 | <p>column_effective_value_type = numberValue, stringValue, boolValue</p> | ||
492 | <ul> | ||
493 | <li>INVALID: errorType, formulaType</li> | ||
494 | <li>https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/other#ExtendedValue</li> | ||
495 | </ul> | ||
496 | </div> | ||
497 | <div class='code'> | ||
498 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">column_effective_value_type</span> <span class="o">==</span> <span class="s1">'stringValue'</span><span class="p">:</span> | ||
499 | <span class="n">col_properties</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'null'</span><span class="p">,</span> <span class="s1">'string'</span><span class="p">]}</span> | ||
500 | <span class="n">column_gs_type</span> <span class="o">=</span> <span class="s1">'stringValue'</span> | ||
501 | <span class="k">elif</span> <span class="n">column_effective_value_type</span> <span class="o">==</span> <span class="s1">'boolValue'</span><span class="p">:</span> | ||
502 | <span class="n">col_properties</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'null'</span><span class="p">,</span> <span class="s1">'boolean'</span><span class="p">,</span> <span class="s1">'string'</span><span class="p">]}</span> | ||
503 | <span class="n">column_gs_type</span> <span class="o">=</span> <span class="s1">'boolValue'</span> | ||
504 | <span class="k">elif</span> <span class="n">column_effective_value_type</span> <span class="o">==</span> <span class="s1">'numberValue'</span><span class="p">:</span> | ||
505 | <span class="k">if</span> <span class="n">column_number_format_type</span> <span class="o">==</span> <span class="s1">'DATE_TIME'</span><span class="p">:</span> | ||
506 | <span class="n">col_properties</span> <span class="o">=</span> <span class="p">{</span> | ||
507 | <span class="s1">'type'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'null'</span><span class="p">,</span> <span class="s1">'string'</span><span class="p">],</span> | ||
508 | <span class="s1">'format'</span><span class="p">:</span> <span class="s1">'date-time'</span> | ||
509 | <span class="p">}</span> | ||
510 | <span class="n">column_gs_type</span> <span class="o">=</span> <span class="s1">'numberType.DATE_TIME'</span> | ||
511 | <span class="k">elif</span> <span class="n">column_number_format_type</span> <span class="o">==</span> <span class="s1">'DATE'</span><span class="p">:</span> | ||
512 | <span class="n">col_properties</span> <span class="o">=</span> <span class="p">{</span> | ||
513 | <span class="s1">'type'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'null'</span><span class="p">,</span> <span class="s1">'string'</span><span class="p">],</span> | ||
514 | <span class="s1">'format'</span><span class="p">:</span> <span class="s1">'date'</span> | ||
515 | <span class="p">}</span> | ||
516 | <span class="n">column_gs_type</span> <span class="o">=</span> <span class="s1">'numberType.DATE'</span> | ||
517 | <span class="k">elif</span> <span class="n">column_number_format_type</span> <span class="o">==</span> <span class="s1">'TIME'</span><span class="p">:</span> | ||
518 | <span class="n">col_properties</span> <span class="o">=</span> <span class="p">{</span> | ||
519 | <span class="s1">'type'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'null'</span><span class="p">,</span> <span class="s1">'string'</span><span class="p">],</span> | ||
520 | <span class="s1">'format'</span><span class="p">:</span> <span class="s1">'time'</span> | ||
521 | <span class="p">}</span> | ||
522 | <span class="n">column_gs_type</span> <span class="o">=</span> <span class="s1">'numberType.TIME'</span> | ||
523 | <span class="k">elif</span> <span class="n">column_number_format_type</span> <span class="o">==</span> <span class="s1">'TEXT'</span><span class="p">:</span> | ||
524 | <span class="n">col_properties</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'null'</span><span class="p">,</span> <span class="s1">'string'</span><span class="p">]}</span> | ||
525 | <span class="n">column_gs_type</span> <span class="o">=</span> <span class="s1">'stringValue'</span> | ||
526 | <span class="k">else</span><span class="p">:</span> | ||
527 | <span class="n">col_properties</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="s1">'number'</span><span class="p">,</span> <span class="s1">'multipleOf'</span><span class="p">:</span> <span class="mf">1e-15</span><span class="p">}</span> | ||
528 | <span class="n">column_gs_type</span> <span class="o">=</span> <span class="s1">'numberType'</span> | ||
529 | <span class="k">else</span><span class="p">:</span> | ||
530 | <span class="n">col_properties</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'null'</span><span class="p">,</span> <span class="s1">'string'</span><span class="p">]}</span> | ||
531 | <span class="n">column_gs_type</span> <span class="o">=</span> <span class="s1">'unsupportedValue'</span> | ||
532 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: UNSUPPORTED 2ND ROW VALUE: SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}</span><span class="s1">2, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
533 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">column_name</span><span class="p">,</span> <span class="n">column_letter</span><span class="p">,</span> <span class="n">column_effective_value_type</span><span class="p">,</span> <span class="n">col_val</span><span class="p">))</span> | ||
534 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Converting to string.'</span><span class="p">)</span> | ||
535 | <span class="k">else</span><span class="p">:</span> <span class="c1"># skipped</span></pre></div> | ||
536 | </div> | ||
537 | </div> | ||
538 | <div class='clearall'></div> | ||
539 | <div class='section' id='section-28'> | ||
540 | <div class='docs'> | ||
541 | <div class='octowrap'> | ||
542 | <a class='octothorpe' href='#section-28'>#</a> | ||
543 | </div> | ||
544 | <p>We note that we are skipping this column. It still gets added to the schema though as | ||
545 | a string field. The only other notable thing about skipped columns is that we create | ||
546 | the field name for it, and it looks like <code>"__sdc_skip_col_XY"</code>, where the <code>XY</code> goes | ||
547 | from <code>"00"</code>, <code>"01"</code>, to <code>"99"</code>.</p> | ||
548 | </div> | ||
549 | <div class='code'> | ||
550 | <div class="highlight"><pre> <span class="n">column_is_skipped</span> <span class="o">=</span> <span class="kc">True</span> | ||
551 | <span class="n">skipped</span> <span class="o">=</span> <span class="n">skipped</span> <span class="o">+</span> <span class="mi">1</span> | ||
552 | <span class="n">column_index_str</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">column_index</span><span class="p">)</span><span class="o">.</span><span class="n">zfill</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span> | ||
553 | <span class="n">column_name</span> <span class="o">=</span> <span class="s1">'__sdc_skip_col_</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">column_index_str</span><span class="p">)</span> | ||
554 | <span class="n">col_properties</span> <span class="o">=</span> <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'null'</span><span class="p">,</span> <span class="s1">'string'</span><span class="p">]}</span> | ||
555 | <span class="n">column_gs_type</span> <span class="o">=</span> <span class="s1">'stringValue'</span> | ||
556 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: SKIPPED COLUMN; NO COLUMN HEADER. SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}</span><span class="s1">1'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
557 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">column_name</span><span class="p">,</span> <span class="n">column_letter</span><span class="p">))</span> | ||
558 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">' This column will be skipped during data loading.'</span><span class="p">)</span> | ||
559 | |||
560 | <span class="k">if</span> <span class="n">skipped</span> <span class="o">>=</span> <span class="mi">2</span><span class="p">:</span> | ||
561 | <span class="n">sheet_json_schema</span><span class="p">[</span><span class="s1">'properties'</span><span class="p">]</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">prior_header</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> | ||
562 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'TWO CONSECUTIVE SKIPPED COLUMNS. STOPPING SCAN AT: SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL </span><span class="si">{}</span><span class="s1">1'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
563 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">column_name</span><span class="p">,</span> <span class="n">column_letter</span><span class="p">))</span> | ||
564 | <span class="k">break</span> | ||
565 | |||
566 | <span class="k">else</span><span class="p">:</span> | ||
567 | <span class="n">column</span> <span class="o">=</span> <span class="p">{}</span> | ||
568 | <span class="n">column</span> <span class="o">=</span> <span class="p">{</span> | ||
569 | <span class="s1">'columnIndex'</span><span class="p">:</span> <span class="n">column_index</span><span class="p">,</span> | ||
570 | <span class="s1">'columnLetter'</span><span class="p">:</span> <span class="n">column_letter</span><span class="p">,</span> | ||
571 | <span class="s1">'columnName'</span><span class="p">:</span> <span class="n">column_name</span><span class="p">,</span> | ||
572 | <span class="s1">'columnType'</span><span class="p">:</span> <span class="n">column_gs_type</span><span class="p">,</span> | ||
573 | <span class="s1">'columnSkipped'</span><span class="p">:</span> <span class="n">column_is_skipped</span> | ||
574 | <span class="p">}</span> | ||
575 | <span class="n">columns</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">column</span><span class="p">)</span> | ||
576 | |||
577 | <span class="k">if</span> <span class="n">column_gs_type</span> <span class="ow">in</span> <span class="p">{</span><span class="s1">'numberType.DATE_TIME'</span><span class="p">,</span> <span class="s1">'numberType.DATE'</span><span class="p">,</span> <span class="s1">'numberType.TIME'</span><span class="p">,</span> <span class="s1">'numberType'</span><span class="p">}:</span> | ||
578 | <span class="n">col_properties</span> <span class="o">=</span> <span class="p">{</span> | ||
579 | <span class="s1">'anyOf'</span><span class="p">:</span> <span class="p">[</span> | ||
580 | <span class="n">col_properties</span><span class="p">,</span> | ||
581 | <span class="p">{</span><span class="s1">'type'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'null'</span><span class="p">,</span> <span class="s1">'string'</span><span class="p">]}</span> | ||
582 | <span class="p">]</span> | ||
583 | <span class="p">}</span> | ||
584 | |||
585 | <span class="n">sheet_json_schema</span><span class="p">[</span><span class="s1">'properties'</span><span class="p">][</span><span class="n">column_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">col_properties</span> | ||
586 | |||
587 | <span class="n">prior_header</span> <span class="o">=</span> <span class="n">column_name</span> | ||
588 | <span class="n">i</span> <span class="o">=</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span> | ||
589 | |||
590 | <span class="k">return</span> <span class="n">sheet_json_schema</span><span class="p">,</span> <span class="n">columns</span></pre></div> | ||
591 | </div> | ||
592 | </div> | ||
593 | <div class='clearall'></div> | ||
594 | <div class='section' id='section-29'> | ||
595 | <div class='docs'> | ||
596 | <div class='octowrap'> | ||
597 | <a class='octothorpe' href='#section-29'>#</a> | ||
598 | </div> | ||
599 | <p>The point of this function seems to be (1) make a request to get a sheet (2) return the schema | ||
600 | generated for this sheet by <code>schema.py:get_sheet_schema_columns</code>.</p> | ||
601 | <p><code>get_sheet_metadata()</code> sets up a lot of variables to ultimately make a request to</p> | ||
602 | <pre><code class="language-Text">https://sheets.googleapis.com/v4/spreadsheets/my-spreadsheet-id?includeGridData=true&ranges='my-sheet-title'!1:2 | ||
603 | </code></pre> | ||
604 | </div> | ||
605 | <div class='code'> | ||
606 | <div class="highlight"><pre></pre></div> | ||
607 | </div> | ||
608 | </div> | ||
609 | <div class='clearall'></div> | ||
610 | <div class='section' id='section-30'> | ||
611 | <div class='docs'> | ||
612 | <div class='octowrap'> | ||
613 | <a class='octothorpe' href='#section-30'>#</a> | ||
614 | </div> | ||
615 | <p>Let’s dissect the query params here a bit.</p> | ||
616 | </div> | ||
617 | <div class='code'> | ||
618 | <div class="highlight"><pre></pre></div> | ||
619 | </div> | ||
620 | </div> | ||
621 | <div class='clearall'></div> | ||
622 | <div class='section' id='section-31'> | ||
623 | <div class='docs'> | ||
624 | <div class='octowrap'> | ||
625 | <a class='octothorpe' href='#section-31'>#</a> | ||
626 | </div> | ||
627 | <p><code>includeGridData</code> is false by default and setting this to true lets us get “Grid data”. If you | ||
628 | compare the same request but with that value flipped, then you’ll notice that <code>includeGridData=false</code> | ||
629 | gives you a relatively small response with no data in it. It seems like just a bunch of metadata.</p> | ||
630 | </div> | ||
631 | <div class='code'> | ||
632 | <div class="highlight"><pre></pre></div> | ||
633 | </div> | ||
634 | </div> | ||
635 | <div class='clearall'></div> | ||
636 | <div class='section' id='section-32'> | ||
637 | <div class='docs'> | ||
638 | <div class='octowrap'> | ||
639 | <a class='octothorpe' href='#section-32'>#</a> | ||
640 | </div> | ||
641 | <p><code>ranges</code> controls the rows returned.</p> | ||
642 | </div> | ||
643 | <div class='code'> | ||
644 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">get_sheet_metadata</span><span class="p">(</span><span class="n">sheet</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">client</span><span class="p">):</span></pre></div> | ||
645 | </div> | ||
646 | </div> | ||
647 | <div class='clearall'></div> | ||
648 | <div class='section' id='section-33'> | ||
649 | <div class='docs'> | ||
650 | <div class='octowrap'> | ||
651 | <a class='octothorpe' href='#section-33'>#</a> | ||
652 | </div> | ||
653 | <p>Get Header Row and 1st data row (Rows 1 & 2) from a Sheet on Spreadsheet w/ sheet_metadata query</p> | ||
654 | <ul> | ||
655 | <li>endpoint: spreadsheets/{spreadsheet_id}</li> | ||
656 | <li>params: includeGridData = true, ranges = ‘{sheet_title}’!1:2 | ||
657 | This endpoint includes detailed metadata about each cell - incl. data type, formatting, etc.</li> | ||
658 | </ul> | ||
659 | </div> | ||
660 | <div class='code'> | ||
661 | <div class="highlight"><pre> <span class="n">sheet_id</span> <span class="o">=</span> <span class="n">sheet</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'sheetId'</span><span class="p">)</span> | ||
662 | <span class="n">sheet_title</span> <span class="o">=</span> <span class="n">sheet</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'title'</span><span class="p">)</span> | ||
663 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'sheet_id = </span><span class="si">{}</span><span class="s1">, sheet_title = </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sheet_id</span><span class="p">,</span> <span class="n">sheet_title</span><span class="p">))</span> | ||
664 | |||
665 | <span class="n">stream_name</span> <span class="o">=</span> <span class="s1">'sheet_metadata'</span> | ||
666 | <span class="n">stream_metadata</span> <span class="o">=</span> <span class="n">STREAMS</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span> | ||
667 | <span class="n">api</span> <span class="o">=</span> <span class="n">stream_metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'api'</span><span class="p">,</span> <span class="s1">'sheets'</span><span class="p">)</span> | ||
668 | <span class="n">params</span> <span class="o">=</span> <span class="n">stream_metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'params'</span><span class="p">,</span> <span class="p">{})</span> | ||
669 | <span class="n">sheet_title_encoded</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">quote_plus</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">)</span> | ||
670 | <span class="n">sheet_title_escaped</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">escape</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">)</span> | ||
671 | <span class="n">querystring</span> <span class="o">=</span> <span class="s1">'&'</span><span class="o">.</span><span class="n">join</span><span class="p">(</span> | ||
672 | <span class="p">[</span><span class="s1">'</span><span class="si">%s</span><span class="s1">=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> <span class="ow">in</span> <span class="n">params</span><span class="o">.</span><span class="n">items</span><span class="p">()]</span> | ||
673 | <span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{sheet_title}</span><span class="s1">'</span><span class="p">,</span> <span class="n">sheet_title_encoded</span><span class="p">)</span> | ||
674 | <span class="n">path</span> <span class="o">=</span> <span class="s1">'</span><span class="si">{}</span><span class="s1">?</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
675 | <span class="n">stream_metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'path'</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{spreadsheet_id}</span><span class="s1">'</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">),</span> | ||
676 | <span class="n">querystring</span> | ||
677 | <span class="p">)</span></pre></div> | ||
678 | </div> | ||
679 | </div> | ||
680 | <div class='clearall'></div> | ||
681 | <div class='section' id='section-34'> | ||
682 | <div class='docs'> | ||
683 | <div class='octowrap'> | ||
684 | <a class='octothorpe' href='#section-34'>#</a> | ||
685 | </div> | ||
686 | <p>See the Footnotes for this response shape</p> | ||
687 | </div> | ||
688 | <div class='code'> | ||
689 | <div class="highlight"><pre> <span class="n">sheet_md_results</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> <span class="n">api</span><span class="o">=</span><span class="n">api</span><span class="p">,</span> <span class="n">endpoint</span><span class="o">=</span><span class="n">sheet_title_escaped</span><span class="p">)</span> | ||
690 | <span class="n">sheet_metadata</span> <span class="o">=</span> <span class="n">sheet_md_results</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'sheets'</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> | ||
691 | |||
692 | |||
693 | <span class="k">try</span><span class="p">:</span></pre></div> | ||
694 | </div> | ||
695 | </div> | ||
696 | <div class='clearall'></div> | ||
697 | <div class='section' id='section-35'> | ||
698 | <div class='docs'> | ||
699 | <div class='octowrap'> | ||
700 | <a class='octothorpe' href='#section-35'>#</a> | ||
701 | </div> | ||
702 | <p>Create sheet_json_schema (for discovery/catalog) and columns (for sheet_metadata results)</p> | ||
703 | </div> | ||
704 | <div class='code'> | ||
705 | <div class="highlight"><pre> <span class="n">sheet_json_schema</span><span class="p">,</span> <span class="n">columns</span> <span class="o">=</span> <span class="n">get_sheet_schema_columns</span><span class="p">(</span><span class="n">sheet_metadata</span><span class="p">)</span> | ||
706 | <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span> | ||
707 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s1">'</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">err</span><span class="p">))</span> | ||
708 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s1">'SKIPPING Malformed sheet: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">))</span> | ||
709 | <span class="n">sheet_json_schema</span><span class="p">,</span> <span class="n">columns</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="kc">None</span> | ||
710 | |||
711 | <span class="k">return</span> <span class="n">sheet_json_schema</span><span class="p">,</span> <span class="n">columns</span></pre></div> | ||
712 | </div> | ||
713 | </div> | ||
714 | <div class='clearall'></div> | ||
715 | <div class='section' id='section-36'> | ||
716 | <div class='docs'> | ||
717 | <div class='octowrap'> | ||
718 | <a class='octothorpe' href='#section-36'>#</a> | ||
719 | </div> | ||
720 | |||
721 | </div> | ||
722 | <div class='code'> | ||
723 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">get_abs_path</span><span class="p">(</span><span class="n">path</span><span class="p">):</span> | ||
724 | <span class="k">return</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">dirname</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">realpath</span><span class="p">(</span><span class="vm">__file__</span><span class="p">)),</span> <span class="n">path</span><span class="p">)</span></pre></div> | ||
725 | </div> | ||
726 | </div> | ||
727 | <div class='clearall'></div> | ||
728 | <div class='section' id='section-37'> | ||
729 | <div class='docs'> | ||
730 | <div class='octowrap'> | ||
731 | <a class='octothorpe' href='#section-37'>#</a> | ||
732 | </div> | ||
733 | <p>We initialize our return variables, <code>schemas</code> and <code>field_metadata</code> to empty dictionaries.</p> | ||
734 | <p>We loop over each stream in <code>streams.py:STREAMS</code>. We load the static JSON file into memory - all | ||
735 | four streams currently have some static schema. We store this on our return variable <code>schemas</code> | ||
736 | under the stream name.</p> | ||
737 | <p>We then call <code>singer.metadata.get_standard_metadata()</code> passing in whatever metadata we do have | ||
738 | (key properties, valid replication keys, the replication method). The return value here is | ||
739 | stored on our return variable <code>field_metadata</code> under the stream name.</p> | ||
740 | </div> | ||
741 | <div class='code'> | ||
742 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">get_schemas</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">):</span></pre></div> | ||
743 | </div> | ||
744 | </div> | ||
745 | <div class='clearall'></div> | ||
746 | <div class='section' id='section-38'> | ||
747 | <div class='docs'> | ||
748 | <div class='octowrap'> | ||
749 | <a class='octothorpe' href='#section-38'>#</a> | ||
750 | </div> | ||
751 | |||
752 | </div> | ||
753 | <div class='code'> | ||
754 | <div class="highlight"><pre> <span class="n">schemas</span> <span class="o">=</span> <span class="p">{}</span> | ||
755 | <span class="n">field_metadata</span> <span class="o">=</span> <span class="p">{}</span> | ||
756 | |||
757 | <span class="k">for</span> <span class="n">stream_name</span><span class="p">,</span> <span class="n">stream_metadata</span> <span class="ow">in</span> <span class="n">STREAMS</span><span class="o">.</span><span class="n">items</span><span class="p">():</span> | ||
758 | <span class="n">schema_path</span> <span class="o">=</span> <span class="n">get_abs_path</span><span class="p">(</span><span class="s1">'schemas/</span><span class="si">{}</span><span class="s1">.json'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream_name</span><span class="p">))</span> | ||
759 | <span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">schema_path</span><span class="p">)</span> <span class="k">as</span> <span class="n">file</span><span class="p">:</span> | ||
760 | <span class="n">schema</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">file</span><span class="p">)</span> | ||
761 | <span class="n">schemas</span><span class="p">[</span><span class="n">stream_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">schema</span> | ||
762 | <span class="n">mdata</span> <span class="o">=</span> <span class="n">metadata</span><span class="o">.</span><span class="n">new</span><span class="p">()</span> | ||
763 | |||
764 | <span class="n">mdata</span> <span class="o">=</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get_standard_metadata</span><span class="p">(</span> | ||
765 | <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span> | ||
766 | <span class="n">key_properties</span><span class="o">=</span><span class="n">stream_metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'key_properties'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span> | ||
767 | <span class="n">valid_replication_keys</span><span class="o">=</span><span class="n">stream_metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'replication_keys'</span><span class="p">,</span> <span class="kc">None</span><span class="p">),</span> | ||
768 | <span class="n">replication_method</span><span class="o">=</span><span class="n">stream_metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'replication_method'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> | ||
769 | <span class="p">)</span> | ||
770 | <span class="n">field_metadata</span><span class="p">[</span><span class="n">stream_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">mdata</span></pre></div> | ||
771 | </div> | ||
772 | </div> | ||
773 | <div class='clearall'></div> | ||
774 | <div class='section' id='section-39'> | ||
775 | <div class='docs'> | ||
776 | <div class='octowrap'> | ||
777 | <a class='octothorpe' href='#section-39'>#</a> | ||
778 | </div> | ||
779 | <p>If we are handling the <code>"spreadsheet_metadata"</code> stream, we do some extra work to build the | ||
780 | dynamic schemas of each Sheet we want to sync. Otherwise, that’s it.</p> | ||
781 | </div> | ||
782 | <div class='code'> | ||
783 | <div class="highlight"><pre> <span class="k">if</span> <span class="n">stream_name</span> <span class="o">==</span> <span class="s1">'spreadsheet_metadata'</span><span class="p">:</span></pre></div> | ||
784 | </div> | ||
785 | </div> | ||
786 | <div class='clearall'></div> | ||
787 | <div class='section' id='section-40'> | ||
788 | <div class='docs'> | ||
789 | <div class='octowrap'> | ||
790 | <a class='octothorpe' href='#section-40'>#</a> | ||
791 | </div> | ||
792 | <p>We ultimately end up making a <code>GET</code> to</p> | ||
793 | </div> | ||
794 | <div class='code'> | ||
795 | <div class="highlight"><pre></pre></div> | ||
796 | </div> | ||
797 | </div> | ||
798 | <div class='clearall'></div> | ||
799 | <div class='section' id='section-41'> | ||
800 | <div class='docs'> | ||
801 | <div class='octowrap'> | ||
802 | <a class='octothorpe' href='#section-41'>#</a> | ||
803 | </div> | ||
804 | <pre><code class="language-Text">https://sheets.googleapis.com/v4/spreadsheets/my-spreadsheet-id?includeGridData=false | ||
805 | </code></pre> | ||
806 | </div> | ||
807 | <div class='code'> | ||
808 | <div class="highlight"><pre></pre></div> | ||
809 | </div> | ||
810 | </div> | ||
811 | <div class='clearall'></div> | ||
812 | <div class='section' id='section-42'> | ||
813 | <div class='docs'> | ||
814 | <div class='octowrap'> | ||
815 | <a class='octothorpe' href='#section-42'>#</a> | ||
816 | </div> | ||
817 | <p>Notice this is <code>base_url + path + query_string</code>. There’s code here to figure out and | ||
818 | properly format <code>path</code> and <code>query_string</code>. I’m not sure why we don’t let <code>requests</code> | ||
819 | handle this.</p> | ||
820 | </div> | ||
821 | <div class='code'> | ||
822 | <div class="highlight"><pre></pre></div> | ||
823 | </div> | ||
824 | </div> | ||
825 | <div class='clearall'></div> | ||
826 | <div class='section' id='section-43'> | ||
827 | <div class='docs'> | ||
828 | <div class='octowrap'> | ||
829 | <a class='octothorpe' href='#section-43'>#</a> | ||
830 | </div> | ||
831 | <p>We assume this request is successful and we store the <code>OrderedDict</code> return value as | ||
832 | <code>spreadsheet_md_results</code>.</p> | ||
833 | </div> | ||
834 | <div class='code'> | ||
835 | <div class="highlight"><pre> <span class="n">api</span> <span class="o">=</span> <span class="n">stream_metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'api'</span><span class="p">,</span> <span class="s1">'sheets'</span><span class="p">)</span> | ||
836 | <span class="n">params</span> <span class="o">=</span> <span class="n">stream_metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'params'</span><span class="p">,</span> <span class="p">{})</span> | ||
837 | <span class="n">querystring</span> <span class="o">=</span> <span class="s1">'&'</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="s1">'</span><span class="si">%s</span><span class="s1">=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> <span class="ow">in</span> <span class="n">params</span><span class="o">.</span><span class="n">items</span><span class="p">()])</span> | ||
838 | <span class="n">path</span> <span class="o">=</span> <span class="s1">'</span><span class="si">{}</span><span class="s1">?</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
839 | <span class="n">stream_metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'path'</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{spreadsheet_id}</span><span class="s1">'</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">),</span> | ||
840 | <span class="n">querystring</span> | ||
841 | <span class="p">)</span> | ||
842 | |||
843 | <span class="n">spreadsheet_md_results</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">get</span><span class="p">(</span> | ||
844 | <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> | ||
845 | <span class="n">params</span><span class="o">=</span><span class="n">querystring</span><span class="p">,</span> | ||
846 | <span class="n">api</span><span class="o">=</span><span class="n">api</span><span class="p">,</span> | ||
847 | <span class="n">endpoint</span><span class="o">=</span><span class="n">stream_name</span> | ||
848 | <span class="p">)</span></pre></div> | ||
849 | </div> | ||
850 | </div> | ||
851 | <div class='clearall'></div> | ||
852 | <div class='section' id='section-44'> | ||
853 | <div class='docs'> | ||
854 | <div class='octowrap'> | ||
855 | <a class='octothorpe' href='#section-44'>#</a> | ||
856 | </div> | ||
857 | <p>The response here is one of those “envelope” kinds. The data we care about is under | ||
858 | the <code>"sheets"</code> key.</p> | ||
859 | </div> | ||
860 | <div class='code'> | ||
861 | <div class="highlight"><pre> <span class="n">sheets</span> <span class="o">=</span> <span class="n">spreadsheet_md_results</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'sheets'</span><span class="p">)</span> | ||
862 | <span class="k">if</span> <span class="n">sheets</span><span class="p">:</span></pre></div> | ||
863 | </div> | ||
864 | </div> | ||
865 | <div class='clearall'></div> | ||
866 | <div class='section' id='section-45'> | ||
867 | <div class='docs'> | ||
868 | <div class='octowrap'> | ||
869 | <a class='octothorpe' href='#section-45'>#</a> | ||
870 | </div> | ||
871 | <p>Looping over this array, we call <code>schema.py:get_sheet_metadata</code>. This gets the | ||
872 | JSON schema of each sheet found in this Google Doc. We use the sheet’s title as | ||
873 | the stream name here.</p> | ||
874 | </div> | ||
875 | <div class='code'> | ||
876 | <div class="highlight"><pre> <span class="k">for</span> <span class="n">sheet</span> <span class="ow">in</span> <span class="n">sheets</span><span class="p">:</span> | ||
877 | <span class="n">sheet_json_schema</span><span class="p">,</span> <span class="n">columns</span> <span class="o">=</span> <span class="n">get_sheet_metadata</span><span class="p">(</span><span class="n">sheet</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">client</span><span class="p">)</span> | ||
878 | |||
879 | <span class="k">if</span> <span class="n">sheet_json_schema</span> <span class="ow">and</span> <span class="n">columns</span><span class="p">:</span> | ||
880 | <span class="n">sheet_title</span> <span class="o">=</span> <span class="n">sheet</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'title'</span><span class="p">)</span> | ||
881 | <span class="n">schemas</span><span class="p">[</span><span class="n">sheet_title</span><span class="p">]</span> <span class="o">=</span> <span class="n">sheet_json_schema</span> | ||
882 | <span class="n">sheet_mdata</span> <span class="o">=</span> <span class="n">metadata</span><span class="o">.</span><span class="n">new</span><span class="p">()</span> | ||
883 | <span class="n">sheet_mdata</span> <span class="o">=</span> <span class="n">metadata</span><span class="o">.</span><span class="n">get_standard_metadata</span><span class="p">(</span> | ||
884 | <span class="n">schema</span><span class="o">=</span><span class="n">sheet_json_schema</span><span class="p">,</span> | ||
885 | <span class="n">key_properties</span><span class="o">=</span><span class="p">[</span><span class="s1">'__sdc_row'</span><span class="p">],</span> | ||
886 | <span class="n">valid_replication_keys</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> | ||
887 | <span class="n">replication_method</span><span class="o">=</span><span class="s1">'FULL_TABLE'</span> | ||
888 | <span class="p">)</span> | ||
889 | <span class="n">field_metadata</span><span class="p">[</span><span class="n">sheet_title</span><span class="p">]</span> <span class="o">=</span> <span class="n">sheet_mdata</span> | ||
890 | |||
891 | <span class="k">return</span> <span class="n">schemas</span><span class="p">,</span> <span class="n">field_metadata</span></pre></div> | ||
892 | </div> | ||
893 | </div> | ||
894 | <div class='clearall'></div> | ||
895 | <div class='section' id='section-46'> | ||
896 | <div class='docs'> | ||
897 | <div class='octowrap'> | ||
898 | <a class='octothorpe' href='#section-46'>#</a> | ||
899 | </div> | ||
900 | <h1>Footnotes</h1> | ||
901 | <p>The shape of the response is like the following, but note that the tap stores it in a recursive <code>OrderedDict</code> structure:</p> | ||
902 | </div> | ||
903 | <div class='code'> | ||
904 | <div class="highlight"><pre></pre></div> | ||
905 | </div> | ||
906 | </div> | ||
907 | <div class='clearall'></div> | ||
908 | <div class='section' id='section-47'> | ||
909 | <div class='docs'> | ||
910 | <div class='octowrap'> | ||
911 | <a class='octothorpe' href='#section-47'>#</a> | ||
912 | </div> | ||
913 | <pre><code class="language-JSON">{ | ||
914 | "spreadsheetId": "my-id", | ||
915 | "properties": {...}, | ||
916 | "sheets": [ | ||
917 | { | ||
918 | "properties": {}, | ||
919 | "data": [ | ||
920 | { | ||
921 | "rowData": [ | ||
922 | { | ||
923 | "values": [ | ||
924 | { | ||
925 | "userEnteredValue": {"stringValue": "time1"}, | ||
926 | "effectiveValue": {"stringValue": "time1"}, | ||
927 | "formattedValue": "time1", | ||
928 | "userEnteredFormat": {...}, | ||
929 | "effectiveFormat": {} | ||
930 | }, | ||
931 | ... | ||
932 | ], | ||
933 | }, | ||
934 | ... | ||
935 | ] | ||
936 | } | ||
937 | ] | ||
938 | }, | ||
939 | ] | ||
940 | } | ||
941 | </code></pre> | ||
942 | </div> | ||
943 | <div class='code'> | ||
944 | <div class="highlight"><pre></pre></div> | ||
945 | </div> | ||
946 | </div> | ||
947 | <div class='clearall'></div> | ||
948 | </div> | ||
949 | </body> | ||
diff --git a/docs/streams.html b/docs/streams.html new file mode 100644 index 0000000..8c9b6d9 --- /dev/null +++ b/docs/streams.html | |||
@@ -0,0 +1,185 @@ | |||
1 | <!DOCTYPE html> | ||
2 | <html> | ||
3 | <head> | ||
4 | <meta http-equiv="content-type" content="text/html;charset=utf-8"> | ||
5 | <title>streams.py</title> | ||
6 | <link rel="stylesheet" href="pycco.css"> | ||
7 | </head> | ||
8 | <body> | ||
9 | <div id='container'> | ||
10 | <div id="background"></div> | ||
11 | <div class='section'> | ||
12 | <div class='docs'><h1>streams.py</h1></div> | ||
13 | </div> | ||
14 | <div class='clearall'> | ||
15 | <div class='section' id='section-0'> | ||
16 | <div class='docs'> | ||
17 | <div class='octowrap'> | ||
18 | <a class='octothorpe' href='#section-0'>#</a> | ||
19 | </div> | ||
20 | <p><code>streams.py:STREAMS</code> is an <code>OrderedDict</code>, only because we want to loop over it in the same order | ||
21 | every time.</p> | ||
22 | <p>It’s still the same global variable found in taps of this style. It maps stream names to a | ||
23 | dictionary describing the stream.</p> | ||
24 | <p>Some notable things we learn in this file:</p> | ||
25 | <ul> | ||
26 | <li> | ||
27 | <p><code>api</code> is either <code>"files"</code> or <code>"sheets"</code></p> | ||
28 | </li> | ||
29 | <li> | ||
30 | <p>We saw this used in <code>client.py:GoogleClient.request()</code> to switch the base url of the request</p> | ||
31 | </li> | ||
32 | <li> | ||
33 | <p><code>"file_metadata"</code> is the only incremental stream</p> | ||
34 | </li> | ||
35 | <li> | ||
36 | <p>Full table streams include:</p> | ||
37 | </li> | ||
38 | <li><code>"spreadsheet_metadata"</code></li> | ||
39 | <li><code>"sheet_metadata"</code></li> | ||
40 | <li> | ||
41 | <p><code>"sheets_loaded"</code></p> | ||
42 | </li> | ||
43 | <li> | ||
44 | <p><code>"sheets_loaded"</code> is the only stream with a <code>"data_key"</code></p> | ||
45 | </li> | ||
46 | <li>We typically see <code>data_key</code> be the name of the key to get data out of “envelope” responses</li> | ||
47 | </ul> | ||
48 | </div> | ||
49 | <div class='code'> | ||
50 | <div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">OrderedDict</span></pre></div> | ||
51 | </div> | ||
52 | </div> | ||
53 | <div class='clearall'></div> | ||
54 | <div class='section' id='section-1'> | ||
55 | <div class='docs'> | ||
56 | <div class='octowrap'> | ||
57 | <a class='octothorpe' href='#section-1'>#</a> | ||
58 | </div> | ||
59 | <p>streams: API URL endpoints to be called | ||
60 | properties: | ||
61 | <root node>: Plural stream name for the endpoint | ||
62 | path: API endpoint relative path, when added to the base URL, creates the full path, | ||
63 | default = stream_name | ||
64 | key_properties: Primary key fields for identifying an endpoint record. | ||
65 | replication_method: INCREMENTAL or FULL_TABLE | ||
66 | replication_keys: bookmark_field(s), typically a date-time, used for filtering the results | ||
67 | and setting the state | ||
68 | params: Query, sort, and other endpoint specific parameters; default = {} | ||
69 | data_key: JSON element containing the results list for the endpoint; | ||
70 | default = root (no data_key)</p> | ||
71 | </div> | ||
72 | <div class='code'> | ||
73 | <div class="highlight"><pre></pre></div> | ||
74 | </div> | ||
75 | </div> | ||
76 | <div class='clearall'></div> | ||
77 | <div class='section' id='section-2'> | ||
78 | <div class='docs'> | ||
79 | <div class='octowrap'> | ||
80 | <a class='octothorpe' href='#section-2'>#</a> | ||
81 | </div> | ||
82 | <p>file_metadata: Queries Google Drive API to get file information and see if file has been modified | ||
83 | Provides audit info about who and when last changed the file.</p> | ||
84 | </div> | ||
85 | <div class='code'> | ||
86 | <div class="highlight"><pre><span class="n">FILE_METADATA</span> <span class="o">=</span> <span class="p">{</span> | ||
87 | <span class="s2">"api"</span><span class="p">:</span> <span class="s2">"files"</span><span class="p">,</span> | ||
88 | <span class="s2">"path"</span><span class="p">:</span> <span class="s2">"files/</span><span class="si">{spreadsheet_id}</span><span class="s2">"</span><span class="p">,</span> | ||
89 | <span class="s2">"key_properties"</span><span class="p">:</span> <span class="p">[</span><span class="s2">"id"</span><span class="p">],</span> | ||
90 | <span class="s2">"replication_method"</span><span class="p">:</span> <span class="s2">"INCREMENTAL"</span><span class="p">,</span> | ||
91 | <span class="s2">"replication_keys"</span><span class="p">:</span> <span class="p">[</span><span class="s2">"modifiedTime"</span><span class="p">],</span> | ||
92 | <span class="s2">"params"</span><span class="p">:</span> <span class="p">{</span> | ||
93 | <span class="s2">"fields"</span><span class="p">:</span> <span class="s2">"id,name,createdTime,modifiedTime,version,teamDriveId,driveId,lastModifyingUser"</span> | ||
94 | <span class="p">}</span> | ||
95 | <span class="p">}</span></pre></div> | ||
96 | </div> | ||
97 | </div> | ||
98 | <div class='clearall'></div> | ||
99 | <div class='section' id='section-3'> | ||
100 | <div class='docs'> | ||
101 | <div class='octowrap'> | ||
102 | <a class='octothorpe' href='#section-3'>#</a> | ||
103 | </div> | ||
104 | <p>spreadsheet_metadata: Queries spreadsheet to get basic information on spreadsheet and sheets</p> | ||
105 | </div> | ||
106 | <div class='code'> | ||
107 | <div class="highlight"><pre><span class="n">SPREADSHEET_METADATA</span> <span class="o">=</span> <span class="p">{</span> | ||
108 | <span class="s2">"api"</span><span class="p">:</span> <span class="s2">"sheets"</span><span class="p">,</span> | ||
109 | <span class="s2">"path"</span><span class="p">:</span> <span class="s2">"spreadsheets/</span><span class="si">{spreadsheet_id}</span><span class="s2">"</span><span class="p">,</span> | ||
110 | <span class="s2">"key_properties"</span><span class="p">:</span> <span class="p">[</span><span class="s2">"spreadsheetId"</span><span class="p">],</span> | ||
111 | <span class="s2">"replication_method"</span><span class="p">:</span> <span class="s2">"FULL_TABLE"</span><span class="p">,</span> | ||
112 | <span class="s2">"params"</span><span class="p">:</span> <span class="p">{</span> | ||
113 | <span class="s2">"includeGridData"</span><span class="p">:</span> <span class="s2">"false"</span> | ||
114 | <span class="p">}</span> | ||
115 | <span class="p">}</span></pre></div> | ||
116 | </div> | ||
117 | </div> | ||
118 | <div class='clearall'></div> | ||
119 | <div class='section' id='section-4'> | ||
120 | <div class='docs'> | ||
121 | <div class='octowrap'> | ||
122 | <a class='octothorpe' href='#section-4'>#</a> | ||
123 | </div> | ||
124 | <p>sheet_metadata: Get Header Row and 1st data row (Rows 1 & 2) from a Sheet on Spreadsheet. | ||
125 | This endpoint includes detailed metadata about each cell in the header and first data row | ||
126 | incl. data type, formatting, etc.</p> | ||
127 | </div> | ||
128 | <div class='code'> | ||
129 | <div class="highlight"><pre><span class="n">SHEET_METADATA</span> <span class="o">=</span> <span class="p">{</span> | ||
130 | <span class="s2">"api"</span><span class="p">:</span> <span class="s2">"sheets"</span><span class="p">,</span> | ||
131 | <span class="s2">"path"</span><span class="p">:</span> <span class="s2">"spreadsheets/</span><span class="si">{spreadsheet_id}</span><span class="s2">"</span><span class="p">,</span> | ||
132 | <span class="s2">"key_properties"</span><span class="p">:</span> <span class="p">[</span><span class="s2">"sheetId"</span><span class="p">],</span> | ||
133 | <span class="s2">"replication_method"</span><span class="p">:</span> <span class="s2">"FULL_TABLE"</span><span class="p">,</span> | ||
134 | <span class="s2">"params"</span><span class="p">:</span> <span class="p">{</span> | ||
135 | <span class="s2">"includeGridData"</span><span class="p">:</span> <span class="s2">"true"</span><span class="p">,</span> | ||
136 | <span class="s2">"ranges"</span><span class="p">:</span> <span class="s2">"'</span><span class="si">{sheet_title}</span><span class="s2">'!1:2"</span> | ||
137 | <span class="p">}</span> | ||
138 | <span class="p">}</span></pre></div> | ||
139 | </div> | ||
140 | </div> | ||
141 | <div class='clearall'></div> | ||
142 | <div class='section' id='section-5'> | ||
143 | <div class='docs'> | ||
144 | <div class='octowrap'> | ||
145 | <a class='octothorpe' href='#section-5'>#</a> | ||
146 | </div> | ||
147 | <p>sheets_loaded: Queries a batch of Rows for each Sheet in the Spreadsheet. | ||
148 | Each query uses the <code>values</code> endpoint, to get data-only, w/out the formatting/type metadata.</p> | ||
149 | </div> | ||
150 | <div class='code'> | ||
151 | <div class="highlight"><pre><span class="n">SHEETS_LOADED</span> <span class="o">=</span> <span class="p">{</span> | ||
152 | <span class="s2">"api"</span><span class="p">:</span> <span class="s2">"sheets"</span><span class="p">,</span> | ||
153 | <span class="s2">"path"</span><span class="p">:</span> <span class="s2">"spreadsheets/</span><span class="si">{spreadsheet_id}</span><span class="s2">/values/'</span><span class="si">{sheet_title}</span><span class="s2">'!</span><span class="si">{range_rows}</span><span class="s2">"</span><span class="p">,</span> | ||
154 | <span class="s2">"data_key"</span><span class="p">:</span> <span class="s2">"values"</span><span class="p">,</span> | ||
155 | <span class="s2">"key_properties"</span><span class="p">:</span> <span class="p">[</span><span class="s2">"spreadsheetId"</span><span class="p">,</span> <span class="s2">"sheetId"</span><span class="p">,</span> <span class="s2">"loadDate"</span><span class="p">],</span> | ||
156 | <span class="s2">"replication_method"</span><span class="p">:</span> <span class="s2">"FULL_TABLE"</span><span class="p">,</span> | ||
157 | <span class="s2">"params"</span><span class="p">:</span> <span class="p">{</span> | ||
158 | <span class="s2">"dateTimeRenderOption"</span><span class="p">:</span> <span class="s2">"SERIAL_NUMBER"</span><span class="p">,</span> | ||
159 | <span class="s2">"valueRenderOption"</span><span class="p">:</span> <span class="s2">"UNFORMATTED_VALUE"</span><span class="p">,</span> | ||
160 | <span class="s2">"majorDimension"</span><span class="p">:</span> <span class="s2">"ROWS"</span> | ||
161 | <span class="p">}</span> | ||
162 | <span class="p">}</span></pre></div> | ||
163 | </div> | ||
164 | </div> | ||
165 | <div class='clearall'></div> | ||
166 | <div class='section' id='section-6'> | ||
167 | <div class='docs'> | ||
168 | <div class='octowrap'> | ||
169 | <a class='octothorpe' href='#section-6'>#</a> | ||
170 | </div> | ||
171 | <p>Ensure streams are ordered sequentially, logically.</p> | ||
172 | </div> | ||
173 | <div class='code'> | ||
174 | <div class="highlight"><pre><span class="n">STREAMS</span> <span class="o">=</span> <span class="n">OrderedDict</span><span class="p">()</span> | ||
175 | <span class="n">STREAMS</span><span class="p">[</span><span class="s1">'file_metadata'</span><span class="p">]</span> <span class="o">=</span> <span class="n">FILE_METADATA</span> | ||
176 | <span class="n">STREAMS</span><span class="p">[</span><span class="s1">'spreadsheet_metadata'</span><span class="p">]</span> <span class="o">=</span> <span class="n">SPREADSHEET_METADATA</span> | ||
177 | <span class="n">STREAMS</span><span class="p">[</span><span class="s1">'sheet_metadata'</span><span class="p">]</span> <span class="o">=</span> <span class="n">SHEET_METADATA</span> | ||
178 | <span class="n">STREAMS</span><span class="p">[</span><span class="s1">'sheets_loaded'</span><span class="p">]</span> <span class="o">=</span> <span class="n">SHEETS_LOADED</span> | ||
179 | |||
180 | </pre></div> | ||
181 | </div> | ||
182 | </div> | ||
183 | <div class='clearall'></div> | ||
184 | </div> | ||
185 | </body> | ||
diff --git a/docs/sync.html b/docs/sync.html new file mode 100644 index 0000000..97ef9fa --- /dev/null +++ b/docs/sync.html | |||
@@ -0,0 +1,1680 @@ | |||
1 | <!DOCTYPE html> | ||
2 | <html> | ||
3 | <head> | ||
4 | <meta http-equiv="content-type" content="text/html;charset=utf-8"> | ||
5 | <title>sync.py</title> | ||
6 | <link rel="stylesheet" href="pycco.css"> | ||
7 | </head> | ||
8 | <body> | ||
9 | <div id='container'> | ||
10 | <div id="background"></div> | ||
11 | <div class='section'> | ||
12 | <div class='docs'><h1>sync.py</h1></div> | ||
13 | </div> | ||
14 | <div class='clearall'> | ||
15 | <div class='section' id='section-0'> | ||
16 | <div class='docs'> | ||
17 | <div class='octowrap'> | ||
18 | <a class='octothorpe' href='#section-0'>#</a> | ||
19 | </div> | ||
20 | <p>This module contains the logic to sync data from the API.</p> | ||
21 | <hr /> | ||
22 | <p>Syncable streams: The tap seems to care about syncing the streams in this order.</p> | ||
23 | <ol> | ||
24 | <li><code>file_metadata</code></li> | ||
25 | <li><code>spreadsheet_metadata</code></li> | ||
26 | <li><em>N</em> Sheets</li> | ||
27 | <li><code>sheet_metadata</code></li> | ||
28 | <li><code>sheets_loaded</code></li> | ||
29 | <li><code>sheets_loaded</code></li> | ||
30 | </ol> | ||
31 | <hr /> | ||
32 | <p>The flow through this module is:</p> | ||
33 | <ol> | ||
34 | <li>Entrypoint: <code>sync()</code></li> | ||
35 | <li>Sync <code>file_metadata</code><ol> | ||
36 | <li><code>get_data()</code></li> | ||
37 | <li><code>transform_file_metadata()</code></li> | ||
38 | <li>Maybe exit the sync</li> | ||
39 | <li><code>sync_stream()</code></li> | ||
40 | </ol> | ||
41 | </li> | ||
42 | <li>Sync <code>spreadsheet_metadata</code><ol> | ||
43 | <li><code>get_data()</code></li> | ||
44 | <li><code>transform_spreadsheet_metadata()</code></li> | ||
45 | <li><code>sync_stream()</code></li> | ||
46 | </ol> | ||
47 | </li> | ||
48 | <li>Sync all of the Sheets. Here’s the process for a single Sheet<ol> | ||
49 | <li><code>get_sheet_metadata()</code></li> | ||
50 | <li><code>transform_sheet_metadata()</code></li> | ||
51 | <li><code>get_data()</code></li> | ||
52 | <li><code>transform_sheet_data()</code></li> | ||
53 | <li><code>process_records()</code></li> | ||
54 | </ol> | ||
55 | </li> | ||
56 | <li>Sync <code>sheet_metadata</code><ol> | ||
57 | <li><code>sync_stream()</code></li> | ||
58 | </ol> | ||
59 | </li> | ||
60 | <li>Sync <code>sheets_loaded</code><ol> | ||
61 | <li><code>sync_stream()</code></li> | ||
62 | </ol> | ||
63 | </li> | ||
64 | <li>Sync <code>sheets_loaded</code><ol> | ||
65 | <li><code>sync_stream()</code></li> | ||
66 | </ol> | ||
67 | </li> | ||
68 | </ol> | ||
69 | </div> | ||
70 | <div class='code'> | ||
71 | <div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">time</span> | ||
72 | <span class="kn">import</span> <span class="nn">math</span> | ||
73 | <span class="kn">import</span> <span class="nn">json</span> | ||
74 | <span class="kn">import</span> <span class="nn">re</span> | ||
75 | <span class="kn">import</span> <span class="nn">urllib.parse</span> | ||
76 | <span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span> | ||
77 | <span class="kn">import</span> <span class="nn">pytz</span> | ||
78 | <span class="kn">import</span> <span class="nn">singer</span> | ||
79 | <span class="kn">from</span> <span class="nn">singer</span> <span class="kn">import</span> <span class="n">metrics</span><span class="p">,</span> <span class="n">metadata</span><span class="p">,</span> <span class="n">Transformer</span><span class="p">,</span> <span class="n">utils</span> | ||
80 | <span class="kn">from</span> <span class="nn">singer.utils</span> <span class="kn">import</span> <span class="n">strptime_to_utc</span><span class="p">,</span> <span class="n">strftime</span> | ||
81 | <span class="kn">from</span> <span class="nn">singer.messages</span> <span class="kn">import</span> <span class="n">RecordMessage</span> | ||
82 | <span class="kn">from</span> <span class="nn">tap_google_sheets.streams</span> <span class="kn">import</span> <span class="n">STREAMS</span> | ||
83 | <span class="kn">from</span> <span class="nn">tap_google_sheets.schema</span> <span class="kn">import</span> <span class="n">get_sheet_metadata</span> | ||
84 | |||
85 | <span class="n">LOGGER</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">get_logger</span><span class="p">()</span></pre></div> | ||
86 | </div> | ||
87 | </div> | ||
88 | <div class='clearall'></div> | ||
89 | <div class='section' id='section-1'> | ||
90 | <div class='docs'> | ||
91 | <div class='octowrap'> | ||
92 | <a class='octothorpe' href='#section-1'>#</a> | ||
93 | </div> | ||
94 | <hr /> | ||
95 | </div> | ||
96 | <div class='code'> | ||
97 | <div class="highlight"><pre></pre></div> | ||
98 | </div> | ||
99 | </div> | ||
100 | <div class='clearall'></div> | ||
101 | <div class='section' id='section-2'> | ||
102 | <div class='docs'> | ||
103 | <div class='octowrap'> | ||
104 | <a class='octothorpe' href='#section-2'>#</a> | ||
105 | </div> | ||
106 | <h1>Helper Functions</h1> | ||
107 | </div> | ||
108 | <div class='code'> | ||
109 | <div class="highlight"><pre></pre></div> | ||
110 | </div> | ||
111 | </div> | ||
112 | <div class='clearall'></div> | ||
113 | <div class='section' id='section-3'> | ||
114 | <div class='docs'> | ||
115 | <div class='octowrap'> | ||
116 | <a class='octothorpe' href='#section-3'>#</a> | ||
117 | </div> | ||
118 | <hr /> | ||
119 | <p>Log that we write a schema via singer.write_schema</p> | ||
120 | </div> | ||
121 | <div class='code'> | ||
122 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">write_schema</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">):</span></pre></div> | ||
123 | </div> | ||
124 | </div> | ||
125 | <div class='clearall'></div> | ||
126 | <div class='section' id='section-4'> | ||
127 | <div class='docs'> | ||
128 | <div class='octowrap'> | ||
129 | <a class='octothorpe' href='#section-4'>#</a> | ||
130 | </div> | ||
131 | |||
132 | </div> | ||
133 | <div class='code'> | ||
134 | <div class="highlight"><pre> <span class="n">stream</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">get_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span> | ||
135 | <span class="n">schema</span> <span class="o">=</span> <span class="n">stream</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span> | ||
136 | <span class="k">try</span><span class="p">:</span> | ||
137 | <span class="n">singer</span><span class="o">.</span><span class="n">write_schema</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="n">stream</span><span class="o">.</span><span class="n">key_properties</span><span class="p">)</span> | ||
138 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Writing schema for: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream_name</span><span class="p">))</span> | ||
139 | <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span></pre></div> | ||
140 | </div> | ||
141 | </div> | ||
142 | <div class='clearall'></div> | ||
143 | <div class='section' id='section-5'> | ||
144 | <div class='docs'> | ||
145 | <div class='octowrap'> | ||
146 | <a class='octothorpe' href='#section-5'>#</a> | ||
147 | </div> | ||
148 | <p>QUESTION: When do we encounter an OSError?</p> | ||
149 | </div> | ||
150 | <div class='code'> | ||
151 | <div class="highlight"><pre> <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'OS Error writing schema for: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream_name</span><span class="p">))</span> | ||
152 | <span class="k">raise</span> <span class="n">err</span></pre></div> | ||
153 | </div> | ||
154 | </div> | ||
155 | <div class='clearall'></div> | ||
156 | <div class='section' id='section-6'> | ||
157 | <div class='docs'> | ||
158 | <div class='octowrap'> | ||
159 | <a class='octothorpe' href='#section-6'>#</a> | ||
160 | </div> | ||
161 | <p>Write a RecordMessage, with the given version if it was passed in</p> | ||
162 | </div> | ||
163 | <div class='code'> | ||
164 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">write_record</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">record</span><span class="p">,</span> <span class="n">time_extracted</span><span class="p">,</span> <span class="n">version</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span></pre></div> | ||
165 | </div> | ||
166 | </div> | ||
167 | <div class='clearall'></div> | ||
168 | <div class='section' id='section-7'> | ||
169 | <div class='docs'> | ||
170 | <div class='octowrap'> | ||
171 | <a class='octothorpe' href='#section-7'>#</a> | ||
172 | </div> | ||
173 | |||
174 | </div> | ||
175 | <div class='code'> | ||
176 | <div class="highlight"><pre> <span class="k">try</span><span class="p">:</span> | ||
177 | <span class="k">if</span> <span class="n">version</span><span class="p">:</span> | ||
178 | <span class="n">singer</span><span class="o">.</span><span class="n">messages</span><span class="o">.</span><span class="n">write_message</span><span class="p">(</span> | ||
179 | <span class="n">RecordMessage</span><span class="p">(</span> | ||
180 | <span class="n">stream</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
181 | <span class="n">record</span><span class="o">=</span><span class="n">record</span><span class="p">,</span> | ||
182 | <span class="n">version</span><span class="o">=</span><span class="n">version</span><span class="p">,</span> | ||
183 | <span class="n">time_extracted</span><span class="o">=</span><span class="n">time_extracted</span><span class="p">))</span> | ||
184 | <span class="k">else</span><span class="p">:</span> | ||
185 | <span class="n">singer</span><span class="o">.</span><span class="n">messages</span><span class="o">.</span><span class="n">write_record</span><span class="p">(</span> | ||
186 | <span class="n">stream_name</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
187 | <span class="n">record</span><span class="o">=</span><span class="n">record</span><span class="p">,</span> | ||
188 | <span class="n">time_extracted</span><span class="o">=</span><span class="n">time_extracted</span><span class="p">)</span> | ||
189 | <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span></pre></div> | ||
190 | </div> | ||
191 | </div> | ||
192 | <div class='clearall'></div> | ||
193 | <div class='section' id='section-8'> | ||
194 | <div class='docs'> | ||
195 | <div class='octowrap'> | ||
196 | <a class='octothorpe' href='#section-8'>#</a> | ||
197 | </div> | ||
198 | <p>QUESTION: When do we encounter an OSError?</p> | ||
199 | </div> | ||
200 | <div class='code'> | ||
201 | <div class="highlight"><pre> <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'OS Error writing record for: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream_name</span><span class="p">))</span> | ||
202 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'record: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">record</span><span class="p">))</span> | ||
203 | <span class="k">raise</span> <span class="n">err</span></pre></div> | ||
204 | </div> | ||
205 | </div> | ||
206 | <div class='clearall'></div> | ||
207 | <div class='section' id='section-9'> | ||
208 | <div class='docs'> | ||
209 | <div class='octowrap'> | ||
210 | <a class='octothorpe' href='#section-9'>#</a> | ||
211 | </div> | ||
212 | <p>Safe get a bookmark from <code>state</code>.</p> | ||
213 | </div> | ||
214 | <div class='code'> | ||
215 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">get_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">stream</span><span class="p">,</span> <span class="n">default</span><span class="p">):</span></pre></div> | ||
216 | </div> | ||
217 | </div> | ||
218 | <div class='clearall'></div> | ||
219 | <div class='section' id='section-10'> | ||
220 | <div class='docs'> | ||
221 | <div class='octowrap'> | ||
222 | <a class='octothorpe' href='#section-10'>#</a> | ||
223 | </div> | ||
224 | <p>Note that this hides an error if <code>state</code> turns out to be <code>None</code>.</p> | ||
225 | </div> | ||
226 | <div class='code'> | ||
227 | <div class="highlight"><pre> <span class="k">if</span> <span class="p">(</span><span class="n">state</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span><span class="s1">'bookmarks'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">state</span><span class="p">):</span> | ||
228 | <span class="k">return</span> <span class="n">default</span></pre></div> | ||
229 | </div> | ||
230 | </div> | ||
231 | <div class='clearall'></div> | ||
232 | <div class='section' id='section-11'> | ||
233 | <div class='docs'> | ||
234 | <div class='octowrap'> | ||
235 | <a class='octothorpe' href='#section-11'>#</a> | ||
236 | </div> | ||
237 | <p>This is also short enough for one line. Is this supposed to be more readable?</p> | ||
238 | </div> | ||
239 | <div class='code'> | ||
240 | <div class="highlight"><pre> <span class="k">return</span> <span class="p">(</span> | ||
241 | <span class="n">state</span> | ||
242 | <span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'bookmarks'</span><span class="p">,</span> <span class="p">{})</span> | ||
243 | <span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">stream</span><span class="p">,</span> <span class="n">default</span><span class="p">)</span> | ||
244 | <span class="p">)</span></pre></div> | ||
245 | </div> | ||
246 | </div> | ||
247 | <div class='clearall'></div> | ||
248 | <div class='section' id='section-12'> | ||
249 | <div class='docs'> | ||
250 | <div class='octowrap'> | ||
251 | <a class='octothorpe' href='#section-12'>#</a> | ||
252 | </div> | ||
253 | <p>Updates and writes state</p> | ||
254 | </div> | ||
255 | <div class='code'> | ||
256 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">write_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">stream</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span></pre></div> | ||
257 | </div> | ||
258 | </div> | ||
259 | <div class='clearall'></div> | ||
260 | <div class='section' id='section-13'> | ||
261 | <div class='docs'> | ||
262 | <div class='octowrap'> | ||
263 | <a class='octothorpe' href='#section-13'>#</a> | ||
264 | </div> | ||
265 | |||
266 | </div> | ||
267 | <div class='code'> | ||
268 | <div class="highlight"><pre> <span class="k">if</span> <span class="s1">'bookmarks'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">state</span><span class="p">:</span> | ||
269 | <span class="n">state</span><span class="p">[</span><span class="s1">'bookmarks'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> | ||
270 | <span class="n">state</span><span class="p">[</span><span class="s1">'bookmarks'</span><span class="p">][</span><span class="n">stream</span><span class="p">]</span> <span class="o">=</span> <span class="n">value</span> | ||
271 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Write state for stream: </span><span class="si">{}</span><span class="s1">, value: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span> | ||
272 | <span class="n">singer</span><span class="o">.</span><span class="n">write_state</span><span class="p">(</span><span class="n">state</span><span class="p">)</span></pre></div> | ||
273 | </div> | ||
274 | </div> | ||
275 | <div class='clearall'></div> | ||
276 | <div class='section' id='section-14'> | ||
277 | <div class='docs'> | ||
278 | <div class='octowrap'> | ||
279 | <a class='octothorpe' href='#section-14'>#</a> | ||
280 | </div> | ||
281 | <p>Upserts or deletes the ‘currently_syncing’ stream</p> | ||
282 | </div> | ||
283 | <div class='code'> | ||
284 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">update_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">):</span></pre></div> | ||
285 | </div> | ||
286 | </div> | ||
287 | <div class='clearall'></div> | ||
288 | <div class='section' id='section-15'> | ||
289 | <div class='docs'> | ||
290 | <div class='octowrap'> | ||
291 | <a class='octothorpe' href='#section-15'>#</a> | ||
292 | </div> | ||
293 | <p>Why do we care if <code>stream_name</code> is passed in to delete <code>currently_syncing</code>?</p> | ||
294 | </div> | ||
295 | <div class='code'> | ||
296 | <div class="highlight"><pre> <span class="k">if</span> <span class="p">(</span><span class="n">stream_name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="s1">'currently_syncing'</span> <span class="ow">in</span> <span class="n">state</span><span class="p">):</span> | ||
297 | <span class="k">del</span> <span class="n">state</span><span class="p">[</span><span class="s1">'currently_syncing'</span><span class="p">]</span> | ||
298 | <span class="k">else</span><span class="p">:</span> | ||
299 | <span class="n">singer</span><span class="o">.</span><span class="n">set_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">)</span> | ||
300 | <span class="n">singer</span><span class="o">.</span><span class="n">write_state</span><span class="p">(</span><span class="n">state</span><span class="p">)</span></pre></div> | ||
301 | </div> | ||
302 | </div> | ||
303 | <div class='clearall'></div> | ||
304 | <div class='section' id='section-16'> | ||
305 | <div class='docs'> | ||
306 | <div class='octowrap'> | ||
307 | <a class='octothorpe' href='#section-16'>#</a> | ||
308 | </div> | ||
309 | <p>Get a list of selected, top-level fields for <code>stream_name</code></p> | ||
310 | </div> | ||
311 | <div class='code'> | ||
312 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">get_selected_fields</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">):</span></pre></div> | ||
313 | </div> | ||
314 | </div> | ||
315 | <div class='clearall'></div> | ||
316 | <div class='section' id='section-17'> | ||
317 | <div class='docs'> | ||
318 | <div class='octowrap'> | ||
319 | <a class='octothorpe' href='#section-17'>#</a> | ||
320 | </div> | ||
321 | |||
322 | </div> | ||
323 | <div class='code'> | ||
324 | <div class="highlight"><pre> <span class="n">stream</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">get_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span> | ||
325 | <span class="n">mdata</span> <span class="o">=</span> <span class="n">metadata</span><span class="o">.</span><span class="n">to_map</span><span class="p">(</span><span class="n">stream</span><span class="o">.</span><span class="n">metadata</span><span class="p">)</span> | ||
326 | <span class="n">mdata_list</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">metadata</span><span class="o">.</span><span class="n">to_list</span><span class="p">(</span><span class="n">mdata</span><span class="p">)</span> | ||
327 | <span class="n">selected_fields</span> <span class="o">=</span> <span class="p">[]</span> | ||
328 | <span class="k">for</span> <span class="n">entry</span> <span class="ow">in</span> <span class="n">mdata_list</span><span class="p">:</span> | ||
329 | <span class="n">field</span> <span class="o">=</span> <span class="kc">None</span> | ||
330 | <span class="k">try</span><span class="p">:</span> | ||
331 | <span class="n">field</span> <span class="o">=</span> <span class="n">entry</span><span class="p">[</span><span class="s1">'breadcrumb'</span><span class="p">][</span><span class="mi">1</span><span class="p">]</span> | ||
332 | <span class="k">if</span> <span class="n">entry</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'metadata'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'selected'</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span> | ||
333 | <span class="n">selected_fields</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">field</span><span class="p">)</span> | ||
334 | <span class="k">except</span> <span class="ne">IndexError</span><span class="p">:</span></pre></div> | ||
335 | </div> | ||
336 | </div> | ||
337 | <div class='clearall'></div> | ||
338 | <div class='section' id='section-18'> | ||
339 | <div class='docs'> | ||
340 | <div class='octowrap'> | ||
341 | <a class='octothorpe' href='#section-18'>#</a> | ||
342 | </div> | ||
343 | <p>Swallow the error for the Stream level metadata</p> | ||
344 | </div> | ||
345 | <div class='code'> | ||
346 | <div class="highlight"><pre> <span class="k">pass</span> | ||
347 | <span class="k">return</span> <span class="n">selected_fields</span></pre></div> | ||
348 | </div> | ||
349 | </div> | ||
350 | <div class='clearall'></div> | ||
351 | <div class='section' id='section-19'> | ||
352 | <div class='docs'> | ||
353 | <div class='octowrap'> | ||
354 | <a class='octothorpe' href='#section-19'>#</a> | ||
355 | </div> | ||
356 | <p>Construct the request we want to make, make the request, and return the Response</p> | ||
357 | </div> | ||
358 | <div class='code'> | ||
359 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">get_data</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">endpoint_config</span><span class="p">,</span> <span class="n">client</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">range_rows</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span></pre></div> | ||
360 | </div> | ||
361 | </div> | ||
362 | <div class='clearall'></div> | ||
363 | <div class='section' id='section-20'> | ||
364 | <div class='docs'> | ||
365 | <div class='octowrap'> | ||
366 | <a class='octothorpe' href='#section-20'>#</a> | ||
367 | </div> | ||
368 | |||
369 | </div> | ||
370 | <div class='code'> | ||
371 | <div class="highlight"><pre></pre></div> | ||
372 | </div> | ||
373 | </div> | ||
374 | <div class='clearall'></div> | ||
375 | <div class='section' id='section-21'> | ||
376 | <div class='docs'> | ||
377 | <div class='octowrap'> | ||
378 | <a class='octothorpe' href='#section-21'>#</a> | ||
379 | </div> | ||
380 | <h3>Build the query</h3> | ||
381 | </div> | ||
382 | <div class='code'> | ||
383 | <div class="highlight"><pre> <span class="n">stream_name_escaped</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">escape</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span></pre></div> | ||
384 | </div> | ||
385 | </div> | ||
386 | <div class='clearall'></div> | ||
387 | <div class='section' id='section-22'> | ||
388 | <div class='docs'> | ||
389 | <div class='octowrap'> | ||
390 | <a class='octothorpe' href='#section-22'>#</a> | ||
391 | </div> | ||
392 | <p>Encode stream_name to fix issues with special characters in <code>stream_name</code>. | ||
393 | QUESTION: If there are special characters here, how do databases handle them?</p> | ||
394 | </div> | ||
395 | <div class='code'> | ||
396 | <div class="highlight"><pre> <span class="n">stream_name_encoded</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">quote_plus</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span> | ||
397 | |||
398 | <span class="k">if</span> <span class="ow">not</span> <span class="n">range_rows</span><span class="p">:</span> | ||
399 | <span class="n">range_rows</span> <span class="o">=</span> <span class="s1">''</span></pre></div> | ||
400 | </div> | ||
401 | </div> | ||
402 | <div class='clearall'></div> | ||
403 | <div class='section' id='section-23'> | ||
404 | <div class='docs'> | ||
405 | <div class='octowrap'> | ||
406 | <a class='octothorpe' href='#section-23'>#</a> | ||
407 | </div> | ||
408 | <p>QUESTION: Why is this not a <code>string.format()</code> with keywords?</p> | ||
409 | </div> | ||
410 | <div class='code'> | ||
411 | <div class="highlight"><pre> <span class="n">path</span> <span class="o">=</span> <span class="n">endpoint_config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'path'</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span> | ||
412 | <span class="s1">'</span><span class="si">{spreadsheet_id}</span><span class="s1">'</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{sheet_title}</span><span class="s1">'</span><span class="p">,</span> <span class="n">stream_name_encoded</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span> | ||
413 | <span class="s1">'</span><span class="si">{range_rows}</span><span class="s1">'</span><span class="p">,</span> <span class="n">range_rows</span><span class="p">)</span> | ||
414 | <span class="n">params</span> <span class="o">=</span> <span class="n">endpoint_config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'params'</span><span class="p">,</span> <span class="p">{})</span> | ||
415 | <span class="n">api</span> <span class="o">=</span> <span class="n">endpoint_config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'api'</span><span class="p">,</span> <span class="s1">'sheets'</span><span class="p">)</span> | ||
416 | <span class="n">querystring</span> <span class="o">=</span> <span class="s1">'&'</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="s1">'</span><span class="si">%s</span><span class="s1">=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> <span class="ow">in</span> <span class="n">params</span><span class="o">.</span><span class="n">items</span><span class="p">()])</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span> | ||
417 | <span class="s1">'</span><span class="si">{sheet_title}</span><span class="s1">'</span><span class="p">,</span> <span class="n">stream_name_encoded</span><span class="p">)</span> | ||
418 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'URL: </span><span class="si">{}</span><span class="s1">/</span><span class="si">{}</span><span class="s1">?</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">client</span><span class="o">.</span><span class="n">base_url</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">querystring</span><span class="p">))</span> | ||
419 | <span class="n">data</span> <span class="o">=</span> <span class="p">{}</span> | ||
420 | <span class="n">time_extracted</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">now</span><span class="p">()</span></pre></div> | ||
421 | </div> | ||
422 | </div> | ||
423 | <div class='clearall'></div> | ||
424 | <div class='section' id='section-24'> | ||
425 | <div class='docs'> | ||
426 | <div class='octowrap'> | ||
427 | <a class='octothorpe' href='#section-24'>#</a> | ||
428 | </div> | ||
429 | <h3>Make the query</h3> | ||
430 | </div> | ||
431 | <div class='code'> | ||
432 | <div class="highlight"><pre> <span class="n">data</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">get</span><span class="p">(</span> | ||
433 | <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> | ||
434 | <span class="n">api</span><span class="o">=</span><span class="n">api</span><span class="p">,</span> | ||
435 | <span class="n">params</span><span class="o">=</span><span class="n">querystring</span><span class="p">,</span> | ||
436 | <span class="n">endpoint</span><span class="o">=</span><span class="n">stream_name_escaped</span><span class="p">)</span></pre></div> | ||
437 | </div> | ||
438 | </div> | ||
439 | <div class='clearall'></div> | ||
440 | <div class='section' id='section-25'> | ||
441 | <div class='docs'> | ||
442 | <div class='octowrap'> | ||
443 | <a class='octothorpe' href='#section-25'>#</a> | ||
444 | </div> | ||
445 | <h3>Return the Response.json()</h3> | ||
446 | </div> | ||
447 | <div class='code'> | ||
448 | <div class="highlight"><pre> <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">time_extracted</span></pre></div> | ||
449 | </div> | ||
450 | </div> | ||
451 | <div class='clearall'></div> | ||
452 | <div class='section' id='section-26'> | ||
453 | <div class='docs'> | ||
454 | <div class='octowrap'> | ||
455 | <a class='octothorpe' href='#section-26'>#</a> | ||
456 | </div> | ||
457 | <hr /> | ||
458 | <h1>Transform Functions</h1> | ||
459 | <p>There’s one line of code that appears in these functions that is a bit confusing:</p> | ||
460 | </div> | ||
461 | <div class='code'> | ||
462 | <div class="highlight"><pre></pre></div> | ||
463 | </div> | ||
464 | </div> | ||
465 | <div class='clearall'></div> | ||
466 | <div class='section' id='section-27'> | ||
467 | <div class='docs'> | ||
468 | <div class='octowrap'> | ||
469 | <a class='octothorpe' href='#section-27'>#</a> | ||
470 | </div> | ||
471 | <pre><code class="language-python">json.loads(json.dumps(some_object)) | ||
472 | </code></pre> | ||
473 | </div> | ||
474 | <div class='code'> | ||
475 | <div class="highlight"><pre></pre></div> | ||
476 | </div> | ||
477 | </div> | ||
478 | <div class='clearall'></div> | ||
479 | <div class='section' id='section-28'> | ||
480 | <div class='docs'> | ||
481 | <div class='octowrap'> | ||
482 | <a class='octothorpe' href='#section-28'>#</a> | ||
483 | </div> | ||
484 | <p>I don’t see the use here. We turn a Python object into a JSON string and back again. | ||
485 | The only thing I could see in the REPL is that integer keys get stringified.</p> | ||
486 | </div> | ||
487 | <div class='code'> | ||
488 | <div class="highlight"><pre></pre></div> | ||
489 | </div> | ||
490 | </div> | ||
491 | <div class='clearall'></div> | ||
492 | <div class='section' id='section-29'> | ||
493 | <div class='docs'> | ||
494 | <div class='octowrap'> | ||
495 | <a class='octothorpe' href='#section-29'>#</a> | ||
496 | </div> | ||
497 | <p>In general, the transform functions just look like “maybe pop some | ||
498 | stuff”, “maybe add some stuff”, and return the input in a list</p> | ||
499 | </div> | ||
500 | <div class='code'> | ||
501 | <div class="highlight"><pre></pre></div> | ||
502 | </div> | ||
503 | </div> | ||
504 | <div class='clearall'></div> | ||
505 | <div class='section' id='section-30'> | ||
506 | <div class='docs'> | ||
507 | <div class='octowrap'> | ||
508 | <a class='octothorpe' href='#section-30'>#</a> | ||
509 | </div> | ||
510 | <hr /> | ||
511 | </div> | ||
512 | <div class='code'> | ||
513 | <div class="highlight"><pre></pre></div> | ||
514 | </div> | ||
515 | </div> | ||
516 | <div class='clearall'></div> | ||
517 | <div class='section' id='section-31'> | ||
518 | <div class='docs'> | ||
519 | <div class='octowrap'> | ||
520 | <a class='octothorpe' href='#section-31'>#</a> | ||
521 | </div> | ||
522 | <p>remove nodes from lastModifyingUser, format as array</p> | ||
523 | </div> | ||
524 | <div class='code'> | ||
525 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">transform_file_metadata</span><span class="p">(</span><span class="n">file_metadata</span><span class="p">):</span></pre></div> | ||
526 | </div> | ||
527 | </div> | ||
528 | <div class='clearall'></div> | ||
529 | <div class='section' id='section-32'> | ||
530 | <div class='docs'> | ||
531 | <div class='octowrap'> | ||
532 | <a class='octothorpe' href='#section-32'>#</a> | ||
533 | </div> | ||
534 | |||
535 | </div> | ||
536 | <div class='code'> | ||
537 | <div class="highlight"><pre> <span class="n">file_metadata_tf</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">file_metadata</span><span class="p">))</span> | ||
538 | |||
539 | <span class="k">if</span> <span class="n">file_metadata_tf</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'lastModifyingUser'</span><span class="p">):</span> | ||
540 | <span class="n">file_metadata_tf</span><span class="p">[</span><span class="s1">'lastModifyingUser'</span><span class="p">]</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s1">'photoLink'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> | ||
541 | <span class="n">file_metadata_tf</span><span class="p">[</span><span class="s1">'lastModifyingUser'</span><span class="p">]</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s1">'me'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> | ||
542 | <span class="n">file_metadata_tf</span><span class="p">[</span><span class="s1">'lastModifyingUser'</span><span class="p">]</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s1">'permissionId'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> | ||
543 | |||
544 | <span class="n">file_metadata_arr</span> <span class="o">=</span> <span class="p">[]</span> | ||
545 | <span class="n">file_metadata_arr</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">file_metadata_tf</span><span class="p">)</span> | ||
546 | <span class="k">return</span> <span class="n">file_metadata_arr</span></pre></div> | ||
547 | </div> | ||
548 | </div> | ||
549 | <div class='clearall'></div> | ||
550 | <div class='section' id='section-33'> | ||
551 | <div class='docs'> | ||
552 | <div class='octowrap'> | ||
553 | <a class='octothorpe' href='#section-33'>#</a> | ||
554 | </div> | ||
555 | <p>remove defaultFormat and sheets nodes, format as array</p> | ||
556 | </div> | ||
557 | <div class='code'> | ||
558 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">transform_spreadsheet_metadata</span><span class="p">(</span><span class="n">spreadsheet_metadata</span><span class="p">):</span></pre></div> | ||
559 | </div> | ||
560 | </div> | ||
561 | <div class='clearall'></div> | ||
562 | <div class='section' id='section-34'> | ||
563 | <div class='docs'> | ||
564 | <div class='octowrap'> | ||
565 | <a class='octothorpe' href='#section-34'>#</a> | ||
566 | </div> | ||
567 | |||
568 | </div> | ||
569 | <div class='code'> | ||
570 | <div class="highlight"><pre> <span class="n">spreadsheet_metadata_tf</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">spreadsheet_metadata</span><span class="p">))</span> | ||
571 | |||
572 | <span class="k">if</span> <span class="n">spreadsheet_metadata_tf</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">):</span> | ||
573 | <span class="n">spreadsheet_metadata_tf</span><span class="p">[</span><span class="s1">'properties'</span><span class="p">]</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s1">'defaultFormat'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> | ||
574 | <span class="n">spreadsheet_metadata_tf</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s1">'sheets'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> | ||
575 | |||
576 | <span class="n">spreadsheet_metadata_arr</span> <span class="o">=</span> <span class="p">[]</span> | ||
577 | <span class="n">spreadsheet_metadata_arr</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">spreadsheet_metadata_tf</span><span class="p">)</span> | ||
578 | <span class="k">return</span> <span class="n">spreadsheet_metadata_arr</span></pre></div> | ||
579 | </div> | ||
580 | </div> | ||
581 | <div class='clearall'></div> | ||
582 | <div class='section' id='section-35'> | ||
583 | <div class='docs'> | ||
584 | <div class='octowrap'> | ||
585 | <a class='octothorpe' href='#section-35'>#</a> | ||
586 | </div> | ||
587 | <p>add spreadsheetId, sheetUrl, and columns metadata</p> | ||
588 | </div> | ||
589 | <div class='code'> | ||
590 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">transform_sheet_metadata</span><span class="p">(</span><span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">sheet</span><span class="p">,</span> <span class="n">columns</span><span class="p">):</span></pre></div> | ||
591 | </div> | ||
592 | </div> | ||
593 | <div class='clearall'></div> | ||
594 | <div class='section' id='section-36'> | ||
595 | <div class='docs'> | ||
596 | <div class='octowrap'> | ||
597 | <a class='octothorpe' href='#section-36'>#</a> | ||
598 | </div> | ||
599 | |||
600 | </div> | ||
601 | <div class='code'> | ||
602 | <div class="highlight"><pre> <span class="n">sheet_metadata</span> <span class="o">=</span> <span class="n">sheet</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">)</span> | ||
603 | <span class="n">sheet_metadata_tf</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">sheet_metadata</span><span class="p">))</span> | ||
604 | <span class="n">sheet_id</span> <span class="o">=</span> <span class="n">sheet_metadata_tf</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'sheetId'</span><span class="p">)</span> | ||
605 | <span class="n">sheet_url</span> <span class="o">=</span> <span class="s1">'https://docs.google.com/spreadsheets/d/</span><span class="si">{}</span><span class="s1">/edit#gid=</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">sheet_id</span><span class="p">)</span> | ||
606 | <span class="n">sheet_metadata_tf</span><span class="p">[</span><span class="s1">'spreadsheetId'</span><span class="p">]</span> <span class="o">=</span> <span class="n">spreadsheet_id</span> | ||
607 | <span class="n">sheet_metadata_tf</span><span class="p">[</span><span class="s1">'sheetUrl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">sheet_url</span> | ||
608 | <span class="n">sheet_metadata_tf</span><span class="p">[</span><span class="s1">'columns'</span><span class="p">]</span> <span class="o">=</span> <span class="n">columns</span> | ||
609 | <span class="k">return</span> <span class="n">sheet_metadata_tf</span></pre></div> | ||
610 | </div> | ||
611 | </div> | ||
612 | <div class='clearall'></div> | ||
613 | <div class='section' id='section-37'> | ||
614 | <div class='docs'> | ||
615 | <div class='octowrap'> | ||
616 | <a class='octothorpe' href='#section-37'>#</a> | ||
617 | </div> | ||
618 | <p>Convert Excel Date Serial Number (excel_date_sn) to datetime string. timezone_str: defaults to UTC</p> | ||
619 | </div> | ||
620 | <div class='code'> | ||
621 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">excel_to_dttm_str</span><span class="p">(</span><span class="n">excel_date_sn</span><span class="p">,</span> <span class="n">timezone_str</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span></pre></div> | ||
622 | </div> | ||
623 | </div> | ||
624 | <div class='clearall'></div> | ||
625 | <div class='section' id='section-38'> | ||
626 | <div class='docs'> | ||
627 | <div class='octowrap'> | ||
628 | <a class='octothorpe' href='#section-38'>#</a> | ||
629 | </div> | ||
630 | <p>UTC (which we assume is the timezone for ALL datetimes)</p> | ||
631 | </div> | ||
632 | <div class='code'> | ||
633 | <div class="highlight"><pre> <span class="k">if</span> <span class="ow">not</span> <span class="n">timezone_str</span><span class="p">:</span> | ||
634 | <span class="n">timezone_str</span> <span class="o">=</span> <span class="s1">'UTC'</span> | ||
635 | <span class="n">tzn</span> <span class="o">=</span> <span class="n">pytz</span><span class="o">.</span><span class="n">timezone</span><span class="p">(</span><span class="n">timezone_str</span><span class="p">)</span> | ||
636 | <span class="n">epoch_dttm</span> <span class="o">=</span> <span class="n">datetime</span><span class="p">(</span><span class="mi">1970</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> | ||
637 | |||
638 | <span class="n">sec_per_day</span> <span class="o">=</span> <span class="mi">86400</span></pre></div> | ||
639 | </div> | ||
640 | </div> | ||
641 | <div class='clearall'></div> | ||
642 | <div class='section' id='section-39'> | ||
643 | <div class='docs'> | ||
644 | <div class='octowrap'> | ||
645 | <a class='octothorpe' href='#section-39'>#</a> | ||
646 | </div> | ||
647 | <p>1970-01-01T00:00:00Z, Lotus Notes Serial Number for Epoch Start Date</p> | ||
648 | </div> | ||
649 | <div class='code'> | ||
650 | <div class="highlight"><pre> <span class="n">excel_epoch</span> <span class="o">=</span> <span class="mi">25569</span></pre></div> | ||
651 | </div> | ||
652 | </div> | ||
653 | <div class='clearall'></div> | ||
654 | <div class='section' id='section-40'> | ||
655 | <div class='docs'> | ||
656 | <div class='octowrap'> | ||
657 | <a class='octothorpe' href='#section-40'>#</a> | ||
658 | </div> | ||
659 | <p>Days since Epoch, times the seconds per day => seconds since Epoch</p> | ||
660 | </div> | ||
661 | <div class='code'> | ||
662 | <div class="highlight"><pre> <span class="n">epoch_sec</span> <span class="o">=</span> <span class="n">math</span><span class="o">.</span><span class="n">floor</span><span class="p">((</span><span class="n">excel_date_sn</span> <span class="o">-</span> <span class="n">excel_epoch</span><span class="p">)</span> <span class="o">*</span> <span class="n">sec_per_day</span><span class="p">)</span> | ||
663 | |||
664 | <span class="n">excel_dttm</span> <span class="o">=</span> <span class="n">epoch_dttm</span> <span class="o">+</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">seconds</span><span class="o">=</span><span class="n">epoch_sec</span><span class="p">)</span> | ||
665 | <span class="n">utc_dttm</span> <span class="o">=</span> <span class="n">tzn</span><span class="o">.</span><span class="n">localize</span><span class="p">(</span><span class="n">excel_dttm</span><span class="p">)</span><span class="o">.</span><span class="n">astimezone</span><span class="p">(</span><span class="n">pytz</span><span class="o">.</span><span class="n">utc</span><span class="p">)</span> | ||
666 | <span class="n">utc_dttm_str</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="n">utc_dttm</span><span class="p">)</span> | ||
667 | <span class="k">return</span> <span class="n">utc_dttm_str</span></pre></div> | ||
668 | </div> | ||
669 | </div> | ||
670 | <div class='clearall'></div> | ||
671 | <div class='section' id='section-41'> | ||
672 | <div class='docs'> | ||
673 | <div class='octowrap'> | ||
674 | <a class='octothorpe' href='#section-41'>#</a> | ||
675 | </div> | ||
676 | <hr /> | ||
677 | <h3>WARNING This next function is confusing</h3> | ||
678 | </div> | ||
679 | <div class='code'> | ||
680 | <div class="highlight"><pre></pre></div> | ||
681 | </div> | ||
682 | </div> | ||
683 | <div class='clearall'></div> | ||
684 | <div class='section' id='section-42'> | ||
685 | <div class='docs'> | ||
686 | <div class='octowrap'> | ||
687 | <a class='octothorpe' href='#section-42'>#</a> | ||
688 | </div> | ||
689 | <p>In general, the point of the function is to transform the field based on the data type that the | ||
690 | API tells us. It loops over every row and then every column in the row.</p> | ||
691 | </div> | ||
692 | <div class='code'> | ||
693 | <div class="highlight"><pre></pre></div> | ||
694 | </div> | ||
695 | </div> | ||
696 | <div class='clearall'></div> | ||
697 | <div class='section' id='section-43'> | ||
698 | <div class='docs'> | ||
699 | <div class='octowrap'> | ||
700 | <a class='octothorpe' href='#section-43'>#</a> | ||
701 | </div> | ||
702 | <p>For the <code>TIME</code> fields, there’s no reason it should work. And for some cases, the value returned is | ||
703 | just wrong.</p> | ||
704 | </div> | ||
705 | <div class='code'> | ||
706 | <div class="highlight"><pre></pre></div> | ||
707 | </div> | ||
708 | </div> | ||
709 | <div class='clearall'></div> | ||
710 | <div class='section' id='section-44'> | ||
711 | <div class='docs'> | ||
712 | <div class='octowrap'> | ||
713 | <a class='octothorpe' href='#section-44'>#</a> | ||
714 | </div> | ||
715 | <p>You can look at the code for <code>timedelta</code> and you would see that this constructor wants to | ||
716 | normalize the input of 7 units into 3 (you can create the object with <code>weeks</code>, <code>days</code>, <code>hours</code>, | ||
717 | <code>minutes</code>, <code>seconds</code>, <code>milliseconds</code>, and <code>microseconds</code>. But it will convert values into just | ||
718 | <code>days</code>, <code>seconds</code>, and <code>microseconds</code>).</p> | ||
719 | </div> | ||
720 | <div class='code'> | ||
721 | <div class="highlight"><pre></pre></div> | ||
722 | </div> | ||
723 | </div> | ||
724 | <div class='clearall'></div> | ||
725 | <div class='section' id='section-45'> | ||
726 | <div class='docs'> | ||
727 | <div class='octowrap'> | ||
728 | <a class='octothorpe' href='#section-45'>#</a> | ||
729 | </div> | ||
730 | <p><em>Disclaimer I don’t have the exact units, but the spirit of | ||
731 | the idea is here.</em></p> | ||
732 | </div> | ||
733 | <div class='code'> | ||
734 | <div class="highlight"><pre></pre></div> | ||
735 | </div> | ||
736 | </div> | ||
737 | <div class='clearall'></div> | ||
738 | <div class='section' id='section-46'> | ||
739 | <div class='docs'> | ||
740 | <div class='octowrap'> | ||
741 | <a class='octothorpe' href='#section-46'>#</a> | ||
742 | </div> | ||
743 | <p>When we pass in <code>seconds</code> here as the value we get from the API times the number of seconds in a | ||
744 | day, how <code>timedelta</code> does its normalization gives us an incorrect value. It takes the input to | ||
745 | <code>seconds</code> and passes that to <code>divmod()</code> which returns a 2-tuple as the result. The first element is | ||
746 | our input integer divided by the number of seconds in a day. The second element is our input mod | ||
747 | the number of seconds in a day. Then these results are added to the rest of the normalization and | ||
748 | we get the correct time value back out. It’s easy to imagine that since we don’t pass in a <code>days</code> | ||
749 | argument, our <code>divmod</code>’s days output is just added to zero. The <code>__str__()</code> for <code>timedelta</code> must | ||
750 | be something like <code>"{my_days} days, {time_since_midnight(my_seconds)}"</code>, which is essentially what | ||
751 | we get after this transform function.</p> | ||
752 | </div> | ||
753 | <div class='code'> | ||
754 | <div class="highlight"><pre></pre></div> | ||
755 | </div> | ||
756 | </div> | ||
757 | <div class='clearall'></div> | ||
758 | <div class='section' id='section-47'> | ||
759 | <div class='docs'> | ||
760 | <div class='octowrap'> | ||
761 | <a class='octothorpe' href='#section-47'>#</a> | ||
762 | </div> | ||
763 | <hr /> | ||
764 | </div> | ||
765 | <div class='code'> | ||
766 | <div class="highlight"><pre></pre></div> | ||
767 | </div> | ||
768 | </div> | ||
769 | <div class='clearall'></div> | ||
770 | <div class='section' id='section-48'> | ||
771 | <div class='docs'> | ||
772 | <div class='octowrap'> | ||
773 | <a class='octothorpe' href='#section-48'>#</a> | ||
774 | </div> | ||
775 | <p>Add spreadsheet_id, sheet_id, and row, and convert dates/times. Convert from array of values to JSON with column names as keys</p> | ||
776 | </div> | ||
777 | <div class='code'> | ||
778 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">transform_sheet_data</span><span class="p">(</span><span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">sheet_id</span><span class="p">,</span> <span class="n">sheet_title</span><span class="p">,</span> <span class="n">from_row</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span> <span class="n">sheet_data_rows</span><span class="p">):</span></pre></div> | ||
779 | </div> | ||
780 | </div> | ||
781 | <div class='clearall'></div> | ||
782 | <div class='section' id='section-49'> | ||
783 | <div class='docs'> | ||
784 | <div class='octowrap'> | ||
785 | <a class='octothorpe' href='#section-49'>#</a> | ||
786 | </div> | ||
787 | <p>JSON with column names as keys</p> | ||
788 | </div> | ||
789 | <div class='code'> | ||
790 | <div class="highlight"><pre> <span class="n">sheet_data_tf</span> <span class="o">=</span> <span class="p">[]</span> | ||
791 | <span class="n">row_num</span> <span class="o">=</span> <span class="n">from_row</span></pre></div> | ||
792 | </div> | ||
793 | </div> | ||
794 | <div class='clearall'></div> | ||
795 | <div class='section' id='section-50'> | ||
796 | <div class='docs'> | ||
797 | <div class='octowrap'> | ||
798 | <a class='octothorpe' href='#section-50'>#</a> | ||
799 | </div> | ||
800 | <p>Create sorted list of columns based on columnIndex</p> | ||
801 | </div> | ||
802 | <div class='code'> | ||
803 | <div class="highlight"><pre> <span class="n">cols</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">i</span><span class="p">:</span> <span class="n">i</span><span class="p">[</span><span class="s1">'columnIndex'</span><span class="p">])</span> | ||
804 | |||
805 | <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">sheet_data_rows</span><span class="p">:</span></pre></div> | ||
806 | </div> | ||
807 | </div> | ||
808 | <div class='clearall'></div> | ||
809 | <div class='section' id='section-51'> | ||
810 | <div class='docs'> | ||
811 | <div class='octowrap'> | ||
812 | <a class='octothorpe' href='#section-51'>#</a> | ||
813 | </div> | ||
814 | <p>If empty row, SKIP</p> | ||
815 | </div> | ||
816 | <div class='code'> | ||
817 | <div class="highlight"><pre> <span class="k">if</span> <span class="n">row</span> <span class="o">==</span> <span class="p">[]:</span> | ||
818 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'EMPTY ROW: </span><span class="si">{}</span><span class="s1">, SKIPPING'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">row_num</span><span class="p">))</span> | ||
819 | <span class="k">else</span><span class="p">:</span> | ||
820 | <span class="n">sheet_data_row_tf</span> <span class="o">=</span> <span class="p">{}</span></pre></div> | ||
821 | </div> | ||
822 | </div> | ||
823 | <div class='clearall'></div> | ||
824 | <div class='section' id='section-52'> | ||
825 | <div class='docs'> | ||
826 | <div class='octowrap'> | ||
827 | <a class='octothorpe' href='#section-52'>#</a> | ||
828 | </div> | ||
829 | <p>Add spreadsheet_id, sheet_id, and row</p> | ||
830 | </div> | ||
831 | <div class='code'> | ||
832 | <div class="highlight"><pre> <span class="n">sheet_data_row_tf</span><span class="p">[</span><span class="s1">'__sdc_spreadsheet_id'</span><span class="p">]</span> <span class="o">=</span> <span class="n">spreadsheet_id</span> | ||
833 | <span class="n">sheet_data_row_tf</span><span class="p">[</span><span class="s1">'__sdc_sheet_id'</span><span class="p">]</span> <span class="o">=</span> <span class="n">sheet_id</span> | ||
834 | <span class="n">sheet_data_row_tf</span><span class="p">[</span><span class="s1">'__sdc_row'</span><span class="p">]</span> <span class="o">=</span> <span class="n">row_num</span> | ||
835 | <span class="n">col_num</span> <span class="o">=</span> <span class="mi">1</span> | ||
836 | <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">row</span><span class="p">:</span></pre></div> | ||
837 | </div> | ||
838 | </div> | ||
839 | <div class='clearall'></div> | ||
840 | <div class='section' id='section-53'> | ||
841 | <div class='docs'> | ||
842 | <div class='octowrap'> | ||
843 | <a class='octothorpe' href='#section-53'>#</a> | ||
844 | </div> | ||
845 | <p>Select column metadata based on column index</p> | ||
846 | </div> | ||
847 | <div class='code'> | ||
848 | <div class="highlight"><pre> <span class="n">col</span> <span class="o">=</span> <span class="n">cols</span><span class="p">[</span><span class="n">col_num</span> <span class="o">-</span> <span class="mi">1</span><span class="p">]</span> | ||
849 | <span class="n">col_skipped</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'columnSkipped'</span><span class="p">)</span> | ||
850 | <span class="k">if</span> <span class="ow">not</span> <span class="n">col_skipped</span><span class="p">:</span></pre></div> | ||
851 | </div> | ||
852 | </div> | ||
853 | <div class='clearall'></div> | ||
854 | <div class='section' id='section-54'> | ||
855 | <div class='docs'> | ||
856 | <div class='octowrap'> | ||
857 | <a class='octothorpe' href='#section-54'>#</a> | ||
858 | </div> | ||
859 | <p>Get column metadata</p> | ||
860 | </div> | ||
861 | <div class='code'> | ||
862 | <div class="highlight"><pre> <span class="n">col_name</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'columnName'</span><span class="p">)</span> | ||
863 | <span class="n">col_type</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'columnType'</span><span class="p">)</span> | ||
864 | <span class="n">col_letter</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'columnLetter'</span><span class="p">)</span></pre></div> | ||
865 | </div> | ||
866 | </div> | ||
867 | <div class='clearall'></div> | ||
868 | <div class='section' id='section-55'> | ||
869 | <div class='docs'> | ||
870 | <div class='octowrap'> | ||
871 | <a class='octothorpe' href='#section-55'>#</a> | ||
872 | </div> | ||
873 | <p>NULL values</p> | ||
874 | </div> | ||
875 | <div class='code'> | ||
876 | <div class="highlight"><pre> <span class="k">if</span> <span class="n">value</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">value</span> <span class="o">==</span> <span class="s1">''</span><span class="p">:</span> | ||
877 | <span class="n">col_val</span> <span class="o">=</span> <span class="kc">None</span></pre></div> | ||
878 | </div> | ||
879 | </div> | ||
880 | <div class='clearall'></div> | ||
881 | <div class='section' id='section-56'> | ||
882 | <div class='docs'> | ||
883 | <div class='octowrap'> | ||
884 | <a class='octothorpe' href='#section-56'>#</a> | ||
885 | </div> | ||
886 | <p>Convert dates/times from Lotus Notes Serial Numbers | ||
887 | DATE-TIME</p> | ||
888 | </div> | ||
889 | <div class='code'> | ||
890 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">col_type</span> <span class="o">==</span> <span class="s1">'numberType.DATE_TIME'</span><span class="p">:</span> | ||
891 | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">)):</span> | ||
892 | <span class="n">col_val</span> <span class="o">=</span> <span class="n">excel_to_dttm_str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
893 | <span class="k">else</span><span class="p">:</span> | ||
894 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
895 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
896 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row_num</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span></pre></div> | ||
897 | </div> | ||
898 | </div> | ||
899 | <div class='clearall'></div> | ||
900 | <div class='section' id='section-57'> | ||
901 | <div class='docs'> | ||
902 | <div class='octowrap'> | ||
903 | <a class='octothorpe' href='#section-57'>#</a> | ||
904 | </div> | ||
905 | <p>DATE</p> | ||
906 | </div> | ||
907 | <div class='code'> | ||
908 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">col_type</span> <span class="o">==</span> <span class="s1">'numberType.DATE'</span><span class="p">:</span> | ||
909 | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">)):</span> | ||
910 | <span class="n">col_val</span> <span class="o">=</span> <span class="n">excel_to_dttm_str</span><span class="p">(</span><span class="n">value</span><span class="p">)[:</span><span class="mi">10</span><span class="p">]</span> | ||
911 | <span class="k">else</span><span class="p">:</span> | ||
912 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
913 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
914 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row_num</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span></pre></div> | ||
915 | </div> | ||
916 | </div> | ||
917 | <div class='clearall'></div> | ||
918 | <div class='section' id='section-58'> | ||
919 | <div class='docs'> | ||
920 | <div class='octowrap'> | ||
921 | <a class='octothorpe' href='#section-58'>#</a> | ||
922 | </div> | ||
923 | <p>TIME ONLY (NO DATE)</p> | ||
924 | </div> | ||
925 | <div class='code'> | ||
926 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">col_type</span> <span class="o">==</span> <span class="s1">'numberType.TIME'</span><span class="p">:</span> | ||
927 | |||
928 | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">)):</span> | ||
929 | <span class="k">try</span><span class="p">:</span> | ||
930 | <span class="n">total_secs</span> <span class="o">=</span> <span class="n">value</span> <span class="o">*</span> <span class="mi">86400</span></pre></div> | ||
931 | </div> | ||
932 | </div> | ||
933 | <div class='clearall'></div> | ||
934 | <div class='section' id='section-59'> | ||
935 | <div class='docs'> | ||
936 | <div class='octowrap'> | ||
937 | <a class='octothorpe' href='#section-59'>#</a> | ||
938 | </div> | ||
939 | <p>Create string formatted like HH:MM:SS</p> | ||
940 | </div> | ||
941 | <div class='code'> | ||
942 | <div class="highlight"><pre> <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">timedelta</span><span class="p">(</span><span class="n">seconds</span><span class="o">=</span><span class="n">total_secs</span><span class="p">))</span> | ||
943 | <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span> | ||
944 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
945 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
946 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row_num</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span> | ||
947 | <span class="k">else</span><span class="p">:</span> | ||
948 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></pre></div> | ||
949 | </div> | ||
950 | </div> | ||
951 | <div class='clearall'></div> | ||
952 | <div class='section' id='section-60'> | ||
953 | <div class='docs'> | ||
954 | <div class='octowrap'> | ||
955 | <a class='octothorpe' href='#section-60'>#</a> | ||
956 | </div> | ||
957 | <p>NUMBER (INTEGER AND FLOAT)</p> | ||
958 | </div> | ||
959 | <div class='code'> | ||
960 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">col_type</span> <span class="o">==</span> <span class="s1">'numberType'</span><span class="p">:</span> | ||
961 | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span> | ||
962 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
963 | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span></pre></div> | ||
964 | </div> | ||
965 | </div> | ||
966 | <div class='clearall'></div> | ||
967 | <div class='section' id='section-61'> | ||
968 | <div class='docs'> | ||
969 | <div class='octowrap'> | ||
970 | <a class='octothorpe' href='#section-61'>#</a> | ||
971 | </div> | ||
972 | <p>Determine float decimal digits</p> | ||
973 | </div> | ||
974 | <div class='code'> | ||
975 | <div class="highlight"><pre> <span class="n">decimal_digits</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)[::</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)</span> | ||
976 | <span class="k">if</span> <span class="n">decimal_digits</span> <span class="o">></span> <span class="mi">15</span><span class="p">:</span> | ||
977 | <span class="k">try</span><span class="p">:</span></pre></div> | ||
978 | </div> | ||
979 | </div> | ||
980 | <div class='clearall'></div> | ||
981 | <div class='section' id='section-62'> | ||
982 | <div class='docs'> | ||
983 | <div class='octowrap'> | ||
984 | <a class='octothorpe' href='#section-62'>#</a> | ||
985 | </div> | ||
986 | <p>ROUND to multipleOf: 1e-15</p> | ||
987 | </div> | ||
988 | <div class='code'> | ||
989 | <div class="highlight"><pre> <span class="n">col_val</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="nb">round</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="mi">15</span><span class="p">))</span> | ||
990 | <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span> | ||
991 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
992 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
993 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row_num</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span> | ||
994 | <span class="k">else</span><span class="p">:</span> <span class="c1"># decimal_digits <= 15, no rounding</span> | ||
995 | <span class="k">try</span><span class="p">:</span> | ||
996 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
997 | <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span> | ||
998 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
999 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR: SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1000 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row_num</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span> | ||
1001 | <span class="k">else</span><span class="p">:</span> | ||
1002 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
1003 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR: SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1004 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row_num</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span></pre></div> | ||
1005 | </div> | ||
1006 | </div> | ||
1007 | <div class='clearall'></div> | ||
1008 | <div class='section' id='section-63'> | ||
1009 | <div class='docs'> | ||
1010 | <div class='octowrap'> | ||
1011 | <a class='octothorpe' href='#section-63'>#</a> | ||
1012 | </div> | ||
1013 | <p>STRING</p> | ||
1014 | </div> | ||
1015 | <div class='code'> | ||
1016 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">col_type</span> <span class="o">==</span> <span class="s1">'stringValue'</span><span class="p">:</span> | ||
1017 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></pre></div> | ||
1018 | </div> | ||
1019 | </div> | ||
1020 | <div class='clearall'></div> | ||
1021 | <div class='section' id='section-64'> | ||
1022 | <div class='docs'> | ||
1023 | <div class='octowrap'> | ||
1024 | <a class='octothorpe' href='#section-64'>#</a> | ||
1025 | </div> | ||
1026 | <p>BOOLEAN</p> | ||
1027 | </div> | ||
1028 | <div class='code'> | ||
1029 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">col_type</span> <span class="o">==</span> <span class="s1">'boolValue'</span><span class="p">:</span> | ||
1030 | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">bool</span><span class="p">):</span> | ||
1031 | <span class="n">col_val</span> <span class="o">=</span> <span class="n">value</span> | ||
1032 | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> | ||
1033 | <span class="k">if</span> <span class="n">value</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'true'</span><span class="p">,</span> <span class="s1">'t'</span><span class="p">,</span> <span class="s1">'yes'</span><span class="p">,</span> <span class="s1">'y'</span><span class="p">):</span> | ||
1034 | <span class="n">col_val</span> <span class="o">=</span> <span class="kc">True</span> | ||
1035 | <span class="k">elif</span> <span class="n">value</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'false'</span><span class="p">,</span> <span class="s1">'f'</span><span class="p">,</span> <span class="s1">'no'</span><span class="p">,</span> <span class="s1">'n'</span><span class="p">):</span> | ||
1036 | <span class="n">col_val</span> <span class="o">=</span> <span class="kc">False</span> | ||
1037 | <span class="k">else</span><span class="p">:</span> | ||
1038 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
1039 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1040 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span> | ||
1041 | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span> | ||
1042 | <span class="k">if</span> <span class="n">value</span> <span class="ow">in</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">):</span> | ||
1043 | <span class="n">col_val</span> <span class="o">=</span> <span class="kc">True</span> | ||
1044 | <span class="k">elif</span> <span class="n">value</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | ||
1045 | <span class="n">col_val</span> <span class="o">=</span> <span class="kc">False</span> | ||
1046 | <span class="k">else</span><span class="p">:</span> | ||
1047 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
1048 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1049 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span></pre></div> | ||
1050 | </div> | ||
1051 | </div> | ||
1052 | <div class='clearall'></div> | ||
1053 | <div class='section' id='section-65'> | ||
1054 | <div class='docs'> | ||
1055 | <div class='octowrap'> | ||
1056 | <a class='octothorpe' href='#section-65'>#</a> | ||
1057 | </div> | ||
1058 | <p>OTHER: Convert everything else to a string</p> | ||
1059 | </div> | ||
1060 | <div class='code'> | ||
1061 | <div class="highlight"><pre> <span class="k">else</span><span class="p">:</span> | ||
1062 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
1063 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1064 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span> | ||
1065 | <span class="n">sheet_data_row_tf</span><span class="p">[</span><span class="n">col_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">col_val</span> | ||
1066 | <span class="n">col_num</span> <span class="o">=</span> <span class="n">col_num</span> <span class="o">+</span> <span class="mi">1</span></pre></div> | ||
1067 | </div> | ||
1068 | </div> | ||
1069 | <div class='clearall'></div> | ||
1070 | <div class='section' id='section-66'> | ||
1071 | <div class='docs'> | ||
1072 | <div class='octowrap'> | ||
1073 | <a class='octothorpe' href='#section-66'>#</a> | ||
1074 | </div> | ||
1075 | <p>APPEND non-empty row</p> | ||
1076 | </div> | ||
1077 | <div class='code'> | ||
1078 | <div class="highlight"><pre> <span class="n">sheet_data_tf</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sheet_data_row_tf</span><span class="p">)</span> | ||
1079 | <span class="n">row_num</span> <span class="o">=</span> <span class="n">row_num</span> <span class="o">+</span> <span class="mi">1</span> | ||
1080 | <span class="k">return</span> <span class="n">sheet_data_tf</span><span class="p">,</span> <span class="n">row_num</span></pre></div> | ||
1081 | </div> | ||
1082 | </div> | ||
1083 | <div class='clearall'></div> | ||
1084 | <div class='section' id='section-67'> | ||
1085 | <div class='docs'> | ||
1086 | <div class='octowrap'> | ||
1087 | <a class='octothorpe' href='#section-67'>#</a> | ||
1088 | </div> | ||
1089 | <hr /> | ||
1090 | </div> | ||
1091 | <div class='code'> | ||
1092 | <div class="highlight"><pre></pre></div> | ||
1093 | </div> | ||
1094 | </div> | ||
1095 | <div class='clearall'></div> | ||
1096 | <div class='section' id='section-68'> | ||
1097 | <div class='docs'> | ||
1098 | <div class='octowrap'> | ||
1099 | <a class='octothorpe' href='#section-68'>#</a> | ||
1100 | </div> | ||
1101 | <h1>Main Functions</h1> | ||
1102 | </div> | ||
1103 | <div class='code'> | ||
1104 | <div class="highlight"><pre></pre></div> | ||
1105 | </div> | ||
1106 | </div> | ||
1107 | <div class='clearall'></div> | ||
1108 | <div class='section' id='section-69'> | ||
1109 | <div class='docs'> | ||
1110 | <div class='octowrap'> | ||
1111 | <a class='octothorpe' href='#section-69'>#</a> | ||
1112 | </div> | ||
1113 | <hr /> | ||
1114 | </div> | ||
1115 | <div class='code'> | ||
1116 | <div class="highlight"><pre></pre></div> | ||
1117 | </div> | ||
1118 | </div> | ||
1119 | <div class='clearall'></div> | ||
1120 | <div class='section' id='section-70'> | ||
1121 | <div class='docs'> | ||
1122 | <div class='octowrap'> | ||
1123 | <a class='octothorpe' href='#section-70'>#</a> | ||
1124 | </div> | ||
1125 | <p>Transform/validate batch of records w/ schema and send to target</p> | ||
1126 | </div> | ||
1127 | <div class='code'> | ||
1128 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">process_records</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">,</span> <span class="n">records</span><span class="p">,</span> <span class="n">time_extracted</span><span class="p">,</span> <span class="n">version</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span></pre></div> | ||
1129 | </div> | ||
1130 | </div> | ||
1131 | <div class='clearall'></div> | ||
1132 | <div class='section' id='section-71'> | ||
1133 | <div class='docs'> | ||
1134 | <div class='octowrap'> | ||
1135 | <a class='octothorpe' href='#section-71'>#</a> | ||
1136 | </div> | ||
1137 | |||
1138 | </div> | ||
1139 | <div class='code'> | ||
1140 | <div class="highlight"><pre> <span class="n">stream</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">get_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span> | ||
1141 | <span class="n">schema</span> <span class="o">=</span> <span class="n">stream</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span> | ||
1142 | <span class="n">stream_metadata</span> <span class="o">=</span> <span class="n">metadata</span><span class="o">.</span><span class="n">to_map</span><span class="p">(</span><span class="n">stream</span><span class="o">.</span><span class="n">metadata</span><span class="p">)</span> | ||
1143 | <span class="k">with</span> <span class="n">metrics</span><span class="o">.</span><span class="n">record_counter</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span> <span class="k">as</span> <span class="n">counter</span><span class="p">:</span> | ||
1144 | <span class="k">for</span> <span class="n">record</span> <span class="ow">in</span> <span class="n">records</span><span class="p">:</span> | ||
1145 | <span class="k">with</span> <span class="n">Transformer</span><span class="p">()</span> <span class="k">as</span> <span class="n">transformer</span><span class="p">:</span> | ||
1146 | <span class="k">try</span><span class="p">:</span> | ||
1147 | <span class="n">transformed_record</span> <span class="o">=</span> <span class="n">transformer</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">record</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="n">stream_metadata</span><span class="p">)</span> | ||
1148 | <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span> | ||
1149 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">'</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">err</span><span class="p">))</span> | ||
1150 | <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="n">err</span><span class="p">)</span> | ||
1151 | <span class="n">write_record</span><span class="p">(</span> | ||
1152 | <span class="n">stream_name</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
1153 | <span class="n">record</span><span class="o">=</span><span class="n">transformed_record</span><span class="p">,</span> | ||
1154 | <span class="n">time_extracted</span><span class="o">=</span><span class="n">time_extracted</span><span class="p">,</span> | ||
1155 | <span class="n">version</span><span class="o">=</span><span class="n">version</span><span class="p">)</span> | ||
1156 | <span class="n">counter</span><span class="o">.</span><span class="n">increment</span><span class="p">()</span> | ||
1157 | <span class="k">return</span> <span class="n">counter</span><span class="o">.</span><span class="n">value</span></pre></div> | ||
1158 | </div> | ||
1159 | </div> | ||
1160 | <div class='clearall'></div> | ||
1161 | <div class='section' id='section-72'> | ||
1162 | <div class='docs'> | ||
1163 | <div class='octowrap'> | ||
1164 | <a class='octothorpe' href='#section-72'>#</a> | ||
1165 | </div> | ||
1166 | <p>This is just a pass-through to <code>process_records()</code></p> | ||
1167 | </div> | ||
1168 | <div class='code'> | ||
1169 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">sync_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">selected_streams</span><span class="p">,</span> <span class="n">catalog</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">records</span><span class="p">,</span> <span class="n">time_extracted</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span></pre></div> | ||
1170 | </div> | ||
1171 | </div> | ||
1172 | <div class='clearall'></div> | ||
1173 | <div class='section' id='section-73'> | ||
1174 | <div class='docs'> | ||
1175 | <div class='octowrap'> | ||
1176 | <a class='octothorpe' href='#section-73'>#</a> | ||
1177 | </div> | ||
1178 | |||
1179 | </div> | ||
1180 | <div class='code'> | ||
1181 | <div class="highlight"><pre> <span class="k">if</span> <span class="n">stream_name</span> <span class="ow">in</span> <span class="n">selected_streams</span><span class="p">:</span> | ||
1182 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'STARTED Syncing </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream_name</span><span class="p">))</span> | ||
1183 | <span class="n">update_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">)</span> | ||
1184 | <span class="n">selected_fields</span> <span class="o">=</span> <span class="n">get_selected_fields</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">)</span> | ||
1185 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Stream: </span><span class="si">{}</span><span class="s1">, selected_fields: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">selected_fields</span><span class="p">))</span> | ||
1186 | <span class="n">write_schema</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">)</span> | ||
1187 | <span class="k">if</span> <span class="ow">not</span> <span class="n">time_extracted</span><span class="p">:</span> | ||
1188 | <span class="n">time_extracted</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> | ||
1189 | <span class="n">record_count</span> <span class="o">=</span> <span class="n">process_records</span><span class="p">(</span> | ||
1190 | <span class="n">catalog</span><span class="o">=</span><span class="n">catalog</span><span class="p">,</span> | ||
1191 | <span class="n">stream_name</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
1192 | <span class="n">records</span><span class="o">=</span><span class="n">records</span><span class="p">,</span> | ||
1193 | <span class="n">time_extracted</span><span class="o">=</span><span class="n">time_extracted</span><span class="p">)</span> | ||
1194 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'FINISHED Syncing </span><span class="si">{}</span><span class="s1">, Total Records: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">record_count</span><span class="p">))</span> | ||
1195 | <span class="n">update_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span></pre></div> | ||
1196 | </div> | ||
1197 | </div> | ||
1198 | <div class='clearall'></div> | ||
1199 | <div class='section' id='section-74'> | ||
1200 | <div class='docs'> | ||
1201 | <div class='octowrap'> | ||
1202 | <a class='octothorpe' href='#section-74'>#</a> | ||
1203 | </div> | ||
1204 | <p>See top of file for notes</p> | ||
1205 | </div> | ||
1206 | <div class='code'> | ||
1207 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">sync</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">config</span><span class="p">,</span> <span class="n">catalog</span><span class="p">,</span> <span class="n">state</span><span class="p">):</span> | ||
1208 | <span class="n">start_date</span> <span class="o">=</span> <span class="n">config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'start_date'</span><span class="p">)</span> | ||
1209 | <span class="n">spreadsheet_id</span> <span class="o">=</span> <span class="n">config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'spreadsheet_id'</span><span class="p">)</span> | ||
1210 | |||
1211 | <span class="n">last_stream</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">get_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">)</span> | ||
1212 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'last/currently syncing stream: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">last_stream</span><span class="p">))</span> | ||
1213 | |||
1214 | <span class="n">selected_streams</span> <span class="o">=</span> <span class="p">[]</span> | ||
1215 | <span class="k">for</span> <span class="n">stream</span> <span class="ow">in</span> <span class="n">catalog</span><span class="o">.</span><span class="n">get_selected_streams</span><span class="p">(</span><span class="n">state</span><span class="p">):</span> | ||
1216 | <span class="n">selected_streams</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">stream</span><span class="o">.</span><span class="n">stream</span><span class="p">)</span> | ||
1217 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'selected_streams: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">selected_streams</span><span class="p">))</span> | ||
1218 | |||
1219 | <span class="k">if</span> <span class="ow">not</span> <span class="n">selected_streams</span><span class="p">:</span> | ||
1220 | <span class="k">return</span></pre></div> | ||
1221 | </div> | ||
1222 | </div> | ||
1223 | <div class='clearall'></div> | ||
1224 | <div class='section' id='section-75'> | ||
1225 | <div class='docs'> | ||
1226 | <div class='octowrap'> | ||
1227 | <a class='octothorpe' href='#section-75'>#</a> | ||
1228 | </div> | ||
1229 | <h2>FILE_METADATA</h2> | ||
1230 | </div> | ||
1231 | <div class='code'> | ||
1232 | <div class="highlight"><pre> <span class="n">file_metadata</span> <span class="o">=</span> <span class="p">{}</span> | ||
1233 | <span class="n">stream_name</span> <span class="o">=</span> <span class="s1">'file_metadata'</span> | ||
1234 | <span class="n">file_metadata_config</span> <span class="o">=</span> <span class="n">STREAMS</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span></pre></div> | ||
1235 | </div> | ||
1236 | </div> | ||
1237 | <div class='clearall'></div> | ||
1238 | <div class='section' id='section-76'> | ||
1239 | <div class='docs'> | ||
1240 | <div class='octowrap'> | ||
1241 | <a class='octothorpe' href='#section-76'>#</a> | ||
1242 | </div> | ||
1243 | <p>GET file_metadata</p> | ||
1244 | </div> | ||
1245 | <div class='code'> | ||
1246 | <div class="highlight"><pre> <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'GET file_meatadata'</span><span class="p">)</span> | ||
1247 | <span class="n">file_metadata</span><span class="p">,</span> <span class="n">time_extracted</span> <span class="o">=</span> <span class="n">get_data</span><span class="p">(</span><span class="n">stream_name</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
1248 | <span class="n">endpoint_config</span><span class="o">=</span><span class="n">file_metadata_config</span><span class="p">,</span> | ||
1249 | <span class="n">client</span><span class="o">=</span><span class="n">client</span><span class="p">,</span> | ||
1250 | <span class="n">spreadsheet_id</span><span class="o">=</span><span class="n">spreadsheet_id</span><span class="p">)</span></pre></div> | ||
1251 | </div> | ||
1252 | </div> | ||
1253 | <div class='clearall'></div> | ||
1254 | <div class='section' id='section-77'> | ||
1255 | <div class='docs'> | ||
1256 | <div class='octowrap'> | ||
1257 | <a class='octothorpe' href='#section-77'>#</a> | ||
1258 | </div> | ||
1259 | <p>Transform file_metadata</p> | ||
1260 | </div> | ||
1261 | <div class='code'> | ||
1262 | <div class="highlight"><pre> <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Transform file_meatadata'</span><span class="p">)</span> | ||
1263 | <span class="n">file_metadata_tf</span> <span class="o">=</span> <span class="n">transform_file_metadata</span><span class="p">(</span><span class="n">file_metadata</span><span class="p">)</span></pre></div> | ||
1264 | </div> | ||
1265 | </div> | ||
1266 | <div class='clearall'></div> | ||
1267 | <div class='section' id='section-78'> | ||
1268 | <div class='docs'> | ||
1269 | <div class='octowrap'> | ||
1270 | <a class='octothorpe' href='#section-78'>#</a> | ||
1271 | </div> | ||
1272 | <p>Check if file has changed, if not exit</p> | ||
1273 | </div> | ||
1274 | <div class='code'> | ||
1275 | <div class="highlight"><pre> <span class="n">last_datetime</span> <span class="o">=</span> <span class="n">strptime_to_utc</span><span class="p">(</span><span class="n">get_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">,</span> <span class="n">start_date</span><span class="p">))</span> | ||
1276 | <span class="n">this_datetime</span> <span class="o">=</span> <span class="n">strptime_to_utc</span><span class="p">(</span><span class="n">file_metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'modifiedTime'</span><span class="p">))</span> | ||
1277 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'last_datetime = </span><span class="si">{}</span><span class="s1">, this_datetime = </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">last_datetime</span><span class="p">,</span> <span class="n">this_datetime</span><span class="p">))</span> | ||
1278 | <span class="k">if</span> <span class="n">this_datetime</span> <span class="o"><=</span> <span class="n">last_datetime</span><span class="p">:</span> | ||
1279 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'this_datetime <= last_datetime, FILE NOT CHANGED. EXITING.'</span><span class="p">)</span> | ||
1280 | <span class="n">write_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="s1">'file_metadata'</span><span class="p">,</span> <span class="n">strftime</span><span class="p">(</span><span class="n">this_datetime</span><span class="p">))</span> | ||
1281 | <span class="k">return</span></pre></div> | ||
1282 | </div> | ||
1283 | </div> | ||
1284 | <div class='clearall'></div> | ||
1285 | <div class='section' id='section-79'> | ||
1286 | <div class='docs'> | ||
1287 | <div class='octowrap'> | ||
1288 | <a class='octothorpe' href='#section-79'>#</a> | ||
1289 | </div> | ||
1290 | <p>Write file_metadata records if selected</p> | ||
1291 | </div> | ||
1292 | <div class='code'> | ||
1293 | <div class="highlight"><pre> <span class="n">sync_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">selected_streams</span><span class="p">,</span> <span class="n">catalog</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">file_metadata_tf</span><span class="p">,</span> <span class="n">time_extracted</span><span class="p">)</span></pre></div> | ||
1294 | </div> | ||
1295 | </div> | ||
1296 | <div class='clearall'></div> | ||
1297 | <div class='section' id='section-80'> | ||
1298 | <div class='docs'> | ||
1299 | <div class='octowrap'> | ||
1300 | <a class='octothorpe' href='#section-80'>#</a> | ||
1301 | </div> | ||
1302 | <h2>SPREADSHEET_METADATA</h2> | ||
1303 | </div> | ||
1304 | <div class='code'> | ||
1305 | <div class="highlight"><pre> <span class="n">spreadsheet_metadata</span> <span class="o">=</span> <span class="p">{}</span> | ||
1306 | <span class="n">stream_name</span> <span class="o">=</span> <span class="s1">'spreadsheet_metadata'</span> | ||
1307 | <span class="n">spreadsheet_metadata_config</span> <span class="o">=</span> <span class="n">STREAMS</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span></pre></div> | ||
1308 | </div> | ||
1309 | </div> | ||
1310 | <div class='clearall'></div> | ||
1311 | <div class='section' id='section-81'> | ||
1312 | <div class='docs'> | ||
1313 | <div class='octowrap'> | ||
1314 | <a class='octothorpe' href='#section-81'>#</a> | ||
1315 | </div> | ||
1316 | <p>GET spreadsheet_metadata</p> | ||
1317 | </div> | ||
1318 | <div class='code'> | ||
1319 | <div class="highlight"><pre> <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'GET spreadsheet_meatadata'</span><span class="p">)</span> | ||
1320 | <span class="n">spreadsheet_metadata</span><span class="p">,</span> <span class="n">ss_time_extracted</span> <span class="o">=</span> <span class="n">get_data</span><span class="p">(</span> | ||
1321 | <span class="n">stream_name</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
1322 | <span class="n">endpoint_config</span><span class="o">=</span><span class="n">spreadsheet_metadata_config</span><span class="p">,</span> | ||
1323 | <span class="n">client</span><span class="o">=</span><span class="n">client</span><span class="p">,</span> | ||
1324 | <span class="n">spreadsheet_id</span><span class="o">=</span><span class="n">spreadsheet_id</span><span class="p">)</span></pre></div> | ||
1325 | </div> | ||
1326 | </div> | ||
1327 | <div class='clearall'></div> | ||
1328 | <div class='section' id='section-82'> | ||
1329 | <div class='docs'> | ||
1330 | <div class='octowrap'> | ||
1331 | <a class='octothorpe' href='#section-82'>#</a> | ||
1332 | </div> | ||
1333 | <p>Transform spreadsheet_metadata</p> | ||
1334 | </div> | ||
1335 | <div class='code'> | ||
1336 | <div class="highlight"><pre> <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Transform spreadsheet_meatadata'</span><span class="p">)</span> | ||
1337 | <span class="n">spreadsheet_metadata_tf</span> <span class="o">=</span> <span class="n">transform_spreadsheet_metadata</span><span class="p">(</span><span class="n">spreadsheet_metadata</span><span class="p">)</span></pre></div> | ||
1338 | </div> | ||
1339 | </div> | ||
1340 | <div class='clearall'></div> | ||
1341 | <div class='section' id='section-83'> | ||
1342 | <div class='docs'> | ||
1343 | <div class='octowrap'> | ||
1344 | <a class='octothorpe' href='#section-83'>#</a> | ||
1345 | </div> | ||
1346 | <p>Write spreadsheet_metadata records if selected</p> | ||
1347 | </div> | ||
1348 | <div class='code'> | ||
1349 | <div class="highlight"><pre> <span class="n">sync_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">selected_streams</span><span class="p">,</span> <span class="n">catalog</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">spreadsheet_metadata_tf</span><span class="p">,</span> \ | ||
1350 | <span class="n">ss_time_extracted</span><span class="p">)</span></pre></div> | ||
1351 | </div> | ||
1352 | </div> | ||
1353 | <div class='clearall'></div> | ||
1354 | <div class='section' id='section-84'> | ||
1355 | <div class='docs'> | ||
1356 | <div class='octowrap'> | ||
1357 | <a class='octothorpe' href='#section-84'>#</a> | ||
1358 | </div> | ||
1359 | <h2>SHEET_METADATA and SHEET_DATA</h2> | ||
1360 | </div> | ||
1361 | <div class='code'> | ||
1362 | <div class="highlight"><pre> <span class="n">sheets</span> <span class="o">=</span> <span class="n">spreadsheet_metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'sheets'</span><span class="p">)</span> | ||
1363 | <span class="n">sheet_metadata</span> <span class="o">=</span> <span class="p">[]</span> | ||
1364 | <span class="n">sheets_loaded</span> <span class="o">=</span> <span class="p">[]</span> | ||
1365 | <span class="n">sheets_loaded_config</span> <span class="o">=</span> <span class="n">STREAMS</span><span class="p">[</span><span class="s1">'sheets_loaded'</span><span class="p">]</span> | ||
1366 | <span class="k">if</span> <span class="n">sheets</span><span class="p">:</span></pre></div> | ||
1367 | </div> | ||
1368 | </div> | ||
1369 | <div class='clearall'></div> | ||
1370 | <div class='section' id='section-85'> | ||
1371 | <div class='docs'> | ||
1372 | <div class='octowrap'> | ||
1373 | <a class='octothorpe' href='#section-85'>#</a> | ||
1374 | </div> | ||
1375 | <p>Loop through the sheets (worksheet tabs) in the spreadsheet</p> | ||
1376 | </div> | ||
1377 | <div class='code'> | ||
1378 | <div class="highlight"><pre> <span class="k">for</span> <span class="n">sheet</span> <span class="ow">in</span> <span class="n">sheets</span><span class="p">:</span> | ||
1379 | <span class="n">sheet_title</span> <span class="o">=</span> <span class="n">sheet</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'title'</span><span class="p">)</span> | ||
1380 | <span class="n">sheet_id</span> <span class="o">=</span> <span class="n">sheet</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'sheetId'</span><span class="p">)</span></pre></div> | ||
1381 | </div> | ||
1382 | </div> | ||
1383 | <div class='clearall'></div> | ||
1384 | <div class='section' id='section-86'> | ||
1385 | <div class='docs'> | ||
1386 | <div class='octowrap'> | ||
1387 | <a class='octothorpe' href='#section-86'>#</a> | ||
1388 | </div> | ||
1389 | <h3>SHEET_METADATA</h3> | ||
1390 | <p>GET sheet_metadata and columns</p> | ||
1391 | </div> | ||
1392 | <div class='code'> | ||
1393 | <div class="highlight"><pre> <span class="n">sheet_schema</span><span class="p">,</span> <span class="n">columns</span> <span class="o">=</span> <span class="n">get_sheet_metadata</span><span class="p">(</span><span class="n">sheet</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">client</span><span class="p">)</span> | ||
1394 | |||
1395 | <span class="k">if</span> <span class="ow">not</span> <span class="n">sheet_schema</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">columns</span><span class="p">:</span> | ||
1396 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'SKIPPING Empty Sheet: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">))</span> | ||
1397 | <span class="k">else</span><span class="p">:</span></pre></div> | ||
1398 | </div> | ||
1399 | </div> | ||
1400 | <div class='clearall'></div> | ||
1401 | <div class='section' id='section-87'> | ||
1402 | <div class='docs'> | ||
1403 | <div class='octowrap'> | ||
1404 | <a class='octothorpe' href='#section-87'>#</a> | ||
1405 | </div> | ||
1406 | <p>Transform sheet_metadata</p> | ||
1407 | </div> | ||
1408 | <div class='code'> | ||
1409 | <div class="highlight"><pre> <span class="n">sheet_metadata_tf</span> <span class="o">=</span> <span class="n">transform_sheet_metadata</span><span class="p">(</span><span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">sheet</span><span class="p">,</span> <span class="n">columns</span><span class="p">)</span> | ||
1410 | <span class="n">sheet_metadata</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sheet_metadata_tf</span><span class="p">)</span></pre></div> | ||
1411 | </div> | ||
1412 | </div> | ||
1413 | <div class='clearall'></div> | ||
1414 | <div class='section' id='section-88'> | ||
1415 | <div class='docs'> | ||
1416 | <div class='octowrap'> | ||
1417 | <a class='octothorpe' href='#section-88'>#</a> | ||
1418 | </div> | ||
1419 | <h3>SHEET_DATA</h3> | ||
1420 | </div> | ||
1421 | <div class='code'> | ||
1422 | <div class="highlight"><pre> <span class="k">if</span> <span class="n">sheet_title</span> <span class="ow">in</span> <span class="n">selected_streams</span><span class="p">:</span> | ||
1423 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'STARTED Syncing Sheet </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">))</span> | ||
1424 | <span class="n">update_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">sheet_title</span><span class="p">)</span> | ||
1425 | <span class="n">selected_fields</span> <span class="o">=</span> <span class="n">get_selected_fields</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">sheet_title</span><span class="p">)</span> | ||
1426 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Stream: </span><span class="si">{}</span><span class="s1">, selected_fields: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">,</span> <span class="n">selected_fields</span><span class="p">))</span> | ||
1427 | <span class="n">write_schema</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">sheet_title</span><span class="p">)</span></pre></div> | ||
1428 | </div> | ||
1429 | </div> | ||
1430 | <div class='clearall'></div> | ||
1431 | <div class='section' id='section-89'> | ||
1432 | <div class='docs'> | ||
1433 | <div class='octowrap'> | ||
1434 | <a class='octothorpe' href='#section-89'>#</a> | ||
1435 | </div> | ||
1436 | <p>Emit a Singer ACTIVATE_VERSION message before the initial sync (but not before subsequent syncs) | ||
1437 | and every time after a sheet sync is complete. | ||
1438 | This forces hard deletes on the data downstream if fewer records are sent. | ||
1439 | https://github.com/singer-io/singer-python/blob/master/singer/messages.py#L137</p> | ||
1440 | </div> | ||
1441 | <div class='code'> | ||
1442 | <div class="highlight"><pre> <span class="n">last_integer</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">get_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">sheet_title</span><span class="p">,</span> <span class="mi">0</span><span class="p">))</span> | ||
1443 | <span class="n">activate_version</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span> <span class="o">*</span> <span class="mi">1000</span><span class="p">)</span> | ||
1444 | <span class="n">activate_version_message</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">ActivateVersionMessage</span><span class="p">(</span> | ||
1445 | <span class="n">stream</span><span class="o">=</span><span class="n">sheet_title</span><span class="p">,</span> | ||
1446 | <span class="n">version</span><span class="o">=</span><span class="n">activate_version</span><span class="p">)</span> | ||
1447 | <span class="k">if</span> <span class="n">last_integer</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span></pre></div> | ||
1448 | </div> | ||
1449 | </div> | ||
1450 | <div class='clearall'></div> | ||
1451 | <div class='section' id='section-90'> | ||
1452 | <div class='docs'> | ||
1453 | <div class='octowrap'> | ||
1454 | <a class='octothorpe' href='#section-90'>#</a> | ||
1455 | </div> | ||
1456 | <p>Initial load: send activate_version before AND after the data sync</p> | ||
1457 | </div> | ||
1458 | <div class='code'> | ||
1459 | <div class="highlight"><pre> <span class="n">singer</span><span class="o">.</span><span class="n">write_message</span><span class="p">(</span><span class="n">activate_version_message</span><span class="p">)</span> | ||
1460 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'INITIAL SYNC, Stream: </span><span class="si">{}</span><span class="s1">, Activate Version: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">,</span> <span class="n">activate_version</span><span class="p">))</span></pre></div> | ||
1461 | </div> | ||
1462 | </div> | ||
1463 | <div class='clearall'></div> | ||
1464 | <div class='section' id='section-91'> | ||
1465 | <div class='docs'> | ||
1466 | <div class='octowrap'> | ||
1467 | <a class='octothorpe' href='#section-91'>#</a> | ||
1468 | </div> | ||
1469 | <p>Determine max range of columns and rows for “paging” through the data</p> | ||
1470 | </div> | ||
1471 | <div class='code'> | ||
1472 | <div class="highlight"><pre> <span class="n">sheet_last_col_index</span> <span class="o">=</span> <span class="mi">1</span> | ||
1473 | <span class="n">sheet_last_col_letter</span> <span class="o">=</span> <span class="s1">'A'</span> | ||
1474 | <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span> | ||
1475 | <span class="n">col_index</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'columnIndex'</span><span class="p">)</span> | ||
1476 | <span class="n">col_letter</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'columnLetter'</span><span class="p">)</span> | ||
1477 | <span class="k">if</span> <span class="n">col_index</span> <span class="o">></span> <span class="n">sheet_last_col_index</span><span class="p">:</span> | ||
1478 | <span class="n">sheet_last_col_index</span> <span class="o">=</span> <span class="n">col_index</span> | ||
1479 | <span class="n">sheet_last_col_letter</span> <span class="o">=</span> <span class="n">col_letter</span> | ||
1480 | <span class="n">sheet_max_row</span> <span class="o">=</span> <span class="n">sheet</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">)</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'gridProperties'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'rowCount'</span><span class="p">)</span></pre></div> | ||
1481 | </div> | ||
1482 | </div> | ||
1483 | <div class='clearall'></div> | ||
1484 | <div class='section' id='section-92'> | ||
1485 | <div class='docs'> | ||
1486 | <div class='octowrap'> | ||
1487 | <a class='octothorpe' href='#section-92'>#</a> | ||
1488 | </div> | ||
1489 | <p>Initialize paging for 1st batch</p> | ||
1490 | </div> | ||
1491 | <div class='code'> | ||
1492 | <div class="highlight"><pre> <span class="n">is_last_row</span> <span class="o">=</span> <span class="kc">False</span> | ||
1493 | <span class="n">batch_rows</span> <span class="o">=</span> <span class="mi">200</span> | ||
1494 | <span class="n">from_row</span> <span class="o">=</span> <span class="mi">2</span> | ||
1495 | <span class="k">if</span> <span class="n">sheet_max_row</span> <span class="o"><</span> <span class="n">batch_rows</span><span class="p">:</span> | ||
1496 | <span class="n">to_row</span> <span class="o">=</span> <span class="n">sheet_max_row</span> | ||
1497 | <span class="k">else</span><span class="p">:</span> | ||
1498 | <span class="n">to_row</span> <span class="o">=</span> <span class="n">batch_rows</span></pre></div> | ||
1499 | </div> | ||
1500 | </div> | ||
1501 | <div class='clearall'></div> | ||
1502 | <div class='section' id='section-93'> | ||
1503 | <div class='docs'> | ||
1504 | <div class='octowrap'> | ||
1505 | <a class='octothorpe' href='#section-93'>#</a> | ||
1506 | </div> | ||
1507 | <p>Loop through batches (each covering up to 200 rows of data)</p> | ||
1508 | </div> | ||
1509 | <div class='code'> | ||
1510 | <div class="highlight"><pre> <span class="k">while</span> <span class="ow">not</span> <span class="n">is_last_row</span> <span class="ow">and</span> <span class="n">from_row</span> <span class="o"><</span> <span class="n">sheet_max_row</span> <span class="ow">and</span> <span class="n">to_row</span> <span class="o"><=</span> <span class="n">sheet_max_row</span><span class="p">:</span> | ||
1511 | <span class="n">range_rows</span> <span class="o">=</span> <span class="s1">'A</span><span class="si">{}</span><span class="s1">:</span><span class="si">{}{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">from_row</span><span class="p">,</span> <span class="n">sheet_last_col_letter</span><span class="p">,</span> <span class="n">to_row</span><span class="p">)</span></pre></div> | ||
1512 | </div> | ||
1513 | </div> | ||
1514 | <div class='clearall'></div> | ||
1515 | <div class='section' id='section-94'> | ||
1516 | <div class='docs'> | ||
1517 | <div class='octowrap'> | ||
1518 | <a class='octothorpe' href='#section-94'>#</a> | ||
1519 | </div> | ||
1520 | <p>GET sheet_data for a worksheet tab</p> | ||
1521 | </div> | ||
1522 | <div class='code'> | ||
1523 | <div class="highlight"><pre> <span class="n">sheet_data</span><span class="p">,</span> <span class="n">time_extracted</span> <span class="o">=</span> <span class="n">get_data</span><span class="p">(</span> | ||
1524 | <span class="n">stream_name</span><span class="o">=</span><span class="n">sheet_title</span><span class="p">,</span> | ||
1525 | <span class="n">endpoint_config</span><span class="o">=</span><span class="n">sheets_loaded_config</span><span class="p">,</span> | ||
1526 | <span class="n">client</span><span class="o">=</span><span class="n">client</span><span class="p">,</span> | ||
1527 | <span class="n">spreadsheet_id</span><span class="o">=</span><span class="n">spreadsheet_id</span><span class="p">,</span> | ||
1528 | <span class="n">range_rows</span><span class="o">=</span><span class="n">range_rows</span><span class="p">)</span></pre></div> | ||
1529 | </div> | ||
1530 | </div> | ||
1531 | <div class='clearall'></div> | ||
1532 | <div class='section' id='section-95'> | ||
1533 | <div class='docs'> | ||
1534 | <div class='octowrap'> | ||
1535 | <a class='octothorpe' href='#section-95'>#</a> | ||
1536 | </div> | ||
1537 | <p>Data is returned as a list of arrays, an array of values for each row</p> | ||
1538 | </div> | ||
1539 | <div class='code'> | ||
1540 | <div class="highlight"><pre> <span class="n">sheet_data_rows</span> <span class="o">=</span> <span class="n">sheet_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'values'</span><span class="p">,</span> <span class="p">[])</span></pre></div> | ||
1541 | </div> | ||
1542 | </div> | ||
1543 | <div class='clearall'></div> | ||
1544 | <div class='section' id='section-96'> | ||
1545 | <div class='docs'> | ||
1546 | <div class='octowrap'> | ||
1547 | <a class='octothorpe' href='#section-96'>#</a> | ||
1548 | </div> | ||
1549 | <p>Transform batch of rows to JSON with keys for each column</p> | ||
1550 | </div> | ||
1551 | <div class='code'> | ||
1552 | <div class="highlight"><pre> <span class="n">sheet_data_tf</span><span class="p">,</span> <span class="n">row_num</span> <span class="o">=</span> <span class="n">transform_sheet_data</span><span class="p">(</span> | ||
1553 | <span class="n">spreadsheet_id</span><span class="o">=</span><span class="n">spreadsheet_id</span><span class="p">,</span> | ||
1554 | <span class="n">sheet_id</span><span class="o">=</span><span class="n">sheet_id</span><span class="p">,</span> | ||
1555 | <span class="n">sheet_title</span><span class="o">=</span><span class="n">sheet_title</span><span class="p">,</span> | ||
1556 | <span class="n">from_row</span><span class="o">=</span><span class="n">from_row</span><span class="p">,</span> | ||
1557 | <span class="n">columns</span><span class="o">=</span><span class="n">columns</span><span class="p">,</span> | ||
1558 | <span class="n">sheet_data_rows</span><span class="o">=</span><span class="n">sheet_data_rows</span><span class="p">)</span> | ||
1559 | <span class="k">if</span> <span class="n">row_num</span> <span class="o"><</span> <span class="n">to_row</span><span class="p">:</span> | ||
1560 | <span class="n">is_last_row</span> <span class="o">=</span> <span class="kc">True</span></pre></div> | ||
1561 | </div> | ||
1562 | </div> | ||
1563 | <div class='clearall'></div> | ||
1564 | <div class='section' id='section-97'> | ||
1565 | <div class='docs'> | ||
1566 | <div class='octowrap'> | ||
1567 | <a class='octothorpe' href='#section-97'>#</a> | ||
1568 | </div> | ||
1569 | <p>Process records, send batch of records to target</p> | ||
1570 | </div> | ||
1571 | <div class='code'> | ||
1572 | <div class="highlight"><pre> <span class="n">record_count</span> <span class="o">=</span> <span class="n">process_records</span><span class="p">(</span> | ||
1573 | <span class="n">catalog</span><span class="o">=</span><span class="n">catalog</span><span class="p">,</span> | ||
1574 | <span class="n">stream_name</span><span class="o">=</span><span class="n">sheet_title</span><span class="p">,</span> | ||
1575 | <span class="n">records</span><span class="o">=</span><span class="n">sheet_data_tf</span><span class="p">,</span> | ||
1576 | <span class="n">time_extracted</span><span class="o">=</span><span class="n">ss_time_extracted</span><span class="p">,</span> | ||
1577 | <span class="n">version</span><span class="o">=</span><span class="n">activate_version</span><span class="p">)</span> | ||
1578 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Sheet: </span><span class="si">{}</span><span class="s1">, records processed: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1579 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">record_count</span><span class="p">))</span></pre></div> | ||
1580 | </div> | ||
1581 | </div> | ||
1582 | <div class='clearall'></div> | ||
1583 | <div class='section' id='section-98'> | ||
1584 | <div class='docs'> | ||
1585 | <div class='octowrap'> | ||
1586 | <a class='octothorpe' href='#section-98'>#</a> | ||
1587 | </div> | ||
1588 | <p>Update paging from/to_row for next batch</p> | ||
1589 | </div> | ||
1590 | <div class='code'> | ||
1591 | <div class="highlight"><pre> <span class="n">from_row</span> <span class="o">=</span> <span class="n">to_row</span> <span class="o">+</span> <span class="mi">1</span> | ||
1592 | <span class="k">if</span> <span class="n">to_row</span> <span class="o">+</span> <span class="n">batch_rows</span> <span class="o">></span> <span class="n">sheet_max_row</span><span class="p">:</span> | ||
1593 | <span class="n">to_row</span> <span class="o">=</span> <span class="n">sheet_max_row</span> | ||
1594 | <span class="k">else</span><span class="p">:</span> | ||
1595 | <span class="n">to_row</span> <span class="o">=</span> <span class="n">to_row</span> <span class="o">+</span> <span class="n">batch_rows</span></pre></div> | ||
1596 | </div> | ||
1597 | </div> | ||
1598 | <div class='clearall'></div> | ||
1599 | <div class='section' id='section-99'> | ||
1600 | <div class='docs'> | ||
1601 | <div class='octowrap'> | ||
1602 | <a class='octothorpe' href='#section-99'>#</a> | ||
1603 | </div> | ||
1604 | <p>End of Stream: Send Activate Version and update State</p> | ||
1605 | </div> | ||
1606 | <div class='code'> | ||
1607 | <div class="highlight"><pre> <span class="n">singer</span><span class="o">.</span><span class="n">write_message</span><span class="p">(</span><span class="n">activate_version_message</span><span class="p">)</span> | ||
1608 | <span class="n">write_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">sheet_title</span><span class="p">,</span> <span class="n">activate_version</span><span class="p">)</span> | ||
1609 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'COMPLETE SYNC, Stream: </span><span class="si">{}</span><span class="s1">, Activate Version: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">,</span> <span class="n">activate_version</span><span class="p">))</span> | ||
1610 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'FINISHED Syncing Sheet </span><span class="si">{}</span><span class="s1">, Total Rows: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1611 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">row_num</span> <span class="o">-</span> <span class="mi">2</span><span class="p">))</span> <span class="c1"># subtract 1 for header row</span> | ||
1612 | <span class="n">update_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span></pre></div> | ||
1613 | </div> | ||
1614 | </div> | ||
1615 | <div class='clearall'></div> | ||
1616 | <div class='section' id='section-100'> | ||
1617 | <div class='docs'> | ||
1618 | <div class='octowrap'> | ||
1619 | <a class='octothorpe' href='#section-100'>#</a> | ||
1620 | </div> | ||
1621 | <p>SHEETS_LOADED: | ||
1622 | add the sheet to sheets_loaded</p> | ||
1623 | </div> | ||
1624 | <div class='code'> | ||
1625 | <div class="highlight"><pre> <span class="n">sheet_loaded</span> <span class="o">=</span> <span class="p">{}</span> | ||
1626 | <span class="n">sheet_loaded</span><span class="p">[</span><span class="s1">'spreadsheetId'</span><span class="p">]</span> <span class="o">=</span> <span class="n">spreadsheet_id</span> | ||
1627 | <span class="n">sheet_loaded</span><span class="p">[</span><span class="s1">'sheetId'</span><span class="p">]</span> <span class="o">=</span> <span class="n">sheet_id</span> | ||
1628 | <span class="n">sheet_loaded</span><span class="p">[</span><span class="s1">'title'</span><span class="p">]</span> <span class="o">=</span> <span class="n">sheet_title</span> | ||
1629 | <span class="n">sheet_loaded</span><span class="p">[</span><span class="s1">'loadDate'</span><span class="p">]</span> <span class="o">=</span> <span class="n">strftime</span><span class="p">(</span><span class="n">utils</span><span class="o">.</span><span class="n">now</span><span class="p">())</span> | ||
1630 | <span class="n">sheet_loaded</span><span class="p">[</span><span class="s1">'lastRowNumber'</span><span class="p">]</span> <span class="o">=</span> <span class="n">row_num</span> | ||
1631 | <span class="n">sheets_loaded</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sheet_loaded</span><span class="p">)</span> | ||
1632 | |||
1633 | <span class="n">stream_name</span> <span class="o">=</span> <span class="s1">'sheet_metadata'</span></pre></div> | ||
1634 | </div> | ||
1635 | </div> | ||
1636 | <div class='clearall'></div> | ||
1637 | <div class='section' id='section-101'> | ||
1638 | <div class='docs'> | ||
1639 | <div class='octowrap'> | ||
1640 | <a class='octothorpe' href='#section-101'>#</a> | ||
1641 | </div> | ||
1642 | <p>Write sheet_metadata records if selected</p> | ||
1643 | </div> | ||
1644 | <div class='code'> | ||
1645 | <div class="highlight"><pre> <span class="n">sync_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">selected_streams</span><span class="p">,</span> <span class="n">catalog</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">sheet_metadata</span><span class="p">)</span> | ||
1646 | |||
1647 | <span class="n">stream_name</span> <span class="o">=</span> <span class="s1">'sheets_loaded'</span></pre></div> | ||
1648 | </div> | ||
1649 | </div> | ||
1650 | <div class='clearall'></div> | ||
1651 | <div class='section' id='section-102'> | ||
1652 | <div class='docs'> | ||
1653 | <div class='octowrap'> | ||
1654 | <a class='octothorpe' href='#section-102'>#</a> | ||
1655 | </div> | ||
1656 | <p>Write sheets_loaded records if selected</p> | ||
1657 | </div> | ||
1658 | <div class='code'> | ||
1659 | <div class="highlight"><pre> <span class="n">sync_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">selected_streams</span><span class="p">,</span> <span class="n">catalog</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">sheets_loaded</span><span class="p">)</span></pre></div> | ||
1660 | </div> | ||
1661 | </div> | ||
1662 | <div class='clearall'></div> | ||
1663 | <div class='section' id='section-103'> | ||
1664 | <div class='docs'> | ||
1665 | <div class='octowrap'> | ||
1666 | <a class='octothorpe' href='#section-103'>#</a> | ||
1667 | </div> | ||
1668 | <p>Update file_metadata bookmark</p> | ||
1669 | </div> | ||
1670 | <div class='code'> | ||
1671 | <div class="highlight"><pre> <span class="n">write_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="s1">'file_metadata'</span><span class="p">,</span> <span class="n">strftime</span><span class="p">(</span><span class="n">this_datetime</span><span class="p">))</span> | ||
1672 | |||
1673 | <span class="k">return</span> | ||
1674 | |||
1675 | </pre></div> | ||
1676 | </div> | ||
1677 | </div> | ||
1678 | <div class='clearall'></div> | ||
1679 | </div> | ||
1680 | </body> | ||