diff options
Diffstat (limited to 'docs/sync.html')
-rw-r--r-- | docs/sync.html | 1680 |
1 files changed, 1680 insertions, 0 deletions
diff --git a/docs/sync.html b/docs/sync.html new file mode 100644 index 0000000..97ef9fa --- /dev/null +++ b/docs/sync.html | |||
@@ -0,0 +1,1680 @@ | |||
1 | <!DOCTYPE html> | ||
2 | <html> | ||
3 | <head> | ||
4 | <meta http-equiv="content-type" content="text/html;charset=utf-8"> | ||
5 | <title>sync.py</title> | ||
6 | <link rel="stylesheet" href="pycco.css"> | ||
7 | </head> | ||
8 | <body> | ||
9 | <div id='container'> | ||
10 | <div id="background"></div> | ||
11 | <div class='section'> | ||
12 | <div class='docs'><h1>sync.py</h1></div> | ||
13 | </div> | ||
14 | <div class='clearall'> | ||
15 | <div class='section' id='section-0'> | ||
16 | <div class='docs'> | ||
17 | <div class='octowrap'> | ||
18 | <a class='octothorpe' href='#section-0'>#</a> | ||
19 | </div> | ||
20 | <p>This module contains the logic to sync data from the API.</p> | ||
21 | <hr /> | ||
22 | <p>Syncable streams: The tap seems to care about syncing the streams in this order.</p> | ||
23 | <ol> | ||
24 | <li><code>file_metadata</code></li> | ||
25 | <li><code>spreadsheet_metadata</code></li> | ||
26 | <li><em>N</em> Sheets</li> | ||
27 | <li><code>sheet_metadata</code></li> | ||
28 | <li><code>sheets_loaded</code></li> | ||
29 | <li><code>sheets_loaded</code></li> | ||
30 | </ol> | ||
31 | <hr /> | ||
32 | <p>The flow through this module is:</p> | ||
33 | <ol> | ||
34 | <li>Entrypoint: <code>sync()</code></li> | ||
35 | <li>Sync <code>file_metadata</code><ol> | ||
36 | <li><code>get_data()</code></li> | ||
37 | <li><code>transform_file_metadata()</code></li> | ||
38 | <li>Maybe exit the sync</li> | ||
39 | <li><code>sync_stream()</code></li> | ||
40 | </ol> | ||
41 | </li> | ||
42 | <li>Sync <code>spreadsheet_metadata</code><ol> | ||
43 | <li><code>get_data()</code></li> | ||
44 | <li><code>transform_spreadsheet_metadata()</code></li> | ||
45 | <li><code>sync_stream()</code></li> | ||
46 | </ol> | ||
47 | </li> | ||
48 | <li>Sync all of the Sheets. Here’s the process for a single Sheet<ol> | ||
49 | <li><code>get_sheet_metadata()</code></li> | ||
50 | <li><code>transform_sheet_metadata()</code></li> | ||
51 | <li><code>get_data()</code></li> | ||
52 | <li><code>transform_sheet_data()</code></li> | ||
53 | <li><code>process_records()</code></li> | ||
54 | </ol> | ||
55 | </li> | ||
56 | <li>Sync <code>sheet_metadata</code><ol> | ||
57 | <li><code>sync_stream()</code></li> | ||
58 | </ol> | ||
59 | </li> | ||
60 | <li>Sync <code>sheets_loaded</code><ol> | ||
61 | <li><code>sync_stream()</code></li> | ||
62 | </ol> | ||
63 | </li> | ||
64 | <li>Sync <code>sheets_loaded</code><ol> | ||
65 | <li><code>sync_stream()</code></li> | ||
66 | </ol> | ||
67 | </li> | ||
68 | </ol> | ||
69 | </div> | ||
70 | <div class='code'> | ||
71 | <div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">time</span> | ||
72 | <span class="kn">import</span> <span class="nn">math</span> | ||
73 | <span class="kn">import</span> <span class="nn">json</span> | ||
74 | <span class="kn">import</span> <span class="nn">re</span> | ||
75 | <span class="kn">import</span> <span class="nn">urllib.parse</span> | ||
76 | <span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span> | ||
77 | <span class="kn">import</span> <span class="nn">pytz</span> | ||
78 | <span class="kn">import</span> <span class="nn">singer</span> | ||
79 | <span class="kn">from</span> <span class="nn">singer</span> <span class="kn">import</span> <span class="n">metrics</span><span class="p">,</span> <span class="n">metadata</span><span class="p">,</span> <span class="n">Transformer</span><span class="p">,</span> <span class="n">utils</span> | ||
80 | <span class="kn">from</span> <span class="nn">singer.utils</span> <span class="kn">import</span> <span class="n">strptime_to_utc</span><span class="p">,</span> <span class="n">strftime</span> | ||
81 | <span class="kn">from</span> <span class="nn">singer.messages</span> <span class="kn">import</span> <span class="n">RecordMessage</span> | ||
82 | <span class="kn">from</span> <span class="nn">tap_google_sheets.streams</span> <span class="kn">import</span> <span class="n">STREAMS</span> | ||
83 | <span class="kn">from</span> <span class="nn">tap_google_sheets.schema</span> <span class="kn">import</span> <span class="n">get_sheet_metadata</span> | ||
84 | |||
85 | <span class="n">LOGGER</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">get_logger</span><span class="p">()</span></pre></div> | ||
86 | </div> | ||
87 | </div> | ||
88 | <div class='clearall'></div> | ||
89 | <div class='section' id='section-1'> | ||
90 | <div class='docs'> | ||
91 | <div class='octowrap'> | ||
92 | <a class='octothorpe' href='#section-1'>#</a> | ||
93 | </div> | ||
94 | <hr /> | ||
95 | </div> | ||
96 | <div class='code'> | ||
97 | <div class="highlight"><pre></pre></div> | ||
98 | </div> | ||
99 | </div> | ||
100 | <div class='clearall'></div> | ||
101 | <div class='section' id='section-2'> | ||
102 | <div class='docs'> | ||
103 | <div class='octowrap'> | ||
104 | <a class='octothorpe' href='#section-2'>#</a> | ||
105 | </div> | ||
106 | <h1>Helper Functions</h1> | ||
107 | </div> | ||
108 | <div class='code'> | ||
109 | <div class="highlight"><pre></pre></div> | ||
110 | </div> | ||
111 | </div> | ||
112 | <div class='clearall'></div> | ||
113 | <div class='section' id='section-3'> | ||
114 | <div class='docs'> | ||
115 | <div class='octowrap'> | ||
116 | <a class='octothorpe' href='#section-3'>#</a> | ||
117 | </div> | ||
118 | <hr /> | ||
119 | <p>Log that we write a schema via singer.write_schema</p> | ||
120 | </div> | ||
121 | <div class='code'> | ||
122 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">write_schema</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">):</span></pre></div> | ||
123 | </div> | ||
124 | </div> | ||
125 | <div class='clearall'></div> | ||
126 | <div class='section' id='section-4'> | ||
127 | <div class='docs'> | ||
128 | <div class='octowrap'> | ||
129 | <a class='octothorpe' href='#section-4'>#</a> | ||
130 | </div> | ||
131 | |||
132 | </div> | ||
133 | <div class='code'> | ||
134 | <div class="highlight"><pre> <span class="n">stream</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">get_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span> | ||
135 | <span class="n">schema</span> <span class="o">=</span> <span class="n">stream</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span> | ||
136 | <span class="k">try</span><span class="p">:</span> | ||
137 | <span class="n">singer</span><span class="o">.</span><span class="n">write_schema</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="n">stream</span><span class="o">.</span><span class="n">key_properties</span><span class="p">)</span> | ||
138 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Writing schema for: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream_name</span><span class="p">))</span> | ||
139 | <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span></pre></div> | ||
140 | </div> | ||
141 | </div> | ||
142 | <div class='clearall'></div> | ||
143 | <div class='section' id='section-5'> | ||
144 | <div class='docs'> | ||
145 | <div class='octowrap'> | ||
146 | <a class='octothorpe' href='#section-5'>#</a> | ||
147 | </div> | ||
148 | <p>QUESTION: When do we encounter an OSError?</p> | ||
149 | </div> | ||
150 | <div class='code'> | ||
151 | <div class="highlight"><pre> <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'OS Error writing schema for: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream_name</span><span class="p">))</span> | ||
152 | <span class="k">raise</span> <span class="n">err</span></pre></div> | ||
153 | </div> | ||
154 | </div> | ||
155 | <div class='clearall'></div> | ||
156 | <div class='section' id='section-6'> | ||
157 | <div class='docs'> | ||
158 | <div class='octowrap'> | ||
159 | <a class='octothorpe' href='#section-6'>#</a> | ||
160 | </div> | ||
161 | <p>Write a RecordMessage, with the given version if it was passed in</p> | ||
162 | </div> | ||
163 | <div class='code'> | ||
164 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">write_record</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">record</span><span class="p">,</span> <span class="n">time_extracted</span><span class="p">,</span> <span class="n">version</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span></pre></div> | ||
165 | </div> | ||
166 | </div> | ||
167 | <div class='clearall'></div> | ||
168 | <div class='section' id='section-7'> | ||
169 | <div class='docs'> | ||
170 | <div class='octowrap'> | ||
171 | <a class='octothorpe' href='#section-7'>#</a> | ||
172 | </div> | ||
173 | |||
174 | </div> | ||
175 | <div class='code'> | ||
176 | <div class="highlight"><pre> <span class="k">try</span><span class="p">:</span> | ||
177 | <span class="k">if</span> <span class="n">version</span><span class="p">:</span> | ||
178 | <span class="n">singer</span><span class="o">.</span><span class="n">messages</span><span class="o">.</span><span class="n">write_message</span><span class="p">(</span> | ||
179 | <span class="n">RecordMessage</span><span class="p">(</span> | ||
180 | <span class="n">stream</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
181 | <span class="n">record</span><span class="o">=</span><span class="n">record</span><span class="p">,</span> | ||
182 | <span class="n">version</span><span class="o">=</span><span class="n">version</span><span class="p">,</span> | ||
183 | <span class="n">time_extracted</span><span class="o">=</span><span class="n">time_extracted</span><span class="p">))</span> | ||
184 | <span class="k">else</span><span class="p">:</span> | ||
185 | <span class="n">singer</span><span class="o">.</span><span class="n">messages</span><span class="o">.</span><span class="n">write_record</span><span class="p">(</span> | ||
186 | <span class="n">stream_name</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
187 | <span class="n">record</span><span class="o">=</span><span class="n">record</span><span class="p">,</span> | ||
188 | <span class="n">time_extracted</span><span class="o">=</span><span class="n">time_extracted</span><span class="p">)</span> | ||
189 | <span class="k">except</span> <span class="ne">OSError</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span></pre></div> | ||
190 | </div> | ||
191 | </div> | ||
192 | <div class='clearall'></div> | ||
193 | <div class='section' id='section-8'> | ||
194 | <div class='docs'> | ||
195 | <div class='octowrap'> | ||
196 | <a class='octothorpe' href='#section-8'>#</a> | ||
197 | </div> | ||
198 | <p>QUESTION: When do we encounter an OSError?</p> | ||
199 | </div> | ||
200 | <div class='code'> | ||
201 | <div class="highlight"><pre> <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'OS Error writing record for: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream_name</span><span class="p">))</span> | ||
202 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'record: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">record</span><span class="p">))</span> | ||
203 | <span class="k">raise</span> <span class="n">err</span></pre></div> | ||
204 | </div> | ||
205 | </div> | ||
206 | <div class='clearall'></div> | ||
207 | <div class='section' id='section-9'> | ||
208 | <div class='docs'> | ||
209 | <div class='octowrap'> | ||
210 | <a class='octothorpe' href='#section-9'>#</a> | ||
211 | </div> | ||
212 | <p>Safe get a bookmark from <code>state</code>.</p> | ||
213 | </div> | ||
214 | <div class='code'> | ||
215 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">get_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">stream</span><span class="p">,</span> <span class="n">default</span><span class="p">):</span></pre></div> | ||
216 | </div> | ||
217 | </div> | ||
218 | <div class='clearall'></div> | ||
219 | <div class='section' id='section-10'> | ||
220 | <div class='docs'> | ||
221 | <div class='octowrap'> | ||
222 | <a class='octothorpe' href='#section-10'>#</a> | ||
223 | </div> | ||
224 | <p>Hides an error though if <code>state</code> turns out to be <code>None</code></p> | ||
225 | </div> | ||
226 | <div class='code'> | ||
227 | <div class="highlight"><pre> <span class="k">if</span> <span class="p">(</span><span class="n">state</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span><span class="s1">'bookmarks'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">state</span><span class="p">):</span> | ||
228 | <span class="k">return</span> <span class="n">default</span></pre></div> | ||
229 | </div> | ||
230 | </div> | ||
231 | <div class='clearall'></div> | ||
232 | <div class='section' id='section-11'> | ||
233 | <div class='docs'> | ||
234 | <div class='octowrap'> | ||
235 | <a class='octothorpe' href='#section-11'>#</a> | ||
236 | </div> | ||
237 | <p>This is also short enough for one line, is this supposed to be more readable?</p> | ||
238 | </div> | ||
239 | <div class='code'> | ||
240 | <div class="highlight"><pre> <span class="k">return</span> <span class="p">(</span> | ||
241 | <span class="n">state</span> | ||
242 | <span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'bookmarks'</span><span class="p">,</span> <span class="p">{})</span> | ||
243 | <span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">stream</span><span class="p">,</span> <span class="n">default</span><span class="p">)</span> | ||
244 | <span class="p">)</span></pre></div> | ||
245 | </div> | ||
246 | </div> | ||
247 | <div class='clearall'></div> | ||
248 | <div class='section' id='section-12'> | ||
249 | <div class='docs'> | ||
250 | <div class='octowrap'> | ||
251 | <a class='octothorpe' href='#section-12'>#</a> | ||
252 | </div> | ||
253 | <p>Updates and write state</p> | ||
254 | </div> | ||
255 | <div class='code'> | ||
256 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">write_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">stream</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span></pre></div> | ||
257 | </div> | ||
258 | </div> | ||
259 | <div class='clearall'></div> | ||
260 | <div class='section' id='section-13'> | ||
261 | <div class='docs'> | ||
262 | <div class='octowrap'> | ||
263 | <a class='octothorpe' href='#section-13'>#</a> | ||
264 | </div> | ||
265 | |||
266 | </div> | ||
267 | <div class='code'> | ||
268 | <div class="highlight"><pre> <span class="k">if</span> <span class="s1">'bookmarks'</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">state</span><span class="p">:</span> | ||
269 | <span class="n">state</span><span class="p">[</span><span class="s1">'bookmarks'</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span> | ||
270 | <span class="n">state</span><span class="p">[</span><span class="s1">'bookmarks'</span><span class="p">][</span><span class="n">stream</span><span class="p">]</span> <span class="o">=</span> <span class="n">value</span> | ||
271 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Write state for stream: </span><span class="si">{}</span><span class="s1">, value: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span> | ||
272 | <span class="n">singer</span><span class="o">.</span><span class="n">write_state</span><span class="p">(</span><span class="n">state</span><span class="p">)</span></pre></div> | ||
273 | </div> | ||
274 | </div> | ||
275 | <div class='clearall'></div> | ||
276 | <div class='section' id='section-14'> | ||
277 | <div class='docs'> | ||
278 | <div class='octowrap'> | ||
279 | <a class='octothorpe' href='#section-14'>#</a> | ||
280 | </div> | ||
281 | <p>Upserts or deletes the ‘currently_syncing’ stream</p> | ||
282 | </div> | ||
283 | <div class='code'> | ||
284 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">update_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">):</span></pre></div> | ||
285 | </div> | ||
286 | </div> | ||
287 | <div class='clearall'></div> | ||
288 | <div class='section' id='section-15'> | ||
289 | <div class='docs'> | ||
290 | <div class='octowrap'> | ||
291 | <a class='octothorpe' href='#section-15'>#</a> | ||
292 | </div> | ||
293 | <p>Why do we care if <code>stream_name</code> is passed in to delete <code>currently_syncing</code>?</p> | ||
294 | </div> | ||
295 | <div class='code'> | ||
296 | <div class="highlight"><pre> <span class="k">if</span> <span class="p">(</span><span class="n">stream_name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="s1">'currently_syncing'</span> <span class="ow">in</span> <span class="n">state</span><span class="p">):</span> | ||
297 | <span class="k">del</span> <span class="n">state</span><span class="p">[</span><span class="s1">'currently_syncing'</span><span class="p">]</span> | ||
298 | <span class="k">else</span><span class="p">:</span> | ||
299 | <span class="n">singer</span><span class="o">.</span><span class="n">set_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">)</span> | ||
300 | <span class="n">singer</span><span class="o">.</span><span class="n">write_state</span><span class="p">(</span><span class="n">state</span><span class="p">)</span></pre></div> | ||
301 | </div> | ||
302 | </div> | ||
303 | <div class='clearall'></div> | ||
304 | <div class='section' id='section-16'> | ||
305 | <div class='docs'> | ||
306 | <div class='octowrap'> | ||
307 | <a class='octothorpe' href='#section-16'>#</a> | ||
308 | </div> | ||
309 | <p>Get a list of selected, top-level fields for <code>stream_name</code></p> | ||
310 | </div> | ||
311 | <div class='code'> | ||
312 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">get_selected_fields</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">):</span></pre></div> | ||
313 | </div> | ||
314 | </div> | ||
315 | <div class='clearall'></div> | ||
316 | <div class='section' id='section-17'> | ||
317 | <div class='docs'> | ||
318 | <div class='octowrap'> | ||
319 | <a class='octothorpe' href='#section-17'>#</a> | ||
320 | </div> | ||
321 | |||
322 | </div> | ||
323 | <div class='code'> | ||
324 | <div class="highlight"><pre> <span class="n">stream</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">get_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span> | ||
325 | <span class="n">mdata</span> <span class="o">=</span> <span class="n">metadata</span><span class="o">.</span><span class="n">to_map</span><span class="p">(</span><span class="n">stream</span><span class="o">.</span><span class="n">metadata</span><span class="p">)</span> | ||
326 | <span class="n">mdata_list</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">metadata</span><span class="o">.</span><span class="n">to_list</span><span class="p">(</span><span class="n">mdata</span><span class="p">)</span> | ||
327 | <span class="n">selected_fields</span> <span class="o">=</span> <span class="p">[]</span> | ||
328 | <span class="k">for</span> <span class="n">entry</span> <span class="ow">in</span> <span class="n">mdata_list</span><span class="p">:</span> | ||
329 | <span class="n">field</span> <span class="o">=</span> <span class="kc">None</span> | ||
330 | <span class="k">try</span><span class="p">:</span> | ||
331 | <span class="n">field</span> <span class="o">=</span> <span class="n">entry</span><span class="p">[</span><span class="s1">'breadcrumb'</span><span class="p">][</span><span class="mi">1</span><span class="p">]</span> | ||
332 | <span class="k">if</span> <span class="n">entry</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'metadata'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'selected'</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span> | ||
333 | <span class="n">selected_fields</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">field</span><span class="p">)</span> | ||
334 | <span class="k">except</span> <span class="ne">IndexError</span><span class="p">:</span></pre></div> | ||
335 | </div> | ||
336 | </div> | ||
337 | <div class='clearall'></div> | ||
338 | <div class='section' id='section-18'> | ||
339 | <div class='docs'> | ||
340 | <div class='octowrap'> | ||
341 | <a class='octothorpe' href='#section-18'>#</a> | ||
342 | </div> | ||
343 | <p>Swallow the error for the Stream level metadata</p> | ||
344 | </div> | ||
345 | <div class='code'> | ||
346 | <div class="highlight"><pre> <span class="k">pass</span> | ||
347 | <span class="k">return</span> <span class="n">selected_fields</span></pre></div> | ||
348 | </div> | ||
349 | </div> | ||
350 | <div class='clearall'></div> | ||
351 | <div class='section' id='section-19'> | ||
352 | <div class='docs'> | ||
353 | <div class='octowrap'> | ||
354 | <a class='octothorpe' href='#section-19'>#</a> | ||
355 | </div> | ||
356 | <p>Construct the request we want to make, make the request, and return the Response</p> | ||
357 | </div> | ||
358 | <div class='code'> | ||
359 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">get_data</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">endpoint_config</span><span class="p">,</span> <span class="n">client</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">range_rows</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span></pre></div> | ||
360 | </div> | ||
361 | </div> | ||
362 | <div class='clearall'></div> | ||
363 | <div class='section' id='section-20'> | ||
364 | <div class='docs'> | ||
365 | <div class='octowrap'> | ||
366 | <a class='octothorpe' href='#section-20'>#</a> | ||
367 | </div> | ||
368 | |||
369 | </div> | ||
370 | <div class='code'> | ||
371 | <div class="highlight"><pre></pre></div> | ||
372 | </div> | ||
373 | </div> | ||
374 | <div class='clearall'></div> | ||
375 | <div class='section' id='section-21'> | ||
376 | <div class='docs'> | ||
377 | <div class='octowrap'> | ||
378 | <a class='octothorpe' href='#section-21'>#</a> | ||
379 | </div> | ||
380 | <h3>Build the query</h3> | ||
381 | </div> | ||
382 | <div class='code'> | ||
383 | <div class="highlight"><pre> <span class="n">stream_name_escaped</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">escape</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span></pre></div> | ||
384 | </div> | ||
385 | </div> | ||
386 | <div class='clearall'></div> | ||
387 | <div class='section' id='section-22'> | ||
388 | <div class='docs'> | ||
389 | <div class='octowrap'> | ||
390 | <a class='octothorpe' href='#section-22'>#</a> | ||
391 | </div> | ||
392 | <p>Encode stream_name to fix issues with special characters in <code>stream_name</code> | ||
393 | QUESTION: If there’s special characters here how do databases handle it?</p> | ||
394 | </div> | ||
395 | <div class='code'> | ||
396 | <div class="highlight"><pre> <span class="n">stream_name_encoded</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">parse</span><span class="o">.</span><span class="n">quote_plus</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span> | ||
397 | |||
398 | <span class="k">if</span> <span class="ow">not</span> <span class="n">range_rows</span><span class="p">:</span> | ||
399 | <span class="n">range_rows</span> <span class="o">=</span> <span class="s1">''</span></pre></div> | ||
400 | </div> | ||
401 | </div> | ||
402 | <div class='clearall'></div> | ||
403 | <div class='section' id='section-23'> | ||
404 | <div class='docs'> | ||
405 | <div class='octowrap'> | ||
406 | <a class='octothorpe' href='#section-23'>#</a> | ||
407 | </div> | ||
408 | <p>QUESTION: Why is this not a <code>string.format()</code> with keywords?</p> | ||
409 | </div> | ||
410 | <div class='code'> | ||
411 | <div class="highlight"><pre> <span class="n">path</span> <span class="o">=</span> <span class="n">endpoint_config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'path'</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span> | ||
412 | <span class="s1">'</span><span class="si">{spreadsheet_id}</span><span class="s1">'</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'</span><span class="si">{sheet_title}</span><span class="s1">'</span><span class="p">,</span> <span class="n">stream_name_encoded</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span> | ||
413 | <span class="s1">'</span><span class="si">{range_rows}</span><span class="s1">'</span><span class="p">,</span> <span class="n">range_rows</span><span class="p">)</span> | ||
414 | <span class="n">params</span> <span class="o">=</span> <span class="n">endpoint_config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'params'</span><span class="p">,</span> <span class="p">{})</span> | ||
415 | <span class="n">api</span> <span class="o">=</span> <span class="n">endpoint_config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'api'</span><span class="p">,</span> <span class="s1">'sheets'</span><span class="p">)</span> | ||
416 | <span class="n">querystring</span> <span class="o">=</span> <span class="s1">'&'</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="s1">'</span><span class="si">%s</span><span class="s1">=</span><span class="si">%s</span><span class="s1">'</span> <span class="o">%</span> <span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> <span class="k">for</span> <span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span> <span class="ow">in</span> <span class="n">params</span><span class="o">.</span><span class="n">items</span><span class="p">()])</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span> | ||
417 | <span class="s1">'</span><span class="si">{sheet_title}</span><span class="s1">'</span><span class="p">,</span> <span class="n">stream_name_encoded</span><span class="p">)</span> | ||
418 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'URL: </span><span class="si">{}</span><span class="s1">/</span><span class="si">{}</span><span class="s1">?</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">client</span><span class="o">.</span><span class="n">base_url</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">querystring</span><span class="p">))</span> | ||
419 | <span class="n">data</span> <span class="o">=</span> <span class="p">{}</span> | ||
420 | <span class="n">time_extracted</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">now</span><span class="p">()</span></pre></div> | ||
421 | </div> | ||
422 | </div> | ||
423 | <div class='clearall'></div> | ||
424 | <div class='section' id='section-24'> | ||
425 | <div class='docs'> | ||
426 | <div class='octowrap'> | ||
427 | <a class='octothorpe' href='#section-24'>#</a> | ||
428 | </div> | ||
429 | <h3>Make the query</h3> | ||
430 | </div> | ||
431 | <div class='code'> | ||
432 | <div class="highlight"><pre> <span class="n">data</span> <span class="o">=</span> <span class="n">client</span><span class="o">.</span><span class="n">get</span><span class="p">(</span> | ||
433 | <span class="n">path</span><span class="o">=</span><span class="n">path</span><span class="p">,</span> | ||
434 | <span class="n">api</span><span class="o">=</span><span class="n">api</span><span class="p">,</span> | ||
435 | <span class="n">params</span><span class="o">=</span><span class="n">querystring</span><span class="p">,</span> | ||
436 | <span class="n">endpoint</span><span class="o">=</span><span class="n">stream_name_escaped</span><span class="p">)</span></pre></div> | ||
437 | </div> | ||
438 | </div> | ||
439 | <div class='clearall'></div> | ||
440 | <div class='section' id='section-25'> | ||
441 | <div class='docs'> | ||
442 | <div class='octowrap'> | ||
443 | <a class='octothorpe' href='#section-25'>#</a> | ||
444 | </div> | ||
445 | <h3>Return the Response.json()</h3> | ||
446 | </div> | ||
447 | <div class='code'> | ||
448 | <div class="highlight"><pre> <span class="k">return</span> <span class="n">data</span><span class="p">,</span> <span class="n">time_extracted</span></pre></div> | ||
449 | </div> | ||
450 | </div> | ||
451 | <div class='clearall'></div> | ||
452 | <div class='section' id='section-26'> | ||
453 | <div class='docs'> | ||
454 | <div class='octowrap'> | ||
455 | <a class='octothorpe' href='#section-26'>#</a> | ||
456 | </div> | ||
457 | <hr /> | ||
458 | <h1>Transform Functions</h1> | ||
459 | <p>There’s this line of code that happens in these that is a bit confusing:</p> | ||
460 | </div> | ||
461 | <div class='code'> | ||
462 | <div class="highlight"><pre></pre></div> | ||
463 | </div> | ||
464 | </div> | ||
465 | <div class='clearall'></div> | ||
466 | <div class='section' id='section-27'> | ||
467 | <div class='docs'> | ||
468 | <div class='octowrap'> | ||
469 | <a class='octothorpe' href='#section-27'>#</a> | ||
470 | </div> | ||
471 | <pre><code class="language-python">json.loads(json.dumps(some_object)) | ||
472 | </code></pre> | ||
473 | </div> | ||
474 | <div class='code'> | ||
475 | <div class="highlight"><pre></pre></div> | ||
476 | </div> | ||
477 | </div> | ||
478 | <div class='clearall'></div> | ||
479 | <div class='section' id='section-28'> | ||
480 | <div class='docs'> | ||
481 | <div class='octowrap'> | ||
482 | <a class='octothorpe' href='#section-28'>#</a> | ||
483 | </div> | ||
484 | <p>I don’t see the use here. We turn Python into a JSON string and back again. | ||
485 | The only thing I could see in the repl is that integer keys get stringified.</p> | ||
486 | </div> | ||
487 | <div class='code'> | ||
488 | <div class="highlight"><pre></pre></div> | ||
489 | </div> | ||
490 | </div> | ||
491 | <div class='clearall'></div> | ||
492 | <div class='section' id='section-29'> | ||
493 | <div class='docs'> | ||
494 | <div class='octowrap'> | ||
495 | <a class='octothorpe' href='#section-29'>#</a> | ||
496 | </div> | ||
497 | <p>In general, the transform functions just look like “maybe pop some | ||
498 | stuff”, “maybe add some stuff”, and return the input in a list</p> | ||
499 | </div> | ||
500 | <div class='code'> | ||
501 | <div class="highlight"><pre></pre></div> | ||
502 | </div> | ||
503 | </div> | ||
504 | <div class='clearall'></div> | ||
505 | <div class='section' id='section-30'> | ||
506 | <div class='docs'> | ||
507 | <div class='octowrap'> | ||
508 | <a class='octothorpe' href='#section-30'>#</a> | ||
509 | </div> | ||
510 | <hr /> | ||
511 | </div> | ||
512 | <div class='code'> | ||
513 | <div class="highlight"><pre></pre></div> | ||
514 | </div> | ||
515 | </div> | ||
516 | <div class='clearall'></div> | ||
517 | <div class='section' id='section-31'> | ||
518 | <div class='docs'> | ||
519 | <div class='octowrap'> | ||
520 | <a class='octothorpe' href='#section-31'>#</a> | ||
521 | </div> | ||
522 | <p>remove nodes from lastModifyingUser, format as array</p> | ||
523 | </div> | ||
524 | <div class='code'> | ||
525 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">transform_file_metadata</span><span class="p">(</span><span class="n">file_metadata</span><span class="p">):</span></pre></div> | ||
526 | </div> | ||
527 | </div> | ||
528 | <div class='clearall'></div> | ||
529 | <div class='section' id='section-32'> | ||
530 | <div class='docs'> | ||
531 | <div class='octowrap'> | ||
532 | <a class='octothorpe' href='#section-32'>#</a> | ||
533 | </div> | ||
534 | |||
535 | </div> | ||
536 | <div class='code'> | ||
537 | <div class="highlight"><pre> <span class="n">file_metadata_tf</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">file_metadata</span><span class="p">))</span> | ||
538 | |||
539 | <span class="k">if</span> <span class="n">file_metadata_tf</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'lastModifyingUser'</span><span class="p">):</span> | ||
540 | <span class="n">file_metadata_tf</span><span class="p">[</span><span class="s1">'lastModifyingUser'</span><span class="p">]</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s1">'photoLink'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> | ||
541 | <span class="n">file_metadata_tf</span><span class="p">[</span><span class="s1">'lastModifyingUser'</span><span class="p">]</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s1">'me'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> | ||
542 | <span class="n">file_metadata_tf</span><span class="p">[</span><span class="s1">'lastModifyingUser'</span><span class="p">]</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s1">'permissionId'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> | ||
543 | |||
544 | <span class="n">file_metadata_arr</span> <span class="o">=</span> <span class="p">[]</span> | ||
545 | <span class="n">file_metadata_arr</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">file_metadata_tf</span><span class="p">)</span> | ||
546 | <span class="k">return</span> <span class="n">file_metadata_arr</span></pre></div> | ||
547 | </div> | ||
548 | </div> | ||
549 | <div class='clearall'></div> | ||
550 | <div class='section' id='section-33'> | ||
551 | <div class='docs'> | ||
552 | <div class='octowrap'> | ||
553 | <a class='octothorpe' href='#section-33'>#</a> | ||
554 | </div> | ||
555 | <p>remove defaultFormat and sheets nodes, format as array</p> | ||
556 | </div> | ||
557 | <div class='code'> | ||
558 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">transform_spreadsheet_metadata</span><span class="p">(</span><span class="n">spreadsheet_metadata</span><span class="p">):</span></pre></div> | ||
559 | </div> | ||
560 | </div> | ||
561 | <div class='clearall'></div> | ||
562 | <div class='section' id='section-34'> | ||
563 | <div class='docs'> | ||
564 | <div class='octowrap'> | ||
565 | <a class='octothorpe' href='#section-34'>#</a> | ||
566 | </div> | ||
567 | |||
568 | </div> | ||
569 | <div class='code'> | ||
570 | <div class="highlight"><pre> <span class="n">spreadsheet_metadata_tf</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">spreadsheet_metadata</span><span class="p">))</span> | ||
571 | |||
572 | <span class="k">if</span> <span class="n">spreadsheet_metadata_tf</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">):</span> | ||
573 | <span class="n">spreadsheet_metadata_tf</span><span class="p">[</span><span class="s1">'properties'</span><span class="p">]</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s1">'defaultFormat'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> | ||
574 | <span class="n">spreadsheet_metadata_tf</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s1">'sheets'</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> | ||
575 | |||
576 | <span class="n">spreadsheet_metadata_arr</span> <span class="o">=</span> <span class="p">[]</span> | ||
577 | <span class="n">spreadsheet_metadata_arr</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">spreadsheet_metadata_tf</span><span class="p">)</span> | ||
578 | <span class="k">return</span> <span class="n">spreadsheet_metadata_arr</span></pre></div> | ||
579 | </div> | ||
580 | </div> | ||
581 | <div class='clearall'></div> | ||
582 | <div class='section' id='section-35'> | ||
583 | <div class='docs'> | ||
584 | <div class='octowrap'> | ||
585 | <a class='octothorpe' href='#section-35'>#</a> | ||
586 | </div> | ||
587 | <p>add spreadsheetId, sheetUrl, and columns metadata</p> | ||
588 | </div> | ||
589 | <div class='code'> | ||
590 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">transform_sheet_metadata</span><span class="p">(</span><span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">sheet</span><span class="p">,</span> <span class="n">columns</span><span class="p">):</span></pre></div> | ||
591 | </div> | ||
592 | </div> | ||
593 | <div class='clearall'></div> | ||
594 | <div class='section' id='section-36'> | ||
595 | <div class='docs'> | ||
596 | <div class='octowrap'> | ||
597 | <a class='octothorpe' href='#section-36'>#</a> | ||
598 | </div> | ||
599 | |||
600 | </div> | ||
601 | <div class='code'> | ||
602 | <div class="highlight"><pre> <span class="n">sheet_metadata</span> <span class="o">=</span> <span class="n">sheet</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">)</span> | ||
603 | <span class="n">sheet_metadata_tf</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">sheet_metadata</span><span class="p">))</span> | ||
604 | <span class="n">sheet_id</span> <span class="o">=</span> <span class="n">sheet_metadata_tf</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'sheetId'</span><span class="p">)</span> | ||
605 | <span class="n">sheet_url</span> <span class="o">=</span> <span class="s1">'https://docs.google.com/spreadsheets/d/</span><span class="si">{}</span><span class="s1">/edit#gid=</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">sheet_id</span><span class="p">)</span> | ||
606 | <span class="n">sheet_metadata_tf</span><span class="p">[</span><span class="s1">'spreadsheetId'</span><span class="p">]</span> <span class="o">=</span> <span class="n">spreadsheet_id</span> | ||
607 | <span class="n">sheet_metadata_tf</span><span class="p">[</span><span class="s1">'sheetUrl'</span><span class="p">]</span> <span class="o">=</span> <span class="n">sheet_url</span> | ||
608 | <span class="n">sheet_metadata_tf</span><span class="p">[</span><span class="s1">'columns'</span><span class="p">]</span> <span class="o">=</span> <span class="n">columns</span> | ||
609 | <span class="k">return</span> <span class="n">sheet_metadata_tf</span></pre></div> | ||
610 | </div> | ||
611 | </div> | ||
612 | <div class='clearall'></div> | ||
613 | <div class='section' id='section-37'> | ||
614 | <div class='docs'> | ||
615 | <div class='octowrap'> | ||
616 | <a class='octothorpe' href='#section-37'>#</a> | ||
617 | </div> | ||
618 | <p>Convert Excel Date Serial Number (excel_date_sn) to datetime string timezone_str: defaults to</p> | ||
619 | </div> | ||
620 | <div class='code'> | ||
621 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">excel_to_dttm_str</span><span class="p">(</span><span class="n">excel_date_sn</span><span class="p">,</span> <span class="n">timezone_str</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span></pre></div> | ||
622 | </div> | ||
623 | </div> | ||
624 | <div class='clearall'></div> | ||
625 | <div class='section' id='section-38'> | ||
626 | <div class='docs'> | ||
627 | <div class='octowrap'> | ||
628 | <a class='octothorpe' href='#section-38'>#</a> | ||
629 | </div> | ||
630 | <p>UTC (which we assume is the timezone for ALL datetimes)</p> | ||
631 | </div> | ||
632 | <div class='code'> | ||
633 | <div class="highlight"><pre> <span class="k">if</span> <span class="ow">not</span> <span class="n">timezone_str</span><span class="p">:</span> | ||
634 | <span class="n">timezone_str</span> <span class="o">=</span> <span class="s1">'UTC'</span> | ||
635 | <span class="n">tzn</span> <span class="o">=</span> <span class="n">pytz</span><span class="o">.</span><span class="n">timezone</span><span class="p">(</span><span class="n">timezone_str</span><span class="p">)</span> | ||
636 | <span class="n">epoch_dttm</span> <span class="o">=</span> <span class="n">datetime</span><span class="p">(</span><span class="mi">1970</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span> | ||
637 | |||
638 | <span class="n">sec_per_day</span> <span class="o">=</span> <span class="mi">86400</span></pre></div> | ||
639 | </div> | ||
640 | </div> | ||
641 | <div class='clearall'></div> | ||
642 | <div class='section' id='section-39'> | ||
643 | <div class='docs'> | ||
644 | <div class='octowrap'> | ||
645 | <a class='octothorpe' href='#section-39'>#</a> | ||
646 | </div> | ||
647 | <p>1970-01-01T00:00:00Z, Lotus Notes Serial Number for Epoch Start Date</p> | ||
648 | </div> | ||
649 | <div class='code'> | ||
650 | <div class="highlight"><pre> <span class="n">excel_epoch</span> <span class="o">=</span> <span class="mi">25569</span></pre></div> | ||
651 | </div> | ||
652 | </div> | ||
653 | <div class='clearall'></div> | ||
654 | <div class='section' id='section-40'> | ||
655 | <div class='docs'> | ||
656 | <div class='octowrap'> | ||
657 | <a class='octothorpe' href='#section-40'>#</a> | ||
658 | </div> | ||
659 | <p>Seconds since Epoch, times the seconds per day => days since Epoch?</p> | ||
660 | </div> | ||
661 | <div class='code'> | ||
662 | <div class="highlight"><pre> <span class="n">epoch_sec</span> <span class="o">=</span> <span class="n">math</span><span class="o">.</span><span class="n">floor</span><span class="p">((</span><span class="n">excel_date_sn</span> <span class="o">-</span> <span class="n">excel_epoch</span><span class="p">)</span> <span class="o">*</span> <span class="n">sec_per_day</span><span class="p">)</span> | ||
663 | |||
664 | <span class="n">excel_dttm</span> <span class="o">=</span> <span class="n">epoch_dttm</span> <span class="o">+</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">seconds</span><span class="o">=</span><span class="n">epoch_sec</span><span class="p">)</span> | ||
665 | <span class="n">utc_dttm</span> <span class="o">=</span> <span class="n">tzn</span><span class="o">.</span><span class="n">localize</span><span class="p">(</span><span class="n">excel_dttm</span><span class="p">)</span><span class="o">.</span><span class="n">astimezone</span><span class="p">(</span><span class="n">pytz</span><span class="o">.</span><span class="n">utc</span><span class="p">)</span> | ||
666 | <span class="n">utc_dttm_str</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">utils</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="n">utc_dttm</span><span class="p">)</span> | ||
667 | <span class="k">return</span> <span class="n">utc_dttm_str</span></pre></div> | ||
668 | </div> | ||
669 | </div> | ||
670 | <div class='clearall'></div> | ||
671 | <div class='section' id='section-41'> | ||
672 | <div class='docs'> | ||
673 | <div class='octowrap'> | ||
674 | <a class='octothorpe' href='#section-41'>#</a> | ||
675 | </div> | ||
676 | <hr /> | ||
677 | <h3>WARNING This next function is confusing</h3> | ||
678 | </div> | ||
679 | <div class='code'> | ||
680 | <div class="highlight"><pre></pre></div> | ||
681 | </div> | ||
682 | </div> | ||
683 | <div class='clearall'></div> | ||
684 | <div class='section' id='section-42'> | ||
685 | <div class='docs'> | ||
686 | <div class='octowrap'> | ||
687 | <a class='octothorpe' href='#section-42'>#</a> | ||
688 | </div> | ||
689 | <p>In general, the point of the function is to transform the field based on the data type that the | ||
690 | API tells us. It loops over every row and then every column in the row.</p> | ||
691 | </div> | ||
692 | <div class='code'> | ||
693 | <div class="highlight"><pre></pre></div> | ||
694 | </div> | ||
695 | </div> | ||
696 | <div class='clearall'></div> | ||
697 | <div class='section' id='section-43'> | ||
698 | <div class='docs'> | ||
699 | <div class='octowrap'> | ||
700 | <a class='octothorpe' href='#section-43'>#</a> | ||
701 | </div> | ||
702 | <p>For the <code>TIME</code> fields, there’s no reason it should work. And for some cases, the value returned is | ||
703 | just wrong.</p> | ||
704 | </div> | ||
705 | <div class='code'> | ||
706 | <div class="highlight"><pre></pre></div> | ||
707 | </div> | ||
708 | </div> | ||
709 | <div class='clearall'></div> | ||
710 | <div class='section' id='section-44'> | ||
711 | <div class='docs'> | ||
712 | <div class='octowrap'> | ||
713 | <a class='octothorpe' href='#section-44'>#</a> | ||
714 | </div> | ||
715 | <p>You can look at the code for <code>timedelta</code> and you would see that this constructor wants to | ||
716 | normalize the input of 6 units into 3 (you can create the object with <code>years</code>, <code>months</code>, <code>days</code>, | ||
717 | <code>hours</code>, <code>minutes</code>, and <code>seconds</code>. But it will convert values into just <code>days</code>, <code>hours</code>, and | ||
718 | <code>seconds</code>).</p> | ||
719 | </div> | ||
720 | <div class='code'> | ||
721 | <div class="highlight"><pre></pre></div> | ||
722 | </div> | ||
723 | </div> | ||
724 | <div class='clearall'></div> | ||
725 | <div class='section' id='section-45'> | ||
726 | <div class='docs'> | ||
727 | <div class='octowrap'> | ||
728 | <a class='octothorpe' href='#section-45'>#</a> | ||
729 | </div> | ||
730 | <p><em>Disclaimer I don’t have the exact units, but the spirit of | ||
731 | the idea is here.</em></p> | ||
732 | </div> | ||
733 | <div class='code'> | ||
734 | <div class="highlight"><pre></pre></div> | ||
735 | </div> | ||
736 | </div> | ||
737 | <div class='clearall'></div> | ||
738 | <div class='section' id='section-46'> | ||
739 | <div class='docs'> | ||
740 | <div class='octowrap'> | ||
741 | <a class='octothorpe' href='#section-46'>#</a> | ||
742 | </div> | ||
743 | <p>When we pass in <code>seconds</code> here as the value we get from the API times the number of seconds in a | ||
744 | day, how <code>timedelta</code> does its normalization gives us an incorrect value. It takes the input to | ||
745 | <code>seconds</code> and passes that to <code>divmod()</code> which returns a 2-ple as the result. The first element is | ||
746 | our input integer divided by the number of seconds in a day. The second element is our input mod | ||
747 | the number of seconds in a day. Then these results are added to the rest of the normalization and | ||
748 | we get the correct time value back out. It’s easy to imagine that since we don’t pass in a <code>days</code> | ||
749 | argument, our <code>divmod</code>‘s days output is just added to zero. The <code>__str__()</code> for <code>timedelta</code> must | ||
750 | be something like <code>"{my_days} days, {time_since_midnight(my_seconds)}"</code>, which is essentially what | ||
751 | we get after this transform function.</p> | ||
752 | </div> | ||
753 | <div class='code'> | ||
754 | <div class="highlight"><pre></pre></div> | ||
755 | </div> | ||
756 | </div> | ||
757 | <div class='clearall'></div> | ||
758 | <div class='section' id='section-47'> | ||
759 | <div class='docs'> | ||
760 | <div class='octowrap'> | ||
761 | <a class='octothorpe' href='#section-47'>#</a> | ||
762 | </div> | ||
763 | <hr /> | ||
764 | </div> | ||
765 | <div class='code'> | ||
766 | <div class="highlight"><pre></pre></div> | ||
767 | </div> | ||
768 | </div> | ||
769 | <div class='clearall'></div> | ||
770 | <div class='section' id='section-48'> | ||
771 | <div class='docs'> | ||
772 | <div class='octowrap'> | ||
773 | <a class='octothorpe' href='#section-48'>#</a> | ||
774 | </div> | ||
775 | <p>add spreadsheet_id, sheet_id, and row, convert dates/times Convert from array of values to</p> | ||
776 | </div> | ||
777 | <div class='code'> | ||
778 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">transform_sheet_data</span><span class="p">(</span><span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">sheet_id</span><span class="p">,</span> <span class="n">sheet_title</span><span class="p">,</span> <span class="n">from_row</span><span class="p">,</span> <span class="n">columns</span><span class="p">,</span> <span class="n">sheet_data_rows</span><span class="p">):</span></pre></div> | ||
779 | </div> | ||
780 | </div> | ||
781 | <div class='clearall'></div> | ||
782 | <div class='section' id='section-49'> | ||
783 | <div class='docs'> | ||
784 | <div class='octowrap'> | ||
785 | <a class='octothorpe' href='#section-49'>#</a> | ||
786 | </div> | ||
787 | <p>JSON with column names as keys</p> | ||
788 | </div> | ||
789 | <div class='code'> | ||
790 | <div class="highlight"><pre> <span class="n">sheet_data_tf</span> <span class="o">=</span> <span class="p">[]</span> | ||
791 | <span class="n">row_num</span> <span class="o">=</span> <span class="n">from_row</span></pre></div> | ||
792 | </div> | ||
793 | </div> | ||
794 | <div class='clearall'></div> | ||
795 | <div class='section' id='section-50'> | ||
796 | <div class='docs'> | ||
797 | <div class='octowrap'> | ||
798 | <a class='octothorpe' href='#section-50'>#</a> | ||
799 | </div> | ||
800 | <p>Create sorted list of columns based on columnIndex</p> | ||
801 | </div> | ||
802 | <div class='code'> | ||
803 | <div class="highlight"><pre> <span class="n">cols</span> <span class="o">=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">columns</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">i</span><span class="p">:</span> <span class="n">i</span><span class="p">[</span><span class="s1">'columnIndex'</span><span class="p">])</span> | ||
804 | |||
805 | <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">sheet_data_rows</span><span class="p">:</span></pre></div> | ||
806 | </div> | ||
807 | </div> | ||
808 | <div class='clearall'></div> | ||
809 | <div class='section' id='section-51'> | ||
810 | <div class='docs'> | ||
811 | <div class='octowrap'> | ||
812 | <a class='octothorpe' href='#section-51'>#</a> | ||
813 | </div> | ||
814 | <p>If empty row, SKIP</p> | ||
815 | </div> | ||
816 | <div class='code'> | ||
817 | <div class="highlight"><pre> <span class="k">if</span> <span class="n">row</span> <span class="o">==</span> <span class="p">[]:</span> | ||
818 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'EMPTY ROW: </span><span class="si">{}</span><span class="s1">, SKIPPING'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">row_num</span><span class="p">))</span> | ||
819 | <span class="k">else</span><span class="p">:</span> | ||
820 | <span class="n">sheet_data_row_tf</span> <span class="o">=</span> <span class="p">{}</span></pre></div> | ||
821 | </div> | ||
822 | </div> | ||
823 | <div class='clearall'></div> | ||
824 | <div class='section' id='section-52'> | ||
825 | <div class='docs'> | ||
826 | <div class='octowrap'> | ||
827 | <a class='octothorpe' href='#section-52'>#</a> | ||
828 | </div> | ||
829 | <p>Add spreadsheet_id, sheet_id, and row</p> | ||
830 | </div> | ||
831 | <div class='code'> | ||
832 | <div class="highlight"><pre> <span class="n">sheet_data_row_tf</span><span class="p">[</span><span class="s1">'__sdc_spreadsheet_id'</span><span class="p">]</span> <span class="o">=</span> <span class="n">spreadsheet_id</span> | ||
833 | <span class="n">sheet_data_row_tf</span><span class="p">[</span><span class="s1">'__sdc_sheet_id'</span><span class="p">]</span> <span class="o">=</span> <span class="n">sheet_id</span> | ||
834 | <span class="n">sheet_data_row_tf</span><span class="p">[</span><span class="s1">'__sdc_row'</span><span class="p">]</span> <span class="o">=</span> <span class="n">row_num</span> | ||
835 | <span class="n">col_num</span> <span class="o">=</span> <span class="mi">1</span> | ||
836 | <span class="k">for</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">row</span><span class="p">:</span></pre></div> | ||
837 | </div> | ||
838 | </div> | ||
839 | <div class='clearall'></div> | ||
840 | <div class='section' id='section-53'> | ||
841 | <div class='docs'> | ||
842 | <div class='octowrap'> | ||
843 | <a class='octothorpe' href='#section-53'>#</a> | ||
844 | </div> | ||
845 | <p>Select column metadata based on column index</p> | ||
846 | </div> | ||
847 | <div class='code'> | ||
848 | <div class="highlight"><pre> <span class="n">col</span> <span class="o">=</span> <span class="n">cols</span><span class="p">[</span><span class="n">col_num</span> <span class="o">-</span> <span class="mi">1</span><span class="p">]</span> | ||
849 | <span class="n">col_skipped</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'columnSkipped'</span><span class="p">)</span> | ||
850 | <span class="k">if</span> <span class="ow">not</span> <span class="n">col_skipped</span><span class="p">:</span></pre></div> | ||
851 | </div> | ||
852 | </div> | ||
853 | <div class='clearall'></div> | ||
854 | <div class='section' id='section-54'> | ||
855 | <div class='docs'> | ||
856 | <div class='octowrap'> | ||
857 | <a class='octothorpe' href='#section-54'>#</a> | ||
858 | </div> | ||
859 | <p>Get column metadata</p> | ||
860 | </div> | ||
861 | <div class='code'> | ||
862 | <div class="highlight"><pre> <span class="n">col_name</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'columnName'</span><span class="p">)</span> | ||
863 | <span class="n">col_type</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'columnType'</span><span class="p">)</span> | ||
864 | <span class="n">col_letter</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'columnLetter'</span><span class="p">)</span></pre></div> | ||
865 | </div> | ||
866 | </div> | ||
867 | <div class='clearall'></div> | ||
868 | <div class='section' id='section-55'> | ||
869 | <div class='docs'> | ||
870 | <div class='octowrap'> | ||
871 | <a class='octothorpe' href='#section-55'>#</a> | ||
872 | </div> | ||
873 | <p>NULL values</p> | ||
874 | </div> | ||
875 | <div class='code'> | ||
876 | <div class="highlight"><pre> <span class="k">if</span> <span class="n">value</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">value</span> <span class="o">==</span> <span class="s1">''</span><span class="p">:</span> | ||
877 | <span class="n">col_val</span> <span class="o">=</span> <span class="kc">None</span></pre></div> | ||
878 | </div> | ||
879 | </div> | ||
880 | <div class='clearall'></div> | ||
881 | <div class='section' id='section-56'> | ||
882 | <div class='docs'> | ||
883 | <div class='octowrap'> | ||
884 | <a class='octothorpe' href='#section-56'>#</a> | ||
885 | </div> | ||
886 | <p>Convert dates/times from Lotus Notes Serial Numbers | ||
887 | DATE-TIME</p> | ||
888 | </div> | ||
889 | <div class='code'> | ||
890 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">col_type</span> <span class="o">==</span> <span class="s1">'numberType.DATE_TIME'</span><span class="p">:</span> | ||
891 | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">)):</span> | ||
892 | <span class="n">col_val</span> <span class="o">=</span> <span class="n">excel_to_dttm_str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
893 | <span class="k">else</span><span class="p">:</span> | ||
894 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
895 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
896 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row_num</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span></pre></div> | ||
897 | </div> | ||
898 | </div> | ||
899 | <div class='clearall'></div> | ||
900 | <div class='section' id='section-57'> | ||
901 | <div class='docs'> | ||
902 | <div class='octowrap'> | ||
903 | <a class='octothorpe' href='#section-57'>#</a> | ||
904 | </div> | ||
905 | <p>DATE</p> | ||
906 | </div> | ||
907 | <div class='code'> | ||
908 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">col_type</span> <span class="o">==</span> <span class="s1">'numberType.DATE'</span><span class="p">:</span> | ||
909 | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">)):</span> | ||
910 | <span class="n">col_val</span> <span class="o">=</span> <span class="n">excel_to_dttm_str</span><span class="p">(</span><span class="n">value</span><span class="p">)[:</span><span class="mi">10</span><span class="p">]</span> | ||
911 | <span class="k">else</span><span class="p">:</span> | ||
912 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
913 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
914 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row_num</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span></pre></div> | ||
915 | </div> | ||
916 | </div> | ||
917 | <div class='clearall'></div> | ||
918 | <div class='section' id='section-58'> | ||
919 | <div class='docs'> | ||
920 | <div class='octowrap'> | ||
921 | <a class='octothorpe' href='#section-58'>#</a> | ||
922 | </div> | ||
923 | <p>TIME ONLY (NO DATE)</p> | ||
924 | </div> | ||
925 | <div class='code'> | ||
926 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">col_type</span> <span class="o">==</span> <span class="s1">'numberType.TIME'</span><span class="p">:</span> | ||
927 | |||
928 | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">)):</span> | ||
929 | <span class="k">try</span><span class="p">:</span> | ||
930 | <span class="n">total_secs</span> <span class="o">=</span> <span class="n">value</span> <span class="o">*</span> <span class="mi">86400</span></pre></div> | ||
931 | </div> | ||
932 | </div> | ||
933 | <div class='clearall'></div> | ||
934 | <div class='section' id='section-59'> | ||
935 | <div class='docs'> | ||
936 | <div class='octowrap'> | ||
937 | <a class='octothorpe' href='#section-59'>#</a> | ||
938 | </div> | ||
939 | <p>Create string formatted like HH:MM:SS</p> | ||
940 | </div> | ||
941 | <div class='code'> | ||
942 | <div class="highlight"><pre> <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">timedelta</span><span class="p">(</span><span class="n">seconds</span><span class="o">=</span><span class="n">total_secs</span><span class="p">))</span> | ||
943 | <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span> | ||
944 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
945 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
946 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row_num</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span> | ||
947 | <span class="k">else</span><span class="p">:</span> | ||
948 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></pre></div> | ||
949 | </div> | ||
950 | </div> | ||
951 | <div class='clearall'></div> | ||
952 | <div class='section' id='section-60'> | ||
953 | <div class='docs'> | ||
954 | <div class='octowrap'> | ||
955 | <a class='octothorpe' href='#section-60'>#</a> | ||
956 | </div> | ||
957 | <p>NUMBER (INTEGER AND FLOAT)</p> | ||
958 | </div> | ||
959 | <div class='code'> | ||
960 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">col_type</span> <span class="o">==</span> <span class="s1">'numberType'</span><span class="p">:</span> | ||
961 | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span> | ||
962 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
963 | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span></pre></div> | ||
964 | </div> | ||
965 | </div> | ||
966 | <div class='clearall'></div> | ||
967 | <div class='section' id='section-61'> | ||
968 | <div class='docs'> | ||
969 | <div class='octowrap'> | ||
970 | <a class='octothorpe' href='#section-61'>#</a> | ||
971 | </div> | ||
972 | <p>Determine float decimal digits</p> | ||
973 | </div> | ||
974 | <div class='code'> | ||
975 | <div class="highlight"><pre> <span class="n">decimal_digits</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)[::</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">find</span><span class="p">(</span><span class="s1">'.'</span><span class="p">)</span> | ||
976 | <span class="k">if</span> <span class="n">decimal_digits</span> <span class="o">></span> <span class="mi">15</span><span class="p">:</span> | ||
977 | <span class="k">try</span><span class="p">:</span></pre></div> | ||
978 | </div> | ||
979 | </div> | ||
980 | <div class='clearall'></div> | ||
981 | <div class='section' id='section-62'> | ||
982 | <div class='docs'> | ||
983 | <div class='octowrap'> | ||
984 | <a class='octothorpe' href='#section-62'>#</a> | ||
985 | </div> | ||
986 | <p>ROUND to multipleOf: 1e-15</p> | ||
987 | </div> | ||
988 | <div class='code'> | ||
989 | <div class="highlight"><pre> <span class="n">col_val</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="nb">round</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="mi">15</span><span class="p">))</span> | ||
990 | <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span> | ||
991 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
992 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
993 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row_num</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span> | ||
994 | <span class="k">else</span><span class="p">:</span> <span class="c1"># decimal_digits <= 15, no rounding</span> | ||
995 | <span class="k">try</span><span class="p">:</span> | ||
996 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
997 | <span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span> | ||
998 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
999 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR: SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1000 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row_num</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span> | ||
1001 | <span class="k">else</span><span class="p">:</span> | ||
1002 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
1003 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR: SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1004 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row_num</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span></pre></div> | ||
1005 | </div> | ||
1006 | </div> | ||
1007 | <div class='clearall'></div> | ||
1008 | <div class='section' id='section-63'> | ||
1009 | <div class='docs'> | ||
1010 | <div class='octowrap'> | ||
1011 | <a class='octothorpe' href='#section-63'>#</a> | ||
1012 | </div> | ||
1013 | <p>STRING</p> | ||
1014 | </div> | ||
1015 | <div class='code'> | ||
1016 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">col_type</span> <span class="o">==</span> <span class="s1">'stringValue'</span><span class="p">:</span> | ||
1017 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></pre></div> | ||
1018 | </div> | ||
1019 | </div> | ||
1020 | <div class='clearall'></div> | ||
1021 | <div class='section' id='section-64'> | ||
1022 | <div class='docs'> | ||
1023 | <div class='octowrap'> | ||
1024 | <a class='octothorpe' href='#section-64'>#</a> | ||
1025 | </div> | ||
1026 | <p>BOOLEAN</p> | ||
1027 | </div> | ||
1028 | <div class='code'> | ||
1029 | <div class="highlight"><pre> <span class="k">elif</span> <span class="n">col_type</span> <span class="o">==</span> <span class="s1">'boolValue'</span><span class="p">:</span> | ||
1030 | <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">bool</span><span class="p">):</span> | ||
1031 | <span class="n">col_val</span> <span class="o">=</span> <span class="n">value</span> | ||
1032 | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> | ||
1033 | <span class="k">if</span> <span class="n">value</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'true'</span><span class="p">,</span> <span class="s1">'t'</span><span class="p">,</span> <span class="s1">'yes'</span><span class="p">,</span> <span class="s1">'y'</span><span class="p">):</span> | ||
1034 | <span class="n">col_val</span> <span class="o">=</span> <span class="kc">True</span> | ||
1035 | <span class="k">elif</span> <span class="n">value</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span> <span class="ow">in</span> <span class="p">(</span><span class="s1">'false'</span><span class="p">,</span> <span class="s1">'f'</span><span class="p">,</span> <span class="s1">'no'</span><span class="p">,</span> <span class="s1">'n'</span><span class="p">):</span> | ||
1036 | <span class="n">col_val</span> <span class="o">=</span> <span class="kc">False</span> | ||
1037 | <span class="k">else</span><span class="p">:</span> | ||
1038 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
1039 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1040 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span> | ||
1041 | <span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span> | ||
1042 | <span class="k">if</span> <span class="n">value</span> <span class="ow">in</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">):</span> | ||
1043 | <span class="n">col_val</span> <span class="o">=</span> <span class="kc">True</span> | ||
1044 | <span class="k">elif</span> <span class="n">value</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span> | ||
1045 | <span class="n">col_val</span> <span class="o">=</span> <span class="kc">False</span> | ||
1046 | <span class="k">else</span><span class="p">:</span> | ||
1047 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
1048 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1049 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span></pre></div> | ||
1050 | </div> | ||
1051 | </div> | ||
1052 | <div class='clearall'></div> | ||
1053 | <div class='section' id='section-65'> | ||
1054 | <div class='docs'> | ||
1055 | <div class='octowrap'> | ||
1056 | <a class='octothorpe' href='#section-65'>#</a> | ||
1057 | </div> | ||
1058 | <p>OTHER: Convert everything else to a string</p> | ||
1059 | </div> | ||
1060 | <div class='code'> | ||
1061 | <div class="highlight"><pre> <span class="k">else</span><span class="p">:</span> | ||
1062 | <span class="n">col_val</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> | ||
1063 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'WARNING: POSSIBLE DATA TYPE ERROR; SHEET: </span><span class="si">{}</span><span class="s1">, COL: </span><span class="si">{}</span><span class="s1">, CELL: </span><span class="si">{}{}</span><span class="s1">, TYPE: </span><span class="si">{}</span><span class="s1">, VALUE: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1064 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">col_name</span><span class="p">,</span> <span class="n">col_letter</span><span class="p">,</span> <span class="n">row</span><span class="p">,</span> <span class="n">col_type</span><span class="p">,</span> <span class="n">value</span><span class="p">))</span> | ||
1065 | <span class="n">sheet_data_row_tf</span><span class="p">[</span><span class="n">col_name</span><span class="p">]</span> <span class="o">=</span> <span class="n">col_val</span> | ||
1066 | <span class="n">col_num</span> <span class="o">=</span> <span class="n">col_num</span> <span class="o">+</span> <span class="mi">1</span></pre></div> | ||
1067 | </div> | ||
1068 | </div> | ||
1069 | <div class='clearall'></div> | ||
1070 | <div class='section' id='section-66'> | ||
1071 | <div class='docs'> | ||
1072 | <div class='octowrap'> | ||
1073 | <a class='octothorpe' href='#section-66'>#</a> | ||
1074 | </div> | ||
1075 | <p>APPEND non-empty row</p> | ||
1076 | </div> | ||
1077 | <div class='code'> | ||
1078 | <div class="highlight"><pre> <span class="n">sheet_data_tf</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sheet_data_row_tf</span><span class="p">)</span> | ||
1079 | <span class="n">row_num</span> <span class="o">=</span> <span class="n">row_num</span> <span class="o">+</span> <span class="mi">1</span> | ||
1080 | <span class="k">return</span> <span class="n">sheet_data_tf</span><span class="p">,</span> <span class="n">row_num</span></pre></div> | ||
1081 | </div> | ||
1082 | </div> | ||
1083 | <div class='clearall'></div> | ||
1084 | <div class='section' id='section-67'> | ||
1085 | <div class='docs'> | ||
1086 | <div class='octowrap'> | ||
1087 | <a class='octothorpe' href='#section-67'>#</a> | ||
1088 | </div> | ||
1089 | <hr /> | ||
1090 | </div> | ||
1091 | <div class='code'> | ||
1092 | <div class="highlight"><pre></pre></div> | ||
1093 | </div> | ||
1094 | </div> | ||
1095 | <div class='clearall'></div> | ||
1096 | <div class='section' id='section-68'> | ||
1097 | <div class='docs'> | ||
1098 | <div class='octowrap'> | ||
1099 | <a class='octothorpe' href='#section-68'>#</a> | ||
1100 | </div> | ||
1101 | <h1>Main Functions</h1> | ||
1102 | </div> | ||
1103 | <div class='code'> | ||
1104 | <div class="highlight"><pre></pre></div> | ||
1105 | </div> | ||
1106 | </div> | ||
1107 | <div class='clearall'></div> | ||
1108 | <div class='section' id='section-69'> | ||
1109 | <div class='docs'> | ||
1110 | <div class='octowrap'> | ||
1111 | <a class='octothorpe' href='#section-69'>#</a> | ||
1112 | </div> | ||
1113 | <hr /> | ||
1114 | </div> | ||
1115 | <div class='code'> | ||
1116 | <div class="highlight"><pre></pre></div> | ||
1117 | </div> | ||
1118 | </div> | ||
1119 | <div class='clearall'></div> | ||
1120 | <div class='section' id='section-70'> | ||
1121 | <div class='docs'> | ||
1122 | <div class='octowrap'> | ||
1123 | <a class='octothorpe' href='#section-70'>#</a> | ||
1124 | </div> | ||
1125 | <p>Transform/validate batch of records w/ schema and sent to target</p> | ||
1126 | </div> | ||
1127 | <div class='code'> | ||
1128 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">process_records</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">,</span> <span class="n">records</span><span class="p">,</span> <span class="n">time_extracted</span><span class="p">,</span> <span class="n">version</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span></pre></div> | ||
1129 | </div> | ||
1130 | </div> | ||
1131 | <div class='clearall'></div> | ||
1132 | <div class='section' id='section-71'> | ||
1133 | <div class='docs'> | ||
1134 | <div class='octowrap'> | ||
1135 | <a class='octothorpe' href='#section-71'>#</a> | ||
1136 | </div> | ||
1137 | |||
1138 | </div> | ||
1139 | <div class='code'> | ||
1140 | <div class="highlight"><pre> <span class="n">stream</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">get_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span> | ||
1141 | <span class="n">schema</span> <span class="o">=</span> <span class="n">stream</span><span class="o">.</span><span class="n">schema</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span> | ||
1142 | <span class="n">stream_metadata</span> <span class="o">=</span> <span class="n">metadata</span><span class="o">.</span><span class="n">to_map</span><span class="p">(</span><span class="n">stream</span><span class="o">.</span><span class="n">metadata</span><span class="p">)</span> | ||
1143 | <span class="k">with</span> <span class="n">metrics</span><span class="o">.</span><span class="n">record_counter</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span> <span class="k">as</span> <span class="n">counter</span><span class="p">:</span> | ||
1144 | <span class="k">for</span> <span class="n">record</span> <span class="ow">in</span> <span class="n">records</span><span class="p">:</span> | ||
1145 | <span class="k">with</span> <span class="n">Transformer</span><span class="p">()</span> <span class="k">as</span> <span class="n">transformer</span><span class="p">:</span> | ||
1146 | <span class="k">try</span><span class="p">:</span> | ||
1147 | <span class="n">transformed_record</span> <span class="o">=</span> <span class="n">transformer</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">record</span><span class="p">,</span> <span class="n">schema</span><span class="p">,</span> <span class="n">stream_metadata</span><span class="p">)</span> | ||
1148 | <span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">err</span><span class="p">:</span> | ||
1149 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="s1">'</span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">err</span><span class="p">))</span> | ||
1150 | <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="n">err</span><span class="p">)</span> | ||
1151 | <span class="n">write_record</span><span class="p">(</span> | ||
1152 | <span class="n">stream_name</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
1153 | <span class="n">record</span><span class="o">=</span><span class="n">transformed_record</span><span class="p">,</span> | ||
1154 | <span class="n">time_extracted</span><span class="o">=</span><span class="n">time_extracted</span><span class="p">,</span> | ||
1155 | <span class="n">version</span><span class="o">=</span><span class="n">version</span><span class="p">)</span> | ||
1156 | <span class="n">counter</span><span class="o">.</span><span class="n">increment</span><span class="p">()</span> | ||
1157 | <span class="k">return</span> <span class="n">counter</span><span class="o">.</span><span class="n">value</span></pre></div> | ||
1158 | </div> | ||
1159 | </div> | ||
1160 | <div class='clearall'></div> | ||
1161 | <div class='section' id='section-72'> | ||
1162 | <div class='docs'> | ||
1163 | <div class='octowrap'> | ||
1164 | <a class='octothorpe' href='#section-72'>#</a> | ||
1165 | </div> | ||
1166 | <p>This is just a pass-through to <code>process_records()</code></p> | ||
1167 | </div> | ||
1168 | <div class='code'> | ||
1169 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">sync_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">selected_streams</span><span class="p">,</span> <span class="n">catalog</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">records</span><span class="p">,</span> <span class="n">time_extracted</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span></pre></div> | ||
1170 | </div> | ||
1171 | </div> | ||
1172 | <div class='clearall'></div> | ||
1173 | <div class='section' id='section-73'> | ||
1174 | <div class='docs'> | ||
1175 | <div class='octowrap'> | ||
1176 | <a class='octothorpe' href='#section-73'>#</a> | ||
1177 | </div> | ||
1178 | |||
1179 | </div> | ||
1180 | <div class='code'> | ||
1181 | <div class="highlight"><pre> <span class="k">if</span> <span class="n">stream_name</span> <span class="ow">in</span> <span class="n">selected_streams</span><span class="p">:</span> | ||
1182 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'STARTED Syncing </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream_name</span><span class="p">))</span> | ||
1183 | <span class="n">update_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">)</span> | ||
1184 | <span class="n">selected_fields</span> <span class="o">=</span> <span class="n">get_selected_fields</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">)</span> | ||
1185 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Stream: </span><span class="si">{}</span><span class="s1">, selected_fields: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">selected_fields</span><span class="p">))</span> | ||
1186 | <span class="n">write_schema</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">)</span> | ||
1187 | <span class="k">if</span> <span class="ow">not</span> <span class="n">time_extracted</span><span class="p">:</span> | ||
1188 | <span class="n">time_extracted</span> <span class="o">=</span> <span class="n">utils</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> | ||
1189 | <span class="n">record_count</span> <span class="o">=</span> <span class="n">process_records</span><span class="p">(</span> | ||
1190 | <span class="n">catalog</span><span class="o">=</span><span class="n">catalog</span><span class="p">,</span> | ||
1191 | <span class="n">stream_name</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
1192 | <span class="n">records</span><span class="o">=</span><span class="n">records</span><span class="p">,</span> | ||
1193 | <span class="n">time_extracted</span><span class="o">=</span><span class="n">time_extracted</span><span class="p">)</span> | ||
1194 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'FINISHED Syncing </span><span class="si">{}</span><span class="s1">, Total Records: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">record_count</span><span class="p">))</span> | ||
1195 | <span class="n">update_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span></pre></div> | ||
1196 | </div> | ||
1197 | </div> | ||
1198 | <div class='clearall'></div> | ||
1199 | <div class='section' id='section-74'> | ||
1200 | <div class='docs'> | ||
1201 | <div class='octowrap'> | ||
1202 | <a class='octothorpe' href='#section-74'>#</a> | ||
1203 | </div> | ||
1204 | <p>See top of file for notes</p> | ||
1205 | </div> | ||
1206 | <div class='code'> | ||
1207 | <div class="highlight"><pre><span class="k">def</span> <span class="nf">sync</span><span class="p">(</span><span class="n">client</span><span class="p">,</span> <span class="n">config</span><span class="p">,</span> <span class="n">catalog</span><span class="p">,</span> <span class="n">state</span><span class="p">):</span> | ||
1208 | <span class="n">start_date</span> <span class="o">=</span> <span class="n">config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'start_date'</span><span class="p">)</span> | ||
1209 | <span class="n">spreadsheet_id</span> <span class="o">=</span> <span class="n">config</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'spreadsheet_id'</span><span class="p">)</span> | ||
1210 | |||
1211 | <span class="n">last_stream</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">get_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">)</span> | ||
1212 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'last/currently syncing stream: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">last_stream</span><span class="p">))</span> | ||
1213 | |||
1214 | <span class="n">selected_streams</span> <span class="o">=</span> <span class="p">[]</span> | ||
1215 | <span class="k">for</span> <span class="n">stream</span> <span class="ow">in</span> <span class="n">catalog</span><span class="o">.</span><span class="n">get_selected_streams</span><span class="p">(</span><span class="n">state</span><span class="p">):</span> | ||
1216 | <span class="n">selected_streams</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">stream</span><span class="o">.</span><span class="n">stream</span><span class="p">)</span> | ||
1217 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'selected_streams: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">selected_streams</span><span class="p">))</span> | ||
1218 | |||
1219 | <span class="k">if</span> <span class="ow">not</span> <span class="n">selected_streams</span><span class="p">:</span> | ||
1220 | <span class="k">return</span></pre></div> | ||
1221 | </div> | ||
1222 | </div> | ||
1223 | <div class='clearall'></div> | ||
1224 | <div class='section' id='section-75'> | ||
1225 | <div class='docs'> | ||
1226 | <div class='octowrap'> | ||
1227 | <a class='octothorpe' href='#section-75'>#</a> | ||
1228 | </div> | ||
1229 | <h2>FILE_METADATA</h2> | ||
1230 | </div> | ||
1231 | <div class='code'> | ||
1232 | <div class="highlight"><pre> <span class="n">file_metadata</span> <span class="o">=</span> <span class="p">{}</span> | ||
1233 | <span class="n">stream_name</span> <span class="o">=</span> <span class="s1">'file_metadata'</span> | ||
1234 | <span class="n">file_metadata_config</span> <span class="o">=</span> <span class="n">STREAMS</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span></pre></div> | ||
1235 | </div> | ||
1236 | </div> | ||
1237 | <div class='clearall'></div> | ||
1238 | <div class='section' id='section-76'> | ||
1239 | <div class='docs'> | ||
1240 | <div class='octowrap'> | ||
1241 | <a class='octothorpe' href='#section-76'>#</a> | ||
1242 | </div> | ||
1243 | <p>GET file_metadata</p> | ||
1244 | </div> | ||
1245 | <div class='code'> | ||
1246 | <div class="highlight"><pre> <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'GET file_meatadata'</span><span class="p">)</span> | ||
1247 | <span class="n">file_metadata</span><span class="p">,</span> <span class="n">time_extracted</span> <span class="o">=</span> <span class="n">get_data</span><span class="p">(</span><span class="n">stream_name</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
1248 | <span class="n">endpoint_config</span><span class="o">=</span><span class="n">file_metadata_config</span><span class="p">,</span> | ||
1249 | <span class="n">client</span><span class="o">=</span><span class="n">client</span><span class="p">,</span> | ||
1250 | <span class="n">spreadsheet_id</span><span class="o">=</span><span class="n">spreadsheet_id</span><span class="p">)</span></pre></div> | ||
1251 | </div> | ||
1252 | </div> | ||
1253 | <div class='clearall'></div> | ||
1254 | <div class='section' id='section-77'> | ||
1255 | <div class='docs'> | ||
1256 | <div class='octowrap'> | ||
1257 | <a class='octothorpe' href='#section-77'>#</a> | ||
1258 | </div> | ||
1259 | <p>Transform file_metadata</p> | ||
1260 | </div> | ||
1261 | <div class='code'> | ||
1262 | <div class="highlight"><pre> <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Transform file_meatadata'</span><span class="p">)</span> | ||
1263 | <span class="n">file_metadata_tf</span> <span class="o">=</span> <span class="n">transform_file_metadata</span><span class="p">(</span><span class="n">file_metadata</span><span class="p">)</span></pre></div> | ||
1264 | </div> | ||
1265 | </div> | ||
1266 | <div class='clearall'></div> | ||
1267 | <div class='section' id='section-78'> | ||
1268 | <div class='docs'> | ||
1269 | <div class='octowrap'> | ||
1270 | <a class='octothorpe' href='#section-78'>#</a> | ||
1271 | </div> | ||
1272 | <p>Check if file has changed, if not exit</p> | ||
1273 | </div> | ||
1274 | <div class='code'> | ||
1275 | <div class="highlight"><pre> <span class="n">last_datetime</span> <span class="o">=</span> <span class="n">strptime_to_utc</span><span class="p">(</span><span class="n">get_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">stream_name</span><span class="p">,</span> <span class="n">start_date</span><span class="p">))</span> | ||
1276 | <span class="n">this_datetime</span> <span class="o">=</span> <span class="n">strptime_to_utc</span><span class="p">(</span><span class="n">file_metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'modifiedTime'</span><span class="p">))</span> | ||
1277 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'last_datetime = </span><span class="si">{}</span><span class="s1">, this_datetime = </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">last_datetime</span><span class="p">,</span> <span class="n">this_datetime</span><span class="p">))</span> | ||
1278 | <span class="k">if</span> <span class="n">this_datetime</span> <span class="o"><=</span> <span class="n">last_datetime</span><span class="p">:</span> | ||
1279 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'this_datetime <= last_datetime, FILE NOT CHANGED. EXITING.'</span><span class="p">)</span> | ||
1280 | <span class="n">write_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="s1">'file_metadata'</span><span class="p">,</span> <span class="n">strftime</span><span class="p">(</span><span class="n">this_datetime</span><span class="p">))</span> | ||
1281 | <span class="k">return</span></pre></div> | ||
1282 | </div> | ||
1283 | </div> | ||
1284 | <div class='clearall'></div> | ||
1285 | <div class='section' id='section-79'> | ||
1286 | <div class='docs'> | ||
1287 | <div class='octowrap'> | ||
1288 | <a class='octothorpe' href='#section-79'>#</a> | ||
1289 | </div> | ||
1290 | <p>Write file_metadata records if selected</p> | ||
1291 | </div> | ||
1292 | <div class='code'> | ||
1293 | <div class="highlight"><pre> <span class="n">sync_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">selected_streams</span><span class="p">,</span> <span class="n">catalog</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">file_metadata_tf</span><span class="p">,</span> <span class="n">time_extracted</span><span class="p">)</span></pre></div> | ||
1294 | </div> | ||
1295 | </div> | ||
1296 | <div class='clearall'></div> | ||
1297 | <div class='section' id='section-80'> | ||
1298 | <div class='docs'> | ||
1299 | <div class='octowrap'> | ||
1300 | <a class='octothorpe' href='#section-80'>#</a> | ||
1301 | </div> | ||
1302 | <h2>SPREADSHEET_METADATA</h2> | ||
1303 | </div> | ||
1304 | <div class='code'> | ||
1305 | <div class="highlight"><pre> <span class="n">spreadsheet_metadata</span> <span class="o">=</span> <span class="p">{}</span> | ||
1306 | <span class="n">stream_name</span> <span class="o">=</span> <span class="s1">'spreadsheet_metadata'</span> | ||
1307 | <span class="n">spreadsheet_metadata_config</span> <span class="o">=</span> <span class="n">STREAMS</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">stream_name</span><span class="p">)</span></pre></div> | ||
1308 | </div> | ||
1309 | </div> | ||
1310 | <div class='clearall'></div> | ||
1311 | <div class='section' id='section-81'> | ||
1312 | <div class='docs'> | ||
1313 | <div class='octowrap'> | ||
1314 | <a class='octothorpe' href='#section-81'>#</a> | ||
1315 | </div> | ||
1316 | <p>GET spreadsheet_metadata</p> | ||
1317 | </div> | ||
1318 | <div class='code'> | ||
1319 | <div class="highlight"><pre> <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'GET spreadsheet_meatadata'</span><span class="p">)</span> | ||
1320 | <span class="n">spreadsheet_metadata</span><span class="p">,</span> <span class="n">ss_time_extracted</span> <span class="o">=</span> <span class="n">get_data</span><span class="p">(</span> | ||
1321 | <span class="n">stream_name</span><span class="o">=</span><span class="n">stream_name</span><span class="p">,</span> | ||
1322 | <span class="n">endpoint_config</span><span class="o">=</span><span class="n">spreadsheet_metadata_config</span><span class="p">,</span> | ||
1323 | <span class="n">client</span><span class="o">=</span><span class="n">client</span><span class="p">,</span> | ||
1324 | <span class="n">spreadsheet_id</span><span class="o">=</span><span class="n">spreadsheet_id</span><span class="p">)</span></pre></div> | ||
1325 | </div> | ||
1326 | </div> | ||
1327 | <div class='clearall'></div> | ||
1328 | <div class='section' id='section-82'> | ||
1329 | <div class='docs'> | ||
1330 | <div class='octowrap'> | ||
1331 | <a class='octothorpe' href='#section-82'>#</a> | ||
1332 | </div> | ||
1333 | <p>Transform spreadsheet_metadata</p> | ||
1334 | </div> | ||
1335 | <div class='code'> | ||
1336 | <div class="highlight"><pre> <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Transform spreadsheet_meatadata'</span><span class="p">)</span> | ||
1337 | <span class="n">spreadsheet_metadata_tf</span> <span class="o">=</span> <span class="n">transform_spreadsheet_metadata</span><span class="p">(</span><span class="n">spreadsheet_metadata</span><span class="p">)</span></pre></div> | ||
1338 | </div> | ||
1339 | </div> | ||
1340 | <div class='clearall'></div> | ||
1341 | <div class='section' id='section-83'> | ||
1342 | <div class='docs'> | ||
1343 | <div class='octowrap'> | ||
1344 | <a class='octothorpe' href='#section-83'>#</a> | ||
1345 | </div> | ||
1346 | <p>Write spreadsheet_metadata records if selected</p> | ||
1347 | </div> | ||
1348 | <div class='code'> | ||
1349 | <div class="highlight"><pre> <span class="n">sync_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">selected_streams</span><span class="p">,</span> <span class="n">catalog</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">spreadsheet_metadata_tf</span><span class="p">,</span> \ | ||
1350 | <span class="n">ss_time_extracted</span><span class="p">)</span></pre></div> | ||
1351 | </div> | ||
1352 | </div> | ||
1353 | <div class='clearall'></div> | ||
1354 | <div class='section' id='section-84'> | ||
1355 | <div class='docs'> | ||
1356 | <div class='octowrap'> | ||
1357 | <a class='octothorpe' href='#section-84'>#</a> | ||
1358 | </div> | ||
1359 | <h2>SHEET_METADATA and SHEET_DATA</h2> | ||
1360 | </div> | ||
1361 | <div class='code'> | ||
1362 | <div class="highlight"><pre> <span class="n">sheets</span> <span class="o">=</span> <span class="n">spreadsheet_metadata</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'sheets'</span><span class="p">)</span> | ||
1363 | <span class="n">sheet_metadata</span> <span class="o">=</span> <span class="p">[]</span> | ||
1364 | <span class="n">sheets_loaded</span> <span class="o">=</span> <span class="p">[]</span> | ||
1365 | <span class="n">sheets_loaded_config</span> <span class="o">=</span> <span class="n">STREAMS</span><span class="p">[</span><span class="s1">'sheets_loaded'</span><span class="p">]</span> | ||
1366 | <span class="k">if</span> <span class="n">sheets</span><span class="p">:</span></pre></div> | ||
1367 | </div> | ||
1368 | </div> | ||
1369 | <div class='clearall'></div> | ||
1370 | <div class='section' id='section-85'> | ||
1371 | <div class='docs'> | ||
1372 | <div class='octowrap'> | ||
1373 | <a class='octothorpe' href='#section-85'>#</a> | ||
1374 | </div> | ||
1375 | <p>Loop thru sheets (worksheet tabs) in spreadsheet</p> | ||
1376 | </div> | ||
1377 | <div class='code'> | ||
1378 | <div class="highlight"><pre> <span class="k">for</span> <span class="n">sheet</span> <span class="ow">in</span> <span class="n">sheets</span><span class="p">:</span> | ||
1379 | <span class="n">sheet_title</span> <span class="o">=</span> <span class="n">sheet</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'title'</span><span class="p">)</span> | ||
1380 | <span class="n">sheet_id</span> <span class="o">=</span> <span class="n">sheet</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'sheetId'</span><span class="p">)</span></pre></div> | ||
1381 | </div> | ||
1382 | </div> | ||
1383 | <div class='clearall'></div> | ||
1384 | <div class='section' id='section-86'> | ||
1385 | <div class='docs'> | ||
1386 | <div class='octowrap'> | ||
1387 | <a class='octothorpe' href='#section-86'>#</a> | ||
1388 | </div> | ||
1389 | <h3>Sheet_Metadata</h3> | ||
1390 | <p>GET sheet_metadata and columns</p> | ||
1391 | </div> | ||
1392 | <div class='code'> | ||
1393 | <div class="highlight"><pre> <span class="n">sheet_schema</span><span class="p">,</span> <span class="n">columns</span> <span class="o">=</span> <span class="n">get_sheet_metadata</span><span class="p">(</span><span class="n">sheet</span><span class="p">,</span> <span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">client</span><span class="p">)</span> | ||
1394 | |||
1395 | <span class="k">if</span> <span class="ow">not</span> <span class="n">sheet_schema</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">columns</span><span class="p">:</span> | ||
1396 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'SKIPPING Empty Sheet: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">))</span> | ||
1397 | <span class="k">else</span><span class="p">:</span></pre></div> | ||
1398 | </div> | ||
1399 | </div> | ||
1400 | <div class='clearall'></div> | ||
1401 | <div class='section' id='section-87'> | ||
1402 | <div class='docs'> | ||
1403 | <div class='octowrap'> | ||
1404 | <a class='octothorpe' href='#section-87'>#</a> | ||
1405 | </div> | ||
1406 | <p>Transform sheet_metadata</p> | ||
1407 | </div> | ||
1408 | <div class='code'> | ||
1409 | <div class="highlight"><pre> <span class="n">sheet_metadata_tf</span> <span class="o">=</span> <span class="n">transform_sheet_metadata</span><span class="p">(</span><span class="n">spreadsheet_id</span><span class="p">,</span> <span class="n">sheet</span><span class="p">,</span> <span class="n">columns</span><span class="p">)</span> | ||
1410 | <span class="n">sheet_metadata</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sheet_metadata_tf</span><span class="p">)</span></pre></div> | ||
1411 | </div> | ||
1412 | </div> | ||
1413 | <div class='clearall'></div> | ||
1414 | <div class='section' id='section-88'> | ||
1415 | <div class='docs'> | ||
1416 | <div class='octowrap'> | ||
1417 | <a class='octothorpe' href='#section-88'>#</a> | ||
1418 | </div> | ||
1419 | <h3>SHEET_DATA</h3> | ||
1420 | </div> | ||
1421 | <div class='code'> | ||
1422 | <div class="highlight"><pre> <span class="k">if</span> <span class="n">sheet_title</span> <span class="ow">in</span> <span class="n">selected_streams</span><span class="p">:</span> | ||
1423 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'STARTED Syncing Sheet </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">))</span> | ||
1424 | <span class="n">update_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">sheet_title</span><span class="p">)</span> | ||
1425 | <span class="n">selected_fields</span> <span class="o">=</span> <span class="n">get_selected_fields</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">sheet_title</span><span class="p">)</span> | ||
1426 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Stream: </span><span class="si">{}</span><span class="s1">, selected_fields: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">,</span> <span class="n">selected_fields</span><span class="p">))</span> | ||
1427 | <span class="n">write_schema</span><span class="p">(</span><span class="n">catalog</span><span class="p">,</span> <span class="n">sheet_title</span><span class="p">)</span></pre></div> | ||
1428 | </div> | ||
1429 | </div> | ||
1430 | <div class='clearall'></div> | ||
1431 | <div class='section' id='section-89'> | ||
1432 | <div class='docs'> | ||
1433 | <div class='octowrap'> | ||
1434 | <a class='octothorpe' href='#section-89'>#</a> | ||
1435 | </div> | ||
1436 | <p>Emit a Singer ACTIVATE_VERSION message before initial sync (but not subsequent syncs) | ||
1437 | everytime after each sheet sync is complete. | ||
1438 | This forces hard deletes on the data downstream if fewer records are sent. | ||
1439 | https://github.com/singer-io/singer-python/blob/master/singer/messages.py#L137</p> | ||
1440 | </div> | ||
1441 | <div class='code'> | ||
1442 | <div class="highlight"><pre> <span class="n">last_integer</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">get_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">sheet_title</span><span class="p">,</span> <span class="mi">0</span><span class="p">))</span> | ||
1443 | <span class="n">activate_version</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span> <span class="o">*</span> <span class="mi">1000</span><span class="p">)</span> | ||
1444 | <span class="n">activate_version_message</span> <span class="o">=</span> <span class="n">singer</span><span class="o">.</span><span class="n">ActivateVersionMessage</span><span class="p">(</span> | ||
1445 | <span class="n">stream</span><span class="o">=</span><span class="n">sheet_title</span><span class="p">,</span> | ||
1446 | <span class="n">version</span><span class="o">=</span><span class="n">activate_version</span><span class="p">)</span> | ||
1447 | <span class="k">if</span> <span class="n">last_integer</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span></pre></div> | ||
1448 | </div> | ||
1449 | </div> | ||
1450 | <div class='clearall'></div> | ||
1451 | <div class='section' id='section-90'> | ||
1452 | <div class='docs'> | ||
1453 | <div class='octowrap'> | ||
1454 | <a class='octothorpe' href='#section-90'>#</a> | ||
1455 | </div> | ||
1456 | <p>initial load, send activate_version before AND after data sync</p> | ||
1457 | </div> | ||
1458 | <div class='code'> | ||
1459 | <div class="highlight"><pre> <span class="n">singer</span><span class="o">.</span><span class="n">write_message</span><span class="p">(</span><span class="n">activate_version_message</span><span class="p">)</span> | ||
1460 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'INITIAL SYNC, Stream: </span><span class="si">{}</span><span class="s1">, Activate Version: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">,</span> <span class="n">activate_version</span><span class="p">))</span></pre></div> | ||
1461 | </div> | ||
1462 | </div> | ||
1463 | <div class='clearall'></div> | ||
1464 | <div class='section' id='section-91'> | ||
1465 | <div class='docs'> | ||
1466 | <div class='octowrap'> | ||
1467 | <a class='octothorpe' href='#section-91'>#</a> | ||
1468 | </div> | ||
1469 | <p>Determine max range of columns and rows for “paging” through the data</p> | ||
1470 | </div> | ||
1471 | <div class='code'> | ||
1472 | <div class="highlight"><pre> <span class="n">sheet_last_col_index</span> <span class="o">=</span> <span class="mi">1</span> | ||
1473 | <span class="n">sheet_last_col_letter</span> <span class="o">=</span> <span class="s1">'A'</span> | ||
1474 | <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span> | ||
1475 | <span class="n">col_index</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'columnIndex'</span><span class="p">)</span> | ||
1476 | <span class="n">col_letter</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'columnLetter'</span><span class="p">)</span> | ||
1477 | <span class="k">if</span> <span class="n">col_index</span> <span class="o">></span> <span class="n">sheet_last_col_index</span><span class="p">:</span> | ||
1478 | <span class="n">sheet_last_col_index</span> <span class="o">=</span> <span class="n">col_index</span> | ||
1479 | <span class="n">sheet_last_col_letter</span> <span class="o">=</span> <span class="n">col_letter</span> | ||
1480 | <span class="n">sheet_max_row</span> <span class="o">=</span> <span class="n">sheet</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'properties'</span><span class="p">)</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'gridProperties'</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'rowCount'</span><span class="p">)</span></pre></div> | ||
1481 | </div> | ||
1482 | </div> | ||
1483 | <div class='clearall'></div> | ||
1484 | <div class='section' id='section-92'> | ||
1485 | <div class='docs'> | ||
1486 | <div class='octowrap'> | ||
1487 | <a class='octothorpe' href='#section-92'>#</a> | ||
1488 | </div> | ||
1489 | <p>Initialize paging for 1st batch</p> | ||
1490 | </div> | ||
1491 | <div class='code'> | ||
1492 | <div class="highlight"><pre> <span class="n">is_last_row</span> <span class="o">=</span> <span class="kc">False</span> | ||
1493 | <span class="n">batch_rows</span> <span class="o">=</span> <span class="mi">200</span> | ||
1494 | <span class="n">from_row</span> <span class="o">=</span> <span class="mi">2</span> | ||
1495 | <span class="k">if</span> <span class="n">sheet_max_row</span> <span class="o"><</span> <span class="n">batch_rows</span><span class="p">:</span> | ||
1496 | <span class="n">to_row</span> <span class="o">=</span> <span class="n">sheet_max_row</span> | ||
1497 | <span class="k">else</span><span class="p">:</span> | ||
1498 | <span class="n">to_row</span> <span class="o">=</span> <span class="n">batch_rows</span></pre></div> | ||
1499 | </div> | ||
1500 | </div> | ||
1501 | <div class='clearall'></div> | ||
1502 | <div class='section' id='section-93'> | ||
1503 | <div class='docs'> | ||
1504 | <div class='octowrap'> | ||
1505 | <a class='octothorpe' href='#section-93'>#</a> | ||
1506 | </div> | ||
1507 | <p>Loop thru batches (each having 200 rows of data)</p> | ||
1508 | </div> | ||
1509 | <div class='code'> | ||
1510 | <div class="highlight"><pre> <span class="k">while</span> <span class="ow">not</span> <span class="n">is_last_row</span> <span class="ow">and</span> <span class="n">from_row</span> <span class="o"><</span> <span class="n">sheet_max_row</span> <span class="ow">and</span> <span class="n">to_row</span> <span class="o"><=</span> <span class="n">sheet_max_row</span><span class="p">:</span> | ||
1511 | <span class="n">range_rows</span> <span class="o">=</span> <span class="s1">'A</span><span class="si">{}</span><span class="s1">:</span><span class="si">{}{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">from_row</span><span class="p">,</span> <span class="n">sheet_last_col_letter</span><span class="p">,</span> <span class="n">to_row</span><span class="p">)</span></pre></div> | ||
1512 | </div> | ||
1513 | </div> | ||
1514 | <div class='clearall'></div> | ||
1515 | <div class='section' id='section-94'> | ||
1516 | <div class='docs'> | ||
1517 | <div class='octowrap'> | ||
1518 | <a class='octothorpe' href='#section-94'>#</a> | ||
1519 | </div> | ||
1520 | <p>GET sheet_data for a worksheet tab</p> | ||
1521 | </div> | ||
1522 | <div class='code'> | ||
1523 | <div class="highlight"><pre> <span class="n">sheet_data</span><span class="p">,</span> <span class="n">time_extracted</span> <span class="o">=</span> <span class="n">get_data</span><span class="p">(</span> | ||
1524 | <span class="n">stream_name</span><span class="o">=</span><span class="n">sheet_title</span><span class="p">,</span> | ||
1525 | <span class="n">endpoint_config</span><span class="o">=</span><span class="n">sheets_loaded_config</span><span class="p">,</span> | ||
1526 | <span class="n">client</span><span class="o">=</span><span class="n">client</span><span class="p">,</span> | ||
1527 | <span class="n">spreadsheet_id</span><span class="o">=</span><span class="n">spreadsheet_id</span><span class="p">,</span> | ||
1528 | <span class="n">range_rows</span><span class="o">=</span><span class="n">range_rows</span><span class="p">)</span></pre></div> | ||
1529 | </div> | ||
1530 | </div> | ||
1531 | <div class='clearall'></div> | ||
1532 | <div class='section' id='section-95'> | ||
1533 | <div class='docs'> | ||
1534 | <div class='octowrap'> | ||
1535 | <a class='octothorpe' href='#section-95'>#</a> | ||
1536 | </div> | ||
1537 | <p>Data is returned as a list of arrays, an array of values for each row</p> | ||
1538 | </div> | ||
1539 | <div class='code'> | ||
1540 | <div class="highlight"><pre> <span class="n">sheet_data_rows</span> <span class="o">=</span> <span class="n">sheet_data</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">'values'</span><span class="p">,</span> <span class="p">[])</span></pre></div> | ||
1541 | </div> | ||
1542 | </div> | ||
1543 | <div class='clearall'></div> | ||
1544 | <div class='section' id='section-96'> | ||
1545 | <div class='docs'> | ||
1546 | <div class='octowrap'> | ||
1547 | <a class='octothorpe' href='#section-96'>#</a> | ||
1548 | </div> | ||
1549 | <p>Transform batch of rows to JSON with keys for each column</p> | ||
1550 | </div> | ||
1551 | <div class='code'> | ||
1552 | <div class="highlight"><pre> <span class="n">sheet_data_tf</span><span class="p">,</span> <span class="n">row_num</span> <span class="o">=</span> <span class="n">transform_sheet_data</span><span class="p">(</span> | ||
1553 | <span class="n">spreadsheet_id</span><span class="o">=</span><span class="n">spreadsheet_id</span><span class="p">,</span> | ||
1554 | <span class="n">sheet_id</span><span class="o">=</span><span class="n">sheet_id</span><span class="p">,</span> | ||
1555 | <span class="n">sheet_title</span><span class="o">=</span><span class="n">sheet_title</span><span class="p">,</span> | ||
1556 | <span class="n">from_row</span><span class="o">=</span><span class="n">from_row</span><span class="p">,</span> | ||
1557 | <span class="n">columns</span><span class="o">=</span><span class="n">columns</span><span class="p">,</span> | ||
1558 | <span class="n">sheet_data_rows</span><span class="o">=</span><span class="n">sheet_data_rows</span><span class="p">)</span> | ||
1559 | <span class="k">if</span> <span class="n">row_num</span> <span class="o"><</span> <span class="n">to_row</span><span class="p">:</span> | ||
1560 | <span class="n">is_last_row</span> <span class="o">=</span> <span class="kc">True</span></pre></div> | ||
1561 | </div> | ||
1562 | </div> | ||
1563 | <div class='clearall'></div> | ||
1564 | <div class='section' id='section-97'> | ||
1565 | <div class='docs'> | ||
1566 | <div class='octowrap'> | ||
1567 | <a class='octothorpe' href='#section-97'>#</a> | ||
1568 | </div> | ||
1569 | <p>Process records, send batch of records to target</p> | ||
1570 | </div> | ||
1571 | <div class='code'> | ||
1572 | <div class="highlight"><pre> <span class="n">record_count</span> <span class="o">=</span> <span class="n">process_records</span><span class="p">(</span> | ||
1573 | <span class="n">catalog</span><span class="o">=</span><span class="n">catalog</span><span class="p">,</span> | ||
1574 | <span class="n">stream_name</span><span class="o">=</span><span class="n">sheet_title</span><span class="p">,</span> | ||
1575 | <span class="n">records</span><span class="o">=</span><span class="n">sheet_data_tf</span><span class="p">,</span> | ||
1576 | <span class="n">time_extracted</span><span class="o">=</span><span class="n">ss_time_extracted</span><span class="p">,</span> | ||
1577 | <span class="n">version</span><span class="o">=</span><span class="n">activate_version</span><span class="p">)</span> | ||
1578 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'Sheet: </span><span class="si">{}</span><span class="s1">, records processed: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1579 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">record_count</span><span class="p">))</span></pre></div> | ||
1580 | </div> | ||
1581 | </div> | ||
1582 | <div class='clearall'></div> | ||
1583 | <div class='section' id='section-98'> | ||
1584 | <div class='docs'> | ||
1585 | <div class='octowrap'> | ||
1586 | <a class='octothorpe' href='#section-98'>#</a> | ||
1587 | </div> | ||
1588 | <p>Update paging from/to_row for next batch</p> | ||
1589 | </div> | ||
1590 | <div class='code'> | ||
1591 | <div class="highlight"><pre> <span class="n">from_row</span> <span class="o">=</span> <span class="n">to_row</span> <span class="o">+</span> <span class="mi">1</span> | ||
1592 | <span class="k">if</span> <span class="n">to_row</span> <span class="o">+</span> <span class="n">batch_rows</span> <span class="o">></span> <span class="n">sheet_max_row</span><span class="p">:</span> | ||
1593 | <span class="n">to_row</span> <span class="o">=</span> <span class="n">sheet_max_row</span> | ||
1594 | <span class="k">else</span><span class="p">:</span> | ||
1595 | <span class="n">to_row</span> <span class="o">=</span> <span class="n">to_row</span> <span class="o">+</span> <span class="n">batch_rows</span></pre></div> | ||
1596 | </div> | ||
1597 | </div> | ||
1598 | <div class='clearall'></div> | ||
1599 | <div class='section' id='section-99'> | ||
1600 | <div class='docs'> | ||
1601 | <div class='octowrap'> | ||
1602 | <a class='octothorpe' href='#section-99'>#</a> | ||
1603 | </div> | ||
1604 | <p>End of Stream: Send Activate Version and update State</p> | ||
1605 | </div> | ||
1606 | <div class='code'> | ||
1607 | <div class="highlight"><pre> <span class="n">singer</span><span class="o">.</span><span class="n">write_message</span><span class="p">(</span><span class="n">activate_version_message</span><span class="p">)</span> | ||
1608 | <span class="n">write_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="n">sheet_title</span><span class="p">,</span> <span class="n">activate_version</span><span class="p">)</span> | ||
1609 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'COMPLETE SYNC, Stream: </span><span class="si">{}</span><span class="s1">, Activate Version: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sheet_title</span><span class="p">,</span> <span class="n">activate_version</span><span class="p">))</span> | ||
1610 | <span class="n">LOGGER</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s1">'FINISHED Syncing Sheet </span><span class="si">{}</span><span class="s1">, Total Rows: </span><span class="si">{}</span><span class="s1">'</span><span class="o">.</span><span class="n">format</span><span class="p">(</span> | ||
1611 | <span class="n">sheet_title</span><span class="p">,</span> <span class="n">row_num</span> <span class="o">-</span> <span class="mi">2</span><span class="p">))</span> <span class="c1"># subtract 1 for header row</span> | ||
1612 | <span class="n">update_currently_syncing</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span></pre></div> | ||
1613 | </div> | ||
1614 | </div> | ||
1615 | <div class='clearall'></div> | ||
1616 | <div class='section' id='section-100'> | ||
1617 | <div class='docs'> | ||
1618 | <div class='octowrap'> | ||
1619 | <a class='octothorpe' href='#section-100'>#</a> | ||
1620 | </div> | ||
1621 | <p>SHEETS_LOADED | ||
1622 | Add sheet to sheets_loaded</p> | ||
1623 | </div> | ||
1624 | <div class='code'> | ||
1625 | <div class="highlight"><pre> <span class="n">sheet_loaded</span> <span class="o">=</span> <span class="p">{}</span> | ||
1626 | <span class="n">sheet_loaded</span><span class="p">[</span><span class="s1">'spreadsheetId'</span><span class="p">]</span> <span class="o">=</span> <span class="n">spreadsheet_id</span> | ||
1627 | <span class="n">sheet_loaded</span><span class="p">[</span><span class="s1">'sheetId'</span><span class="p">]</span> <span class="o">=</span> <span class="n">sheet_id</span> | ||
1628 | <span class="n">sheet_loaded</span><span class="p">[</span><span class="s1">'title'</span><span class="p">]</span> <span class="o">=</span> <span class="n">sheet_title</span> | ||
1629 | <span class="n">sheet_loaded</span><span class="p">[</span><span class="s1">'loadDate'</span><span class="p">]</span> <span class="o">=</span> <span class="n">strftime</span><span class="p">(</span><span class="n">utils</span><span class="o">.</span><span class="n">now</span><span class="p">())</span> | ||
1630 | <span class="n">sheet_loaded</span><span class="p">[</span><span class="s1">'lastRowNumber'</span><span class="p">]</span> <span class="o">=</span> <span class="n">row_num</span> | ||
1631 | <span class="n">sheets_loaded</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">sheet_loaded</span><span class="p">)</span> | ||
1632 | |||
1633 | <span class="n">stream_name</span> <span class="o">=</span> <span class="s1">'sheet_metadata'</span></pre></div> | ||
1634 | </div> | ||
1635 | </div> | ||
1636 | <div class='clearall'></div> | ||
1637 | <div class='section' id='section-101'> | ||
1638 | <div class='docs'> | ||
1639 | <div class='octowrap'> | ||
1640 | <a class='octothorpe' href='#section-101'>#</a> | ||
1641 | </div> | ||
1642 | <p>Write sheet_metadata records if selected</p> | ||
1643 | </div> | ||
1644 | <div class='code'> | ||
1645 | <div class="highlight"><pre> <span class="n">sync_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">selected_streams</span><span class="p">,</span> <span class="n">catalog</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">sheet_metadata</span><span class="p">)</span> | ||
1646 | |||
1647 | <span class="n">stream_name</span> <span class="o">=</span> <span class="s1">'sheets_loaded'</span></pre></div> | ||
1648 | </div> | ||
1649 | </div> | ||
1650 | <div class='clearall'></div> | ||
1651 | <div class='section' id='section-102'> | ||
1652 | <div class='docs'> | ||
1653 | <div class='octowrap'> | ||
1654 | <a class='octothorpe' href='#section-102'>#</a> | ||
1655 | </div> | ||
1656 | <p>Write sheet_metadata records if selected</p> | ||
1657 | </div> | ||
1658 | <div class='code'> | ||
1659 | <div class="highlight"><pre> <span class="n">sync_stream</span><span class="p">(</span><span class="n">stream_name</span><span class="p">,</span> <span class="n">selected_streams</span><span class="p">,</span> <span class="n">catalog</span><span class="p">,</span> <span class="n">state</span><span class="p">,</span> <span class="n">sheets_loaded</span><span class="p">)</span></pre></div> | ||
1660 | </div> | ||
1661 | </div> | ||
1662 | <div class='clearall'></div> | ||
1663 | <div class='section' id='section-103'> | ||
1664 | <div class='docs'> | ||
1665 | <div class='octowrap'> | ||
1666 | <a class='octothorpe' href='#section-103'>#</a> | ||
1667 | </div> | ||
1668 | <p>Update file_metadata bookmark</p> | ||
1669 | </div> | ||
1670 | <div class='code'> | ||
1671 | <div class="highlight"><pre> <span class="n">write_bookmark</span><span class="p">(</span><span class="n">state</span><span class="p">,</span> <span class="s1">'file_metadata'</span><span class="p">,</span> <span class="n">strftime</span><span class="p">(</span><span class="n">this_datetime</span><span class="p">))</span> | ||
1672 | |||
1673 | <span class="k">return</span> | ||
1674 | |||
1675 | </pre></div> | ||
1676 | </div> | ||
1677 | </div> | ||
1678 | <div class='clearall'></div> | ||
1679 | </div> | ||
1680 | </body> | ||