]>
Commit | Line | Data |
---|---|---|
824f8c45 | 1 | <?php |
2 | //============================================================+ | |
3 | // File name : tcpdf_parser.php | |
4 | // Version : 1.0.014 | |
5 | // Begin : 2011-05-23 | |
6 | // Last Update : 2014-02-18 | |
7 | // Author : Nicola Asuni - Tecnick.com LTD - www.tecnick.com - info@tecnick.com | |
8 | // License : http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT GNU-LGPLv3 | |
9 | // ------------------------------------------------------------------- | |
10 | // Copyright (C) 2011-2014 Nicola Asuni - Tecnick.com LTD | |
11 | // | |
12 | // This file is part of TCPDF software library. | |
13 | // | |
14 | // TCPDF is free software: you can redistribute it and/or modify it | |
15 | // under the terms of the GNU Lesser General Public License as | |
16 | // published by the Free Software Foundation, either version 3 of the | |
17 | // License, or (at your option) any later version. | |
18 | // | |
19 | // TCPDF is distributed in the hope that it will be useful, but | |
20 | // WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
22 | // See the GNU Lesser General Public License for more details. | |
23 | // | |
24 | // You should have received a copy of the License | |
25 | // along with TCPDF. If not, see | |
26 | // <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>. | |
27 | // | |
28 | // See LICENSE.TXT file for more information. | |
29 | // ------------------------------------------------------------------- | |
30 | // | |
31 | // Description : This is a PHP class for parsing PDF documents. | |
32 | // | |
33 | //============================================================+ | |
34 | ||
35 | /** | |
36 | * @file | |
37 | * This is a PHP class for parsing PDF documents.<br> | |
38 | * @package com.tecnick.tcpdf | |
39 | * @author Nicola Asuni | |
40 | * @version 1.0.014 | |
41 | */ | |
42 | ||
43 | // include class for decoding filters | |
44 | require_once(dirname(__FILE__).'/include/tcpdf_filters.php'); | |
45 | ||
46 | /** | |
47 | * @class TCPDF_PARSER | |
48 | * This is a PHP class for parsing PDF documents.<br> | |
49 | * @package com.tecnick.tcpdf | |
50 | * @brief This is a PHP class for parsing PDF documents. | |
51 | * @version 1.0.010 | |
52 | * @author Nicola Asuni - info@tecnick.com | |
53 | */ | |
54 | class TCPDF_PARSER { | |
55 | ||
56 | /** | |
57 | * Raw PDF bytes starting at the '%PDF-' header; the constructor resets it to an empty string once parsing is done. | |
58 | * @private | |
59 | */ | |
60 | private $pdfdata = ''; | |
61 | ||
62 | /** | |
63 | * Cross-reference data: 'xref' maps "objnum_gen" keys to byte offsets (-1 for compressed objects), 'trailer' holds the trailer fields. | |
64 | * @protected | |
65 | */ | |
66 | protected $xref = array(); | |
67 | ||
68 | /** | |
69 | * Parsed PDF objects, indexed by the same "objnum_gen" keys used in the xref table. | |
70 | * @protected | |
71 | */ | |
72 | protected $objects = array(); | |
73 | ||
74 | /** | |
75 | * Class object for decoding filters. NOTE(review): not referenced by any method visible in this file - confirm whether it is still needed. | |
76 | * @private | |
77 | */ | |
78 | private $FilterDecoders; | |
79 | ||
80 | /** | |
81 | * Configuration flags with their defaults; individual values can be overridden through setConfig(). | |
82 | * @private | |
83 | */ | |
84 | private $cfg = array( | |
85 | 'die_for_errors' => false, | |
86 | 'ignore_filter_decoding_errors' => true, | |
87 | 'ignore_missing_filter_decoders' => true, | |
88 | ); | |
89 | ||
90 | // ----------------------------------------------------------------------------- | |
91 | ||
/**
 * Parse a PDF document and build the cross-reference data and the array of objects.
 * The result can be retrieved with getParsedData().
 * @param $data (string) PDF data to parse.
 * @param $cfg (array) Array of configuration parameters:
 *			'die_for_errors' : if true terminate the program execution in case of error, otherwise throw an exception;
 *			'ignore_filter_decoding_errors' : if true ignore filter decoding errors;
 *			'ignore_missing_filter_decoders' : if true ignore missing filter decoding errors.
 * @public
 * @since 1.0.000 (2011-05-24)
 */
public function __construct($data, $cfg=array()) {
	// apply the configuration first, so that 'die_for_errors' is also honoured by the input checks below
	$this->setConfig($cfg);
	if (empty($data)) {
		$this->Error('Empty PDF data.');
	}
	// find the pdf header starting position
	$trimpos = strpos($data, '%PDF-');
	if ($trimpos === false) {
		$this->Error('Invalid PDF data: missing %PDF header.');
	}
	// keep only the content that starts at the PDF header (all offsets are relative to it)
	$this->pdfdata = substr($data, $trimpos);
	// get xref and trailer data
	$this->xref = $this->getXrefData();
	// parse all document objects
	$this->objects = array();
	if (isset($this->xref['xref']) && is_array($this->xref['xref'])) {
		foreach ($this->xref['xref'] as $obj => $offset) {
			// only objects with a positive offset are stored directly in the file
			if (!isset($this->objects[$obj]) && ($offset > 0)) {
				$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
			}
		}
	}
	// release some memory
	$this->pdfdata = '';
}
130 | ||
/**
 * Override the default configuration flags with the values supplied by the caller.
 * Only the known keys are read; each supplied value is converted to boolean.
 * @param $cfg (array) Array of configuration parameters:
 *			'die_for_errors' : if true terminate the program execution in case of error, otherwise throw an exception;
 *			'ignore_filter_decoding_errors' : if true ignore filter decoding errors;
 *			'ignore_missing_filter_decoders' : if true ignore missing filter decoding errors.
 * @protected
 */
protected function setConfig($cfg) {
	$known_flags = array(
		'die_for_errors',
		'ignore_filter_decoding_errors',
		'ignore_missing_filter_decoders',
	);
	foreach ($known_flags as $flag) {
		if (isset($cfg[$flag])) {
			$this->cfg[$flag] = (bool) $cfg[$flag];
		}
	}
}
150 | ||
/**
 * Return the result of the parsing process.
 * @return (array) Two-element array: the xref/trailer data at index 0 and the parsed objects at index 1.
 * @public
 * @since 1.0.000 (2011-06-26)
 */
public function getParsedData() {
	$parsed = array();
	$parsed[] = $this->xref;
	$parsed[] = $this->objects;
	return $parsed;
}
160 | ||
/**
 * Get Cross-Reference (xref) table and trailer data from PDF document data.
 * When $offset is 0 the last 'startxref' entry of the document is used; otherwise $offset is
 * expected to point to an 'xref' keyword, to a cross-reference stream object or to a position
 * followed by a 'startxref' entry.
 * @param $offset (int) xref offset (if known).
 * @param $xref (array) previous xref array (if any).
 * @return Array containing xref and trailer data.
 * @protected
 * @since 1.0.000 (2011-05-24)
 */
protected function getXrefData($offset=0, $xref=array()) {
	$startxref_pattern = '/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i';
	$datalen = strlen($this->pdfdata);
	// offsets come from the (untrusted) document: reject the ones outside the data
	if (($offset < 0) || ($offset > $datalen)) {
		$this->Error('Invalid xref offset');
	}
	if ($offset == 0) {
		// find last startxref
		if (preg_match_all($startxref_pattern, $this->pdfdata, $matches, PREG_SET_ORDER) == 0) {
			$this->Error('Unable to find startxref');
		}
		$last = array_pop($matches);
		$startxref = intval($last[1]);
	} elseif (substr($this->pdfdata, $offset, 4) === 'xref') {
		// Already pointing at the xref table
		// (a direct comparison at $offset: no scan of the whole document and no false/0 ambiguity)
		$startxref = $offset;
	} elseif (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
		// Cross-Reference Stream object
		$startxref = $offset;
	} elseif (preg_match($startxref_pattern, $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
		// startxref found
		$startxref = intval($matches[1][0]);
	} else {
		$this->Error('Unable to find startxref');
	}
	if ($startxref >= $datalen) {
		$this->Error('Invalid xref offset');
	}
	// check xref position
	if (substr($this->pdfdata, $startxref, 4) === 'xref') {
		// Cross-Reference
		$xref = $this->decodeXref($startxref, $xref);
	} else {
		// Cross-Reference Stream
		$xref = $this->decodeXrefStream($startxref, $xref);
	}
	if (empty($xref)) {
		$this->Error('Unable to find xref');
	}
	return $xref;
}
202 | ||
/**
 * Decode a classic Cross-Reference section (xref table followed by a trailer dictionary).
 * Entries already present in $xref are never overwritten, and the trailer fields are read only
 * when $xref does not contain a trailer yet, so the first section processed takes precedence.
 * Previous sections referenced by /Prev are decoded recursively through getXrefData().
 * @param $startxref (int) Offset at which the xref section starts (position of the 'xref' keyword).
 * @param $xref (array) Previous xref array (if any).
 * @return Array containing xref and trailer data.
 * @protected
 * @since 1.0.000 (2011-06-20)
 */
protected function decodeXref($startxref, $xref=array()) {
	// remember where this section starts, to detect a /Prev entry that points back to it
	$section_start = $startxref;
	$startxref += 4; // 4 is the length of the word 'xref'
	// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
	$offset = $startxref + strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $startxref);
	// initialize object number
	$obj_num = 0;
	// search for cross-reference entries or subsection
	while (preg_match('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
		if ($matches[0][1] != $offset) {
			// we are on another section
			break;
		}
		$offset += strlen($matches[0][0]);
		if ($matches[3][0] == 'n') {
			// create unique object index: [object number]_[generation number]
			$index = $obj_num.'_'.intval($matches[2][0]);
			// check if object already exist
			if (!isset($xref['xref'][$index])) {
				// store object offset position
				$xref['xref'][$index] = intval($matches[1][0]);
			}
			++$obj_num;
		} elseif ($matches[3][0] == 'f') {
			// free entry: it only consumes an object number
			++$obj_num;
		} else {
			// subsection header: first object number of the subsection
			$obj_num = intval($matches[1][0]);
		}
	}
	// get trailer data
	if (preg_match('/trailer[\s]*<<(.*)>>/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
		$trailer_data = $matches[1][0];
		if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
			// get only the last updated version
			$xref['trailer'] = array();
			// parse trailer_data
			if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
				$xref['trailer']['size'] = intval($matches[1]);
			}
			if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
				$xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);
			}
			if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
				$xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);
			}
			if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
				$xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);
			}
			if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
				$xref['trailer']['id'] = array();
				$xref['trailer']['id'][0] = $matches[1];
				$xref['trailer']['id'][1] = $matches[2];
			}
		}
		if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
			$prevxref = intval($matches[1]);
			// A /Prev of 0 (which restarts from the last startxref) or one that points to this same
			// section would recurse forever on a malformed document: ignore it.
			// NOTE: longer reference cycles spanning several sections are not detected here.
			if (($prevxref > 0) && ($prevxref != $section_start)) {
				// get previous xref
				$xref = $this->getXrefData($prevxref, $xref);
			}
		}
	} else {
		$this->Error('Unable to find trailer');
	}
	return $xref;
}
274 | ||
/**
 * Decode a Cross-Reference Stream section.
 * The stream dictionary supplies the field widths (/W), the optional subsections (/Index), the
 * optional PNG predictor parameters (/DecodeParms) and, when $xref has no trailer yet, the
 * trailer fields. The decoded stream is split in entries of sum(/W) bytes:
 * type 1 entries are stored as "objnum_gen" => offset (existing keys are not overwritten),
 * type 2 (compressed) entries are stored as "streamobj_0_index" => -1.
 * Previous sections referenced by /Prev are decoded recursively through getXrefData().
 * @param $startxref (int) Offset at which the xref section starts.
 * @param $xref (array) Previous xref array (if any).
 * @return Array containing xref and trailer data.
 * @protected
 * @since 1.0.003 (2013-03-16)
 */
protected function decodeXrefStream($startxref, $xref=array()) {
	// try to read Cross-Reference Stream
	$xrefobj = $this->getRawObject($startxref);
	$xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true);
	if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
		// get only the last updated version
		$xref['trailer'] = array();
		$filltrailer = true;
	} else {
		$filltrailer = false;
	}
	if (!isset($xref['xref'])) {
		$xref['xref'] = array();
	}
	$valid_crs = false;
	$columns = 0;
	$predictor = null; // no predictor unless /DecodeParms declares one
	$wb = null; // field widths (/W): required to decode the entries
	$subsections = array(); // list of array(first object number, number of entries) from /Index
	$prevxref = null;
	// the stream dictionary is expected to be the first element of the object
	$sarr = (isset($xrefcrs[0][1]) && is_array($xrefcrs[0][1])) ? $xrefcrs[0][1] : array();
	foreach ($sarr as $k => $v) {
		// only name elements followed by a value are relevant
		if (!isset($v[0]) || ($v[0] != '/') || !isset($sarr[($k + 1)])) {
			continue;
		}
		$next = $sarr[($k + 1)];
		switch ($v[1]) {
			case 'Type': {
				if (($next[0] == '/') && ($next[1] == 'XRef')) {
					$valid_crs = true;
				}
				break;
			}
			case 'Index': {
				// pairs of (first object number, number of entries), one pair for each subsection
				if (($next[0] == '[') && is_array($next[1])) {
					$items = $next[1];
					for ($i = 0; isset($items[($i + 1)]); $i += 2) {
						$subsections[] = array(intval($items[$i][1]), intval($items[($i + 1)][1]));
					}
				}
				break;
			}
			case 'Prev': {
				if ($next[0] == 'numeric') {
					// get previous xref offset
					$prevxref = intval($next[1]);
				}
				break;
			}
			case 'W': {
				// number of bytes (in the decoded stream) of the corresponding field
				if (($next[0] == '[') && isset($next[1][0][1], $next[1][1][1], $next[1][2][1])) {
					$wb = array(intval($next[1][0][1]), intval($next[1][1][1]), intval($next[1][2][1]));
				}
				break;
			}
			case 'DecodeParms': {
				if (is_array($next[1])) {
					$decpar = $next[1];
					foreach ($decpar as $kdc => $vdc) {
						if (!isset($vdc[0]) || ($vdc[0] != '/') || !isset($decpar[($kdc + 1)]) || ($decpar[($kdc + 1)][0] != 'numeric')) {
							continue;
						}
						if ($vdc[1] == 'Columns') {
							$columns = intval($decpar[($kdc + 1)][1]);
						} elseif ($vdc[1] == 'Predictor') {
							$predictor = intval($decpar[($kdc + 1)][1]);
						}
					}
				}
				break;
			}
			case 'Size': {
				if ($filltrailer && ($next[0] == 'numeric')) {
					$xref['trailer']['size'] = $next[1];
				}
				break;
			}
			case 'Root': {
				if ($filltrailer && ($next[0] == 'objref')) {
					$xref['trailer']['root'] = $next[1];
				}
				break;
			}
			case 'Info': {
				if ($filltrailer && ($next[0] == 'objref')) {
					$xref['trailer']['info'] = $next[1];
				}
				break;
			}
			case 'Encrypt': {
				if ($filltrailer && ($next[0] == 'objref')) {
					$xref['trailer']['encrypt'] = $next[1];
				}
				break;
			}
			case 'ID': {
				if ($filltrailer && is_array($next[1]) && isset($next[1][0][1], $next[1][1][1])) {
					$xref['trailer']['id'] = array();
					$xref['trailer']['id'][0] = $next[1][0][1];
					$xref['trailer']['id'][1] = $next[1][1][1];
				}
				break;
			}
			default: {
				break;
			}
		}
	}
	// decode data (without valid field widths the entries cannot be decoded, so nothing is added)
	$entrylen = ($wb === null) ? 0 : ($wb[0] + $wb[1] + $wb[2]);
	if ($valid_crs && ($entrylen > 0) && (min($wb) >= 0) && isset($xrefcrs[1][3][0])) {
		// convert the decoded stream into a zero-based array of integers
		$raw = $xrefcrs[1][3][0];
		$bytes = ($raw === '') ? array() : array_values(unpack('C*', $raw));
		if (($predictor !== null) && ($predictor >= 10)) {
			// PNG predictors: every row is prefixed by a filter-type byte (Columns defaults to 1)
			$bytes = $this->unpredictXrefRows($bytes, (($columns > 0) ? $columns : 1));
		}
		// NOTE(review): the TIFF predictor (2) is not supported; such data is read as-is.
		// position inside the /Index subsections; without /Index numbering starts at 0
		$sub_idx = 0;
		$obj_num = 0;
		$sub_left = 0;
		if (isset($subsections[0])) {
			$obj_num = $subsections[0][0];
			$sub_left = $subsections[0][1];
		}
		foreach (array_chunk($bytes, $entrylen) as $chunk) {
			if (count($chunk) < $entrylen) {
				// truncated trailing entry
				break;
			}
			// move to the next subsection when the current one is exhausted;
			// if there are no more subsections the numbering simply continues
			while (($sub_left <= 0) && isset($subsections[($sub_idx + 1)])) {
				++$sub_idx;
				$obj_num = $subsections[$sub_idx][0];
				$sub_left = $subsections[$sub_idx][1];
			}
			// assemble the three big-endian fields; the type defaults to 1 when its width is 0
			$fields = array((($wb[0] == 0) ? 1 : 0), 0, 0);
			$pos = 0;
			for ($c = 0; $c < 3; ++$c) {
				for ($b = 0; $b < $wb[$c]; ++$b) {
					$fields[$c] = (($fields[$c] << 8) | $chunk[$pos]);
					++$pos;
				}
			}
			switch ($fields[0]) {
				case 1: { // (n) objects that are in use but are not compressed
					// create unique object index: [object number]_[generation number]
					$index = $obj_num.'_'.$fields[2];
					// check if object already exist
					if (!isset($xref['xref'][$index])) {
						// store object offset position
						$xref['xref'][$index] = $fields[1];
					}
					break;
				}
				case 2: { // compressed objects
					// $fields[1] = object number of the object stream in which this object is stored
					// $fields[2] = index of this object within the object stream
					$index = $fields[1].'_0_'.$fields[2];
					$xref['xref'][$index] = -1;
					break;
				}
				default: { // (f) free objects and unknown types are ignored
					break;
				}
			}
			++$obj_num;
			--$sub_left;
		}
	} // end decoding data
	// A /Prev of 0 or one that points to this same section would recurse forever: ignore it.
	// NOTE: longer reference cycles spanning several sections are not detected here.
	if (($prevxref !== null) && ($prevxref > 0) && ($prevxref != $startxref)) {
		// get previous xref
		$xref = $this->getXrefData($prevxref, $xref);
	}
	return $xref;
}

/**
 * Reverse the PNG prediction applied to the rows of a cross-reference stream.
 * Each encoded row is one filter-type byte followed by $columns data bytes
 * (one byte per pixel is assumed, as for cross-reference streams).
 * @param $bytes (array) Zero-based array of byte values of the encoded data.
 * @param $columns (int) Number of data bytes in each row.
 * @return (array) Zero-based flat array of the reconstructed byte values.
 * @private
 */
private function unpredictXrefRows($bytes, $columns) {
	$decoded = array();
	// the row above the first one is made of zeros
	$prev_row = array_fill(0, $columns, 0);
	foreach (array_chunk($bytes, ($columns + 1)) as $row) {
		$filter = $row[0];
		$cur_row = array();
		for ($j = 0; $j < $columns; ++$j) {
			if (!isset($row[($j + 1)])) {
				// truncated trailing row
				break;
			}
			$val = $row[($j + 1)];
			$up = isset($prev_row[$j]) ? $prev_row[$j] : 0;
			if ($j == 0) {
				$left = 0;
				$upleft = 0;
			} else {
				// the prediction is based on the already reconstructed bytes, not on the encoded ones
				$left = $cur_row[($j - 1)];
				$upleft = isset($prev_row[($j - 1)]) ? $prev_row[($j - 1)] : 0;
			}
			switch ($filter) {
				case 0: { // PNG None
					break;
				}
				case 1: { // PNG Sub
					$val += $left;
					break;
				}
				case 2: { // PNG Up
					$val += $up;
					break;
				}
				case 3: { // PNG Average (integer division, rounded down)
					$val += (($left + $up) >> 1);
					break;
				}
				case 4: { // PNG Paeth
					$p = ($left + $up - $upleft);
					$pa = abs($p - $left);
					$pb = abs($p - $up);
					$pc = abs($p - $upleft);
					// nearest neighbour, ties resolved in the order left, up, upper-left
					if (($pa <= $pb) && ($pa <= $pc)) {
						$val += $left;
					} elseif ($pb <= $pc) {
						$val += $up;
					} else {
						$val += $upleft;
					}
					break;
				}
				default: {
					$this->Error('Unknown PNG predictor');
					break;
				}
			}
			$cur_row[$j] = ($val & 0xff);
			$decoded[] = $cur_row[$j];
		}
		$prev_row = $cur_row;
	}
	return $decoded;
}
486 | ||
/**
 * Read the token that starts at the given offset (after skipping white space and comments).
 * Arrays and dictionaries are read recursively: their value is the list of contained elements.
 * When nothing can be read (end of data or unknown token) the type is an empty string and the
 * returned offset does not advance.
 * @param $offset (int) Object offset.
 * @return array containing object type, raw value and offset to next object
 * @protected
 * @since 1.0.000 (2011-06-20)
 */
protected function getRawObject($offset=0) {
	$objtype = ''; // object type to be returned
	$objval = ''; // object value to be returned
	$datalen = strlen($this->pdfdata);
	if (($offset < 0) || ($offset >= $datalen)) {
		// nothing to read
		return array($objtype, $objval, $offset);
	}
	// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
	$offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
	if ($offset >= $datalen) {
		// only white space was left
		return array($objtype, $objval, $offset);
	}
	// get first char
	$char = $this->pdfdata[$offset];
	// get object type
	switch ($char) {
		case '%': { // \x25 PERCENT SIGN
			// skip comment and search for next token
			$next = strcspn($this->pdfdata, "\r\n", $offset);
			if ($next > 0) {
				$offset += $next;
				return $this->getRawObject($offset);
			}
			break;
		}
		case '/': { // \x2F SOLIDUS
			// name object
			$objtype = $char;
			++$offset;
			if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
				$objval = $matches[1]; // unescaped value
				$offset += strlen($objval);
			}
			break;
		}
		case '(': // \x28 LEFT PARENTHESIS
		case ')': { // \x29 RIGHT PARENTHESIS
			// literal string object
			$objtype = $char;
			++$offset;
			$strpos = $offset;
			if ($char == '(') {
				$open_bracket = 1;
				while ($open_bracket > 0) {
					if (!isset($this->pdfdata[$strpos])) {
						// unterminated string
						break;
					}
					$ch = $this->pdfdata[$strpos];
					switch ($ch) {
						case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
							// skip next character
							++$strpos;
							break;
						}
						case '(': { // LEFT PARENTHESIS (28h)
							++$open_bracket;
							break;
						}
						case ')': { // RIGHT PARENTHESIS (29h)
							--$open_bracket;
							break;
						}
					}
					++$strpos;
				}
				$objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
				$offset = $strpos;
			}
			break;
		}
		case '[': // \x5B LEFT SQUARE BRACKET
		case ']': { // \x5D RIGHT SQUARE BRACKET
			// array object
			$objtype = $char;
			++$offset;
			if ($char == '[') {
				// get array content
				$objval = array();
				do {
					$prev_offset = $offset;
					// get element
					$element = $this->getRawObject($offset);
					$offset = $element[2];
					$objval[] = $element;
					// stop also when no progress is made (truncated or malformed data)
				} while (($element[0] != ']') && ($offset != $prev_offset));
				// remove closing delimiter
				array_pop($objval);
			}
			break;
		}
		case '<': // \x3C LESS-THAN SIGN
		case '>': { // \x3E GREATER-THAN SIGN
			if (isset($this->pdfdata[($offset + 1)]) && ($this->pdfdata[($offset + 1)] == $char)) {
				// dictionary object
				$objtype = $char.$char;
				$offset += 2;
				if ($char == '<') {
					// get dictionary content (flat list: key, value, key, value, ...)
					$objval = array();
					do {
						$prev_offset = $offset;
						// get element
						$element = $this->getRawObject($offset);
						$offset = $element[2];
						$objval[] = $element;
						// stop also when no progress is made (truncated or malformed data)
					} while (($element[0] != '>>') && ($offset != $prev_offset));
					// remove closing delimiter
					array_pop($objval);
				}
			} else {
				// hexadecimal string object
				$objtype = $char;
				++$offset;
				// \G anchors the match at $offset, avoiding a copy of the remaining document
				if (($char == '<') && (preg_match('/\G([0-9A-Fa-f\x09\x0a\x0c\x0d\x20]+)>/iU', $this->pdfdata, $matches, 0, $offset) == 1)) {
					// remove white space characters
					// (strtr() with an empty replacement string leaves the input unchanged)
					$objval = str_replace(array("\x09", "\x0a", "\x0c", "\x0d", "\x20"), '', $matches[1]);
					$offset += strlen($matches[0]);
				} elseif (($endpos = strpos($this->pdfdata, '>', $offset)) !== false) {
					$offset = $endpos + 1;
				}
			}
			break;
		}
		default: {
			if (substr($this->pdfdata, $offset, 6) === 'endobj') {
				// indirect object
				$objtype = 'endobj';
				$offset += 6;
			} elseif (substr($this->pdfdata, $offset, 4) === 'null') {
				// null object
				$objtype = 'null';
				$offset += 4;
				$objval = 'null';
			} elseif (substr($this->pdfdata, $offset, 4) === 'true') {
				// boolean true object
				$objtype = 'boolean';
				$offset += 4;
				$objval = 'true';
			} elseif (substr($this->pdfdata, $offset, 5) === 'false') {
				// boolean false object
				$objtype = 'boolean';
				$offset += 5;
				$objval = 'false';
			} elseif (substr($this->pdfdata, $offset, 6) === 'stream') {
				// start stream object
				$objtype = 'stream';
				$offset += 6;
				// the keyword must be followed by an end-of-line marker (LF or CR+LF)
				if (preg_match('/\G([\r]?[\n])/isU', $this->pdfdata, $matches, 0, $offset) == 1) {
					$offset += strlen($matches[0]);
					if (preg_match('/(endstream)[\x09\x0a\x0c\x0d\x20]/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) == 1) {
						// the stream content ends where the 'endstream' keyword begins;
						// the keyword itself is left to be read as the next token
						$objval = substr($this->pdfdata, $offset, ($matches[0][1] - $offset));
						$offset = $matches[1][1];
					}
				}
			} elseif (substr($this->pdfdata, $offset, 9) === 'endstream') {
				// end stream object
				$objtype = 'endstream';
				$offset += 9;
			} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
				// indirect object reference
				$objtype = 'objref';
				$offset += strlen($matches[0]);
				$objval = intval($matches[1]).'_'.intval($matches[2]);
			} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
				// object start
				$objtype = 'obj';
				$objval = intval($matches[1]).'_'.intval($matches[2]);
				$offset += strlen($matches[0]);
			} elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
				// numeric object
				$objtype = 'numeric';
				$objval = substr($this->pdfdata, $offset, $numlen);
				$offset += $numlen;
			}
			break;
		}
	}
	return array($objtype, $objval, $offset);
}
664 | ||
/**
 * Get content of indirect object.
 * The object header ("N G obj") must be found exactly at the given offset (leading zeros are
 * skipped); otherwise the reference is considered a reference to the null object.
 * @param $obj_ref (string) Object number and generation number separated by underscore character.
 * @param $offset (int) Object offset.
 * @param $decoding (boolean) If true decode streams.
 * @return array containing object data: the list of the elements found before 'endobj', or array('null', 'null', offset) for an undefined object.
 * @protected
 * @since 1.0.000 (2011-05-24)
 */
protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
	$obj = explode('_', $obj_ref);
	if (count($obj) != 2) {
		// report the offending reference string (not the exploded array)
		$this->Error('Invalid object reference: '.$obj_ref);
		return;
	}
	$objref = $obj[0].' '.$obj[1].' obj';
	if (($offset < 0) || ($offset >= strlen($this->pdfdata))) {
		// offset outside the document: undefined object
		return array('null', 'null', $offset);
	}
	// ignore leading zeros
	$offset += strspn($this->pdfdata, '0', $offset);
	// compare directly at $offset: no scan of the whole document and no false/0 ambiguity
	if (substr($this->pdfdata, $offset, strlen($objref)) !== $objref) {
		// an indirect reference to an undefined object shall be considered a reference to the null object
		return array('null', 'null', $offset);
	}
	// starting position of object content
	$offset += strlen($objref);
	// get array of object content
	$objdata = array();
	$i = 0; // object main index
	do {
		$prev_offset = $offset;
		// get element
		$element = $this->getRawObject($offset);
		$offset = $element[2];
		// decode stream using stream's dictionary information
		if ($decoding && ($element[0] == 'stream') && (isset($objdata[($i - 1)][0])) && ($objdata[($i - 1)][0] == '<<')) {
			$element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
		}
		$objdata[$i] = $element;
		++$i;
		// stop also when no progress is made (missing 'endobj' on truncated or malformed data)
	} while (($element[0] != 'endobj') && ($offset != $prev_offset));
	// remove closing delimiter
	array_pop($objdata);
	// return raw object content
	return $objdata;
}
708 | ||
/**
 * Get the content of object, resolving indirect object reference if necessary.
 * A reference is resolved from the already parsed objects or, if not yet parsed, from the
 * offset stored in the cross-reference table (the parsed object is then cached without
 * decoding its streams). Any other element, or a reference that cannot be resolved, is
 * returned unchanged.
 * @param $obj (array) Object element (type, value, offset).
 * @return array containing object data: for a resolved reference, the list of elements returned by getIndirectObject().
 * @protected
 * @since 1.0.000 (2011-06-26)
 */
protected function getObjectVal($obj) {
	if ($obj[0] == 'objref') {
		$ref = $obj[1];
		// reference to indirect object
		if (isset($this->objects[$ref])) {
			// this object has been already parsed
			return $this->objects[$ref];
		}
		// object offsets are stored under the 'xref' key of the cross-reference data;
		// non-positive offsets (e.g. -1 for compressed objects) cannot be read directly
		if (isset($this->xref['xref'][$ref]) && ($this->xref['xref'][$ref] > 0)) {
			// parse new object
			$this->objects[$ref] = $this->getIndirectObject($ref, $this->xref['xref'][$ref], false);
			return $this->objects[$ref];
		}
	}
	return $obj;
}
730 | ||
/**
 * Decode the specified stream.
 * The stream is first cut to the length declared in the dictionary (when shorter than the
 * available data), then the filters listed by /Filter are applied in order. Filters that are
 * not available in TCPDF_FILTERS are not applied and are returned as remaining filters.
 * @param $sdic (array) Stream's dictionary array.
 * @param $stream (string) Stream to decode.
 * @return array containing decoded stream data and remaining filters.
 * @protected
 * @since 1.0.000 (2011-06-22)
 */
protected function decodeStream($sdic, $stream) {
	// get stream length and filters
	$slength = strlen($stream);
	if ($slength <= 0) {
		return array('', array());
	}
	$filters = array();
	foreach ($sdic as $k => $v) {
		// only name elements followed by a value are relevant
		if (($v[0] != '/') || !isset($sdic[($k + 1)])) {
			continue;
		}
		$next = $sdic[($k + 1)];
		if (($v[1] == 'Length') && ($next[0] == 'numeric')) {
			// get declared stream length (a negative value is invalid and ignored)
			$declength = intval($next[1]);
			if (($declength >= 0) && ($declength < $slength)) {
				$stream = substr($stream, 0, $declength);
				$slength = $declength;
			}
		} elseif ($v[1] == 'Filter') {
			// resolve indirect object
			$objval = $this->getObjectVal($next);
			if (isset($objval[0]) && is_array($objval[0])) {
				// a resolved indirect object is a list of elements: the value is the first one
				$objval = $objval[0];
			}
			if ($objval[0] == '/') {
				// single filter
				$filters[] = $objval[1];
			} elseif (($objval[0] == '[') && is_array($objval[1])) {
				// array of filters
				foreach ($objval[1] as $flt) {
					if ($flt[0] == '/') {
						$filters[] = $flt[1];
					}
				}
			}
		}
	}
	// decode the stream
	$remaining_filters = array();
	if (!empty($filters)) {
		// the list of available filters does not change while decoding
		$available_filters = TCPDF_FILTERS::getAvailableFilters();
		foreach ($filters as $filter) {
			if (in_array($filter, $available_filters, true)) {
				try {
					$stream = TCPDF_FILTERS::decodeFilter($filter, $stream);
				} catch (Exception $e) {
					$emsg = $e->getMessage();
					// messages starting with '~' signal a missing decoder; the others a decoding error
					$missing_decoder = (isset($emsg[0]) && ($emsg[0] == '~'));
					if (($missing_decoder && !$this->cfg['ignore_missing_filter_decoders'])
						|| (!$missing_decoder && !$this->cfg['ignore_filter_decoding_errors'])) {
						$this->Error($emsg);
					}
				}
			} else {
				// add missing filter to array
				$remaining_filters[] = $filter;
			}
		}
	}
	return array($stream, $remaining_filters);
}
792 | ||
/**
 * Report a fatal parsing error.
 * When the 'die_for_errors' configuration flag is true the message is printed and the
 * execution is terminated; otherwise an Exception is thrown.
 * @param $msg (string) The error message
 * @public
 * @since 1.0.000 (2011-05-23)
 */
public function Error($msg) {
	$terminate = $this->cfg['die_for_errors'];
	if (!$terminate) {
		throw new Exception('TCPDF_PARSER ERROR: '.$msg);
	}
	die('<strong>TCPDF_PARSER ERROR: </strong>'.$msg);
}
806 | ||
807 | } // END OF TCPDF_PARSER CLASS | |
808 | ||
809 | //============================================================+ | |
810 | // END OF FILE | |
811 | //============================================================+ |