HTML2PDF_parsingHtml::_analiseCode()   F
last analyzed

Complexity

Conditions 39
Paths > 20000

Size

Total Lines 147
Code Lines 99

Duplication

Lines 6
Ratio 4.08 %

Importance

Changes 0
Metric Value
cc 39
eloc 99
nc 2949121
nop 1
dl 6
loc 147
rs 2
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php if ( !defined( 'ABSPATH' ) ) exit;
2
/**
3
 * HTML2PDF Library - parsingHtml class
4
 *
5
 * HTML => PDF convertor
6
 * distributed under the LGPL License
7
 *
8
 * @author      Laurent MINGUET <[email protected]>
9
 * @version     4.03
10
 */
11
12
class HTML2PDF_parsingHtml
13
{
14
    protected    $_html     = '';        // HTML code to parse
15
    protected    $_num      = 0;         // table number
16
    protected    $_level    = 0;         // table level
17
    protected    $_encoding = '';        // encoding
18
    public       $code      = array();   // parsed HTML code
19
20
    const HTML_TAB = '        ';
21
22
    /**
     * Build a parser with no HTML loaded, no parsed actions, and a level
     * stack that only contains the root level (0).
     *
     * @param   string $encoding encoding name, stored through setEncoding()
     * @access  public
     */
    public function __construct($encoding = 'UTF-8')
    {
        // nothing to parse and nothing parsed yet
        $this->code  = array();
        $this->_html = '';

        // the counter starts at 0 and the level stack starts with that root value
        $this->_num   = 0;
        $this->_level = array(0);

        $this->setEncoding($encoding);
    }
36
37
    /**
     * Store the encoding name; _prepareTxt() passes it to html_entity_decode().
     *
     * @param   string $encoding encoding name
     * @access  public
     */
    public function setEncoding($encoding)
    {
        $this->_encoding = $encoding;
    }
47
48
    /**
     * Define the HTML code to parse, with the HTML comments removed.
     *
     * @param   string $html HTML code
     * @access  public
     */
    public function setHTML($html)
    {
        // remove the HTML comments; the U (ungreedy) flag ends each comment at its own "-->"
        $stripped = preg_replace('/<!--(.*)-->/isU', '', $html);

        // preg_replace() returns null when PCRE fails (for instance when a limit is hit
        // on a very large comment): keep the unstripped HTML instead of silently
        // replacing the whole document with nothing
        if ($stripped === null) {
            $stripped = (string) $html;
        }

        // save the HTML code
        $this->_html = $stripped;
    }
62
63
    /**
     * Parse the HTML given to setHTML() and store the list of actions in $this->code.
     *
     * Each tag becomes the array returned by _analiseCode() plus an 'html_pos' entry,
     * each text becomes array('name' => 'write', 'close' => false, 'param' => array('txt' => ...)).
     * An autoclosed tag produces two actions: the opening one followed by a closing one.
     * Tag-like parts that _analiseCode() does not recognise are handled as text.
     *
     * @throws  HTML2PDF_exception code 3 when a closing tag is found while no tag is open,
     *                             code 4 when a closing tag does not match the last open tag,
     *                             code 5 when some tags are still open at the end
     * @access  public
     */
    public function parse()
    {
        // stack of the names of the currently open tags, used for the HTML validation
        $parents = array();

        // flag : are we in a <pre> Tag ?
        $tagPreIn = false;

        // action inserted between two lines of the content of a <pre> Tag
        $tagPreBr = array(
            'name'  => 'br',
            'close' => false,
            'param' => array(
                'style' => array(),
                'num'   => 0
            )
        );

        // tags that do not have to be closed
        $tagsNotClosed = array(
            'br', 'hr', 'img', 'col',
            'input', 'link', 'option',
            'circle', 'ellipse', 'path', 'rect', 'line', 'polygon', 'polyline'
        );

        // split the HTML in tags and texts
        $tmp = array();
        $this->_searchCode($tmp);

        // all the actions to do
        $actions = array();

        foreach ($tmp as $part) {
            // if it is a tag code
            if ($part[0]=='code') {
                $res = $this->_analiseCode($part[1]);

                if ($res) {
                    // it is a real HTML tag : save its position in the HTML code
                    $res['html_pos'] = $part[2];

                    // if the tag must be closed
                    if (!in_array($res['name'], $tagsNotClosed, true)) {
                        if ($res['close']) {
                            // closing tag : it must match the last opened tag
                            $last = count($parents) - 1;
                            if ($last < 0) {
                                throw new HTML2PDF_exception(3, $res['name'], $this->getHtmlErrorCode($res['html_pos']));
                            }
                            if ($parents[$last]!=$res['name']) {
                                throw new HTML2PDF_exception(4, $parents, $this->getHtmlErrorCode($res['html_pos']));
                            }
                            unset($parents[$last]);
                        } elseif ($res['autoclose']) {
                            // autoclosed tag : save the opening action ...
                            $actions[] = $res;

                            // ... and turn $res into the closing action, saved below.
                            // NOTE(review): this writes the key 'params' while every other
                            // place uses 'param', so the closing action keeps the opening
                            // parameters. Left unchanged because the consumers of the actions
                            // are not visible here - confirm the intent before renaming.
                            $res['params'] = array();
                            $res['close'] = true;
                        } else {
                            // opening tag : one more parent for the validation
                            $parents[count($parents)] = $res['name'];
                        }

                        // a <pre> tag (or <code> tag) that is not autoclosed updates the flag
                        if (($res['name']=='pre' || $res['name']=='code') && !$res['autoclose']) {
                            $tagPreIn = !$res['close'];
                        }
                    }

                    // save the action
                    $actions[] = $res;
                } else {
                    // it is not a real HTML tag => it is handled as a text just below
                    $part[0]='txt';
                }
            }

            // if it is text
            if ($part[0]=='txt') {
                if (!$tagPreIn) {
                    // outside of a <pre> tag : a single text action
                    $actions[] = array(
                        'name'  => 'write',
                        'close' => false,
                        'param' => array('txt' => $this->_prepareTxt($part[1])),
                    );
                } else {
                    // inside of a <pre> tag : one text action per line, separated by <br>
                    $lines = explode("\n", str_replace("\r", '', $part[1]));

                    foreach ($lines as $k => $txt) {
                        // keep the tabulations and the spaces of the line
                        $txt = str_replace("\t", self::HTML_TAB, $txt);
                        $txt = str_replace(' ', '&nbsp;', $txt);

                        // add a break line before every line but the first one
                        if ($k>0) $actions[] = $tagPreBr;

                        $actions[] = array(
                            'name'  => 'write',
                            'close' => false,
                            'param' => array('txt' => $this->_prepareTxt($txt, false)),
                        );
                    }
                }
            }
        }

        // the begin and the end of each text are cleaned up,
        // based on the tags that surround it
        $tagsToClean = array(
            'page', 'page_header', 'page_footer', 'form',
            'table', 'thead', 'tfoot', 'tr', 'td', 'th', 'br',
            'div', 'hr', 'p', 'ul', 'ol', 'li',
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
            'bookmark', 'fieldset', 'legend',
            'draw', 'circle', 'ellipse', 'path', 'rect', 'line', 'g', 'polygon', 'polyline',
            'option'
        );

        $nb = count($actions);
        for ($k=0; $k<$nb; $k++) {
            // only the texts are concerned
            if ($actions[$k]['name']!='write') continue;

            $txt = $actions[$k]['param']['txt'];

            // The previous action may have been removed by an earlier iteration (an empty
            // text directly followed by another text): isset() avoids reading a missing
            // entry. It also covers the first and the last action.
            if (isset($actions[$k-1]) && in_array($actions[$k-1]['name'], $tagsToClean, true)) {
                $txt = ltrim($txt);
            }
            if (isset($actions[$k+1]) && in_array($actions[$k+1]['name'], $tagsToClean, true)) {
                $txt = rtrim($txt);
            }

            // if the text is empty => remove the action
            if (!strlen($txt)) {
                unset($actions[$k]);
            } else {
                $actions[$k]['param']['txt'] = $txt;
            }
        }

        // if some tags are still open => HTML validator ERROR
        if (count($parents)) throw new HTML2PDF_exception(5, $parents);

        // save the actions to do, reindexed from 0
        $this->code = array_values($actions);
    }
223
224
    /**
     * Prepare a text : optional whitespace collapsing, then HTML entity decoding.
     *
     * @param   string  $txt    text to prepare
     * @param   boolean $spaces true => each run of whitespace is replaced by a single space
     * @return  string  prepared text
     * @access  protected
     */
    protected function _prepareTxt($txt, $spaces = true)
    {
        if ($spaces) {
            $txt = preg_replace('/\s+/is', ' ', $txt);
        }

        // the euro entity is replaced by hand, before the generic entity decoding
        $withEuro = str_replace('&euro;', '€', $txt);

        return html_entity_decode($withEuro, ENT_QUOTES, $this->_encoding);
    }
239
240
    /**
     * Split the HTML code into a list of tags and texts.
     *
     * The result is written in $tmp (any previous content is dropped) :
     *  - array('txt', text) for a text,
     *  - array('code', tag, offset) for a tag, the tag being trimmed and the offset
     *    being the position where the search that found this tag started.
     *
     * @param    &array   $tmp   array's result
     * @return   null
     */
    protected function _searchCode(&$tmp)
    {
        $tmp = array();

        // first group : a tag, second group : a piece of text
        $pattern = '/(<[^>]+>)|([^<]+)+/isU';

        $pendingText = '';   // text read since the last tag
        $cursor      = 0;    // where the next search starts

        while (true) {
            $found = preg_match($pattern, $this->_html, $hit, PREG_OFFSET_CAPTURE, $cursor);
            if (!$found) {
                break;
            }

            if (!$hit[1][0]) {
                // a piece of text : it is appended to the pending text
                $pendingText .= $hit[2][0];
            } else {
                // a tag : the pending text, if any, is saved first
                if ($pendingText !== '') {
                    $tmp[] = array('txt', $pendingText);
                    $pendingText = '';
                }
                $tmp[] = array('code', trim($hit[1][0]), $cursor);
            }

            // the next search starts right after the current match
            $cursor = $hit[0][1] + strlen($hit[0][0]);
        }

        // text found after the last tag
        if ($pendingText !== '') {
            $tmp[] = array('txt', $pendingText);
        }
    }
283
284
    /**
     * Analyse a HTML tag and build the corresponding action.
     *
     * The returned array has the keys :
     *  - 'name'      : tag name, in lower case
     *  - 'close'     : 1 for a closing tag, 0 otherwise
     *  - 'autoclose' : result of preg_match() (1 when the code ends with "/>", 0 otherwise)
     *  - 'param'     : attributes of the tag; 'style' is an array property => value
     *                  and 'num' is the current value on top of the level stack
     *
     * Side effect : opening a ul/ol/table tag pushes a new number on the level stack
     * ($this->_level), closing one removes the last entry.
     *
     * @param   string   $code HTML code to analyse
     * @return  array    corresponding action, or null if the code is not a tag
     */
    protected function _analiseCode($code)
    {
        // name of the tag, opening, closure, autoclosure
        $tag = '<([\/]{0,1})([_a-z0-9]+)([\/>\s]+)';
        if (!preg_match('/'.$tag.'/isU', $code, $match)) return null;
        $close     = ($match[1]=='/');
        $autoclose = preg_match('/\/>$/isU', $code);
        $name      = strtolower($match[2]);

        // required parameters (depends on the tag name)
        $param = array();
        $param['style'] = '';
        if ($name=='img') {
            $param['alt'] = '';
            $param['src'] = '';
        }
        if ($name=='a') {
            $param['href'] = '';
        }

        // read the attributes written in the tag
        $param = $this->_readTagAttributes($code, $param);

        // The legacy attributes are appended to the style below : close the last
        // declaration of an explicit style="..." first. Without it "color:red" followed
        // by "width: 10px; " was read as a single declaration "color: redwidth: 10px".
        $param['style'] = trim($param['style']);
        if ($param['style']!=='' && substr($param['style'], -1)!==';') {
            $param['style'] .= ';';
        }

        // compliance of each parameter
        $color  = "#000000";
        $border = null;
        foreach ($param as $key => $val) {
            // string conversion matters : a numeric key must not be loosely equal to a case label
            $key = strtolower($key);
            switch($key)
            {
                case 'width':
                case 'height':
                    unset($param[$key]);
                    $param['style'] .= $key.': '.$this->_addPixelUnit($val).'; ';
                    break;

                case 'align':
                    if ($name==='img') {
                        unset($param[$key]);
                        $param['style'] .= 'float: '.$val.'; ';
                    } elseif ($name!=='table') {
                        unset($param[$key]);
                        $param['style'] .= 'text-align: '.$val.'; ';
                    }
                    break;

                case 'valign':
                    unset($param[$key]);
                    $param['style'] .= 'vertical-align: '.$val.'; ';
                    break;

                case 'bgcolor':
                    unset($param[$key]);
                    $param['style'] .= 'background: '.$val.'; ';
                    break;

                case 'bordercolor':
                    unset($param[$key]);
                    $color = $val;
                    break;

                case 'border':
                    unset($param[$key]);
                    if (preg_match('/^[0-9]+$/isU', $val)) $val = $val.'px';
                    $border = $val;
                    break;

                case 'cellpadding':
                case 'cellspacing':
                    if (preg_match('/^([0-9]+)$/isU', $val)) $param[$key] = $val.'px';
                    break;

                case 'colspan':
                case 'rowspan':
                    // only the digits are kept, with 1 as a minimum
                    $val = preg_replace('/[^0-9]/isU', '', $val);
                    if (!$val) $val = 1;
                    $param[$key] = $val;
                    break;
            }
        }

        // compliance of the border
        if ($border!==null) {
            if ($border)    $border = 'border: solid '.$border.' '.$color;
            else            $border = 'border: none';

            $param['style'] .= $border.'; ';
            $param['border'] = $border;
        }

        // reading styles: decomposition and standardization
        $param['style'] = $this->_parseStyleDeclarations($param['style']);

        // ul, ol and table tags manage the level stack
        $isLevelTag = in_array($name, array('ul', 'ol', 'table'), true);

        // opening : a new level is added before reading the current level
        if ($isLevelTag && !$close) {
            $this->_num++;
            $this->_level[count($this->_level)] = $this->_num;
        }

        // get the level of the table containing the element
        if (!isset($param['num'])) {
            $param['num'] = $this->_level[count($this->_level)-1];
        }

        // closure : the level is removed after having been read
        if ($isLevelTag && $close) {
            unset($this->_level[count($this->_level)-1]);
        }

        // prepare the parameters that contain text
        foreach (array('value', 'alt', 'title', 'class') as $textKey) {
            if (isset($param[$textKey])) {
                $param[$textKey] = $this->_prepareTxt($param[$textKey]);
            }
        }

        // return the new action to do
        return array('name' => $name, 'close' => $close ? 1 : 0, 'autoclose' => $autoclose, 'param' => $param);
    }

    /**
     * Read the attributes of a tag, for the 3 supported syntaxes.
     *
     * The syntaxes are read in the order name=value, name="value", name='value' :
     * when the same name is found several times, the last value read wins.
     * Names are converted to lower case, names and values are trimmed.
     *
     * NOTE(review): the unquoted pattern is applied to the whole tag, so it can also
     * match inside a quoted value (for instance "a=1" in href="page?a=1") - behaviour
     * kept as it was.
     *
     * @param   string   $code  HTML code of the tag
     * @param   array    $param parameters already defined
     * @return  array    parameters completed with the attributes found
     */
    protected function _readTagAttributes($code, $param)
    {
        $patterns = array(
            '([a-zA-Z0-9_]+)=([^"\'\s>]+)',     // name=value
            '([a-zA-Z0-9_]+)=["]([^"]*)["]',    // name="value"
            "([a-zA-Z0-9_]+)=[']([^']*)[']",    // name='value'
        );

        foreach ($patterns as $prop) {
            // nothing found (or PCRE failure) => nothing to add for this syntax
            if (!preg_match_all('/'.$prop.'/is', $code, $match)) continue;

            foreach ($match[1] as $k => $attribute) {
                $param[trim(strtolower($attribute))] = trim($match[2][$k]);
            }
        }

        return $param;
    }

    /**
     * Add the "px" unit to a purely numeric size, like it is done for the border.
     *
     * A value that is not numeric ("100%", "10px", "5mm") is returned unchanged,
     * instead of becoming "100%px" or "10pxpx".
     *
     * @param   string   $val value of a width or height attribute
     * @return  string   value usable in a style declaration
     */
    protected function _addPixelUnit($val)
    {
        return is_numeric($val) ? $val.'px' : $val;
    }

    /**
     * Convert a style string ("a: b; c: d") to an array property => value.
     *
     * Parts without ":" are ignored. Property names are trimmed and converted to
     * lower case, values are trimmed and their whitespace runs replaced by a space.
     *
     * @param   string   $style style string
     * @return  array    property => value
     */
    protected function _parseStyleDeclarations($style)
    {
        $result = array();
        foreach (explode(';', $style) as $declaration) {
            // only the first ":" separates the name and the value
            $pieces = explode(':', $declaration, 2);
            if (count($pieces)<2) continue;

            $result[trim(strtolower($pieces[0]))] = preg_replace('/[\s]+/isU', ' ', trim($pieces[1]));
        }

        return $result;
    }
437
438
    /**
     * Get the actions contained between the tag at position $k and its matching closure.
     *
     * The opening tag and its closure are not part of the result. The search stops at
     * the matching closure or at the end of the parsed code. If the action at position
     * $k is a text, the result only contains this text.
     *
     * @param   integer $k position of the opening action in $this->code
     * @return  array   extracted actions, empty if the position does not exist
     */
    public function getLevel($k)
    {
        // unknown position => nothing to extract
        if (!isset($this->code[$k])) {
            return array();
        }

        // name of the tag to follow
        $detect = $this->code[$k]['name'];

        // a text has no content
        if ($detect=='write') {
            return array($this->code[$k]);
        }

        $extract = array();   // extracted actions
        $depth   = 0;         // number of currently open $detect tags

        for ($i = $k; isset($this->code[$i]); $i++) {
            $row = $this->code[$i];

            // texts are always taken
            if ($row['name']=='write') {
                $extract[] = $row;
                continue;
            }

            if ($row['name']==$detect) {
                $wasAtRoot = ($depth==0);
                $depth += ($row['close'] ? -1 : 1);

                // back at the root level : matching closure, not taken, end of the search
                if ($depth==0) {
                    break;
                }

                // leaving the root level : opening tag itself, not taken
                if ($wasAtRoot) {
                    continue;
                }
            }

            // NOTE(review): the actions built in this class store their styles under
            // ['param']['style'], so this test looks like it never matches - confirm
            // whether ['param']['style']['text-align'] was intended.
            if (isset($row['style']['text-align'])) {
                unset($row['style']['text-align']);
            }
            $extract[] = $row;
        }

        return $extract;
    }
507
508
    /**
     * Return the part of the HTML code around a position, for the error messages.
     *
     * @param   integer $pos    position in the HTML code
     * @param   integer $before number of characters to take before the position
     * @param   integer $after  number of characters to take after the position
     * @return  string  part of the html code
     */
    public function getHtmlErrorCode($pos, $before=30, $after=40)
    {
        $start  = $pos - $before;
        $length = $before + $after;

        // Near the beginning of the document the start would be negative, and substr()
        // would then count from the END of the string: the extract starts at 0 instead,
        // and is shortened so that it still ends $after characters after the position.
        if ($start < 0) {
            $length = max(0, $length + $start);
            $start  = 0;
        }

        return substr($this->_html, $start, $length);
    }
520
}