1 | <?php |
||||
2 | |||||
3 | namespace Wa72\HtmlPageDom; |
||||
4 | |||||
/**
 * Static helper functions for HtmlPageDom
 *
 * @package Wa72\HtmlPageDom
 */
class Helpers
{
    /**
     * Remove newlines from a string and minimize whitespace: every run of
     * whitespace characters is replaced by one space, and leading/trailing
     * whitespace is stripped.
     *
     * Useful for cleaning up text retrieved by HtmlPageCrawler::text()
     * (nodeValue of a DOMNode).
     *
     * @param string $string
     * @return string
     */
    public static function trimNewlines($string)
    {
        $string = str_replace(["\n", "\r"], ' ', $string);
        $string = preg_replace('/\s+/', ' ', $string);

        return trim($string);
    }

    /**
     * Convert a CSS declaration string to an array.
     *
     * Statements are split on ";". Empty statements and statements without a
     * property name before the first ":" are silently ignored. If a property
     * occurs more than once, the last occurrence wins.
     *
     * @param string $css list of CSS properties separated by ;
     * @return array name=>value pairs of CSS properties
     */
    public static function cssStringToArray($css)
    {
        $statements = explode(';', preg_replace('/\s+/s', ' ', $css));
        $styles = [];
        foreach ($statements as $statement) {
            $statement = trim($statement);
            if ('' === $statement) {
                continue;
            }
            $p = strpos($statement, ':');
            if (false === $p || 0 === $p) {
                // invalid statement (no colon, or nothing before it), just ignore it
                continue;
            }
            $key = trim(substr($statement, 0, $p));
            $value = trim(substr($statement, $p + 1));
            $styles[$key] = $value;
        }

        return $styles;
    }

    /**
     * Convert a CSS name=>value array to a string.
     *
     * Each pair is rendered as "name: value;" and the pairs are concatenated
     * without any separator in between.
     *
     * @param array $array name=>value pairs of CSS properties
     * @return string list of CSS properties separated by ;
     */
    public static function cssArrayToString($array)
    {
        $styles = '';
        foreach ($array as $key => $value) {
            $styles .= $key . ': ' . $value . ';';
        }

        return $styles;
    }

    /**
     * Helper function for getting a body element from an HTML fragment.
     *
     * The fragment is wrapped in <html><body>...</body></html>, parsed with
     * DOMDocument::loadHTML(), and the resulting body element is returned.
     *
     * @param string $html A fragment of HTML code
     * @param string $charset Character encoding of $html
     * @return \DOMNode The body node containing child nodes created from the HTML fragment
     */
    public static function getBodyNodeFromHtmlFragment($html, $charset = 'UTF-8')
    {
        $html = '<html><body>' . $html . '</body></html>';

        // Turn every non-ASCII character into a numeric character reference so
        // that the parser receives plain ASCII regardless of $charset.
        // mb_encode_numericentity() replaces the 'HTML-ENTITIES' pseudo-encoding
        // of mb_convert_encoding(), which is deprecated as of PHP 8.2.
        // This is done before libxml's global state is touched, so a failure
        // here cannot leave that state modified.
        if (function_exists('mb_encode_numericentity') && in_array(
            strtolower($charset),
            array_map('strtolower', mb_list_encodings()),
            true
        )) {
            $html = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], $charset);
        }

        // Collect parse errors internally instead of emitting warnings;
        // the previous setting is restored below.
        $current = libxml_use_internal_errors(true);

        // The entity loader switch is only used before PHP 8.0.
        if (\PHP_VERSION_ID < 80000) {
            $disableEntities = libxml_disable_entity_loader(true);
        }

        $d = new \DOMDocument('1.0', $charset);
        $d->validateOnParse = true;
        // Parsing is best-effort: malformed fragments must not raise warnings.
        @$d->loadHTML($html);

        libxml_use_internal_errors($current);

        if (\PHP_VERSION_ID < 80000) {
            libxml_disable_entity_loader($disableEntities);
        }

        return $d->getElementsByTagName('body')->item(0);
    }
}
||||
107 |
If you suppress an error, we recommend checking for the error condition explicitly: