1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* Load a DOM document from an HTML string |
4
|
|
|
* |
5
|
|
|
* @license http://www.opensource.org/licenses/mit-license.php The MIT License |
6
|
|
|
* @copyright Copyright (c) 2009-2014 Bastian Feder, Thomas Weinert |
7
|
|
|
*/ |
8
|
|
|
|
9
|
|
|
namespace FluentDOM\Loader { |
10
|
|
|
|
11
|
|
|
use FluentDOM\Document; |
12
|
|
|
use FluentDOM\DocumentFragment; |
13
|
|
|
use FluentDOM\Loadable; |
14
|
|
|
use FluentDOM\ProcessingInstruction; |
15
|
|
|
|
16
|
|
|
/** |
17
|
|
|
* Load a DOM document from an HTML string |
18
|
|
|
*/ |
19
|
|
|
class Html implements Loadable { |
20
|
|
|
|
21
|
|
|
use Supports\Libxml; |
22
|
|
|
|
23
|
|
|
const IS_FRAGMENT = 'is_fragment'; |
24
|
|
|
|
25
|
|
|
/**
 * List the content type identifiers this loader accepts: the two document
 * identifiers followed by the two fragment identifiers.
 *
 * @return string[]
 */
public function getSupported() {
  $documentTypes = ['html', 'text/html'];
  $fragmentTypes = ['html-fragment', 'text/html-fragment'];
  return array_merge($documentTypes, $fragmentTypes);
}
31
|
|
|
|
32
|
|
|
/**
 * Parse an HTML source into a document and wrap it into a loader result.
 *
 * If the content type or the options mark the source as a fragment, it is
 * parsed by loadFragmentIntoDom() and the result of the XPath expression
 * "/*" is handed to the Result as selection. Otherwise the source type
 * reported by the options decides between loading a file and loading a
 * string (the string gets an XML declaration via ensureEncodingPI()).
 *
 * @see Loadable::load
 * @param string $source
 * @param string $contentType
 * @param array|\Traversable|Options $options
 * @return Document|Result|NULL NULL if the content type is not supported
 */
public function load($source, $contentType, $options = []) {
  if (!$this->supports($contentType)) {
    return NULL;
  }
  $errors = new Libxml\Errors();
  // the parsing runs inside capture(); presumably that collects libxml
  // errors and hands back the callback result - confirm in Libxml\Errors
  return $errors->capture(
    function() use ($source, $contentType, $options) {
      $selection = false;
      $dom = new Document();
      $settings = $this->getOptions($options);
      if ($this->isFragment($contentType, $settings)) {
        $this->loadFragmentIntoDom($dom, $source, $settings);
        $selection = $dom->evaluate('/*');
      } else {
        $sourceType = $settings->getSourceType($source);
        // NOTE(review): the return value is ignored, so isAllowed()
        // presumably throws for a disallowed source type - confirm
        $settings->isAllowed($sourceType);
        if ($sourceType == Options::IS_FILE) {
          $dom->loadHTMLFile($source, $settings[Options::LIBXML_OPTIONS]);
        } else {
          // Options::IS_STRING and any other source type are parsed as string
          $html = $this->ensureEncodingPI(
            $source, $settings[Options::ENCODING], $settings[Options::FORCE_ENCODING]
          );
          $dom->loadHTML($html, $settings[Options::LIBXML_OPTIONS]);
        }
      }
      // drop the first processing instruction of the parsed document,
      // presumably the XML declaration added by ensureEncodingPI()
      /** @var ProcessingInstruction $instruction */
      $instruction = $dom->xpath()->firstOf('//processing-instruction()');
      if ($instruction) {
        $instruction->remove();
      }
      return new Result($dom, 'text/html', $selection);
    }
  );
}
73
|
|
|
|
74
|
10 |
|
/**
 * Make sure the source carries an XML declaration that names the encoding.
 *
 * Unless $force is set, a charset declared in a meta tag of the source
 * takes precedence over $encoding. A source without an XML declaration
 * gets one prepended. A source that already has one is returned unchanged,
 * except with $force, where the existing declaration is replaced.
 *
 * @param string $source
 * @param string $encoding
 * @param bool $force
 * @return string
 */
private function ensureEncodingPI($source, $encoding, $force = FALSE) {
  $hasXmlPi = preg_match('(<\\?xml\\s)', $source);
  if (!$force && ($charset = $this->getCharsetFromMetaTag($source))) {
    $encoding = $charset;
  }
  // cast so an unset (NULL) encoding does not reach htmlspecialchars()
  $pi = '<?xml version="1.0" encoding="'.htmlspecialchars((string)$encoding).'"?>';
  if (!$hasXmlPi) {
    return $pi.$source;
  }
  if ($force) {
    // match up to the closing "?>" (or a bare ">"); a callback is used so
    // "$1" or "\1" inside the encoding name is never read as a back-reference
    $replaced = preg_replace_callback(
      '(<\\?xml\\s[^>]*?\\??>)',
      static function($match) use ($pi) {
        return $pi;
      },
      $source,
      1
    );
    // keep the unmodified source if PCRE reports an error
    return NULL === $replaced ? $source : $replaced;
  }
  return $source;
}
89
|
|
|
|
90
|
10 |
|
/**
 * Read the character set declared by a meta tag of the HTML source.
 *
 * A `<meta charset="...">` tag is looked for first. If there is none, the
 * `charset` parameter inside the `content` attribute of a
 * `<meta http-equiv="content-type" ...>` tag is used.
 *
 * @param string $source
 * @return string|false the declared character set, FALSE if none was found
 */
private function getCharsetFromMetaTag($source) {
  $hasCharsetAttribute = preg_match(
    '(<meta\\s+[^>]*charset=["\']\\s*(?<charset>[^\\s\'">]+)\\s*["\'])i',
    $source,
    $match
  );
  if ($hasCharsetAttribute) {
    return $match['charset'];
  }
  $hasContentTypeTag = preg_match(
    '(<meta\\s+[^>]*http-equiv=["\']content-type["\'][^>]*>)i',
    $source,
    $metaTag
  );
  if ($hasContentTypeTag) {
    // only look inside the matched tag, not the whole source
    $hasEncoding = preg_match(
      '(content=["\']\\s*[^"\']+;\\s*charset\\s*=\\s*(?<encoding>[^\\s\'">]+))i',
      $metaTag[0],
      $content
    );
    return $hasEncoding ? $content['encoding'] : FALSE;
  }
  return FALSE;
}
115
|
|
|
|
116
|
|
|
/**
 * Parse an HTML fragment string into a document fragment.
 *
 * The source is wrapped into a temporary `<html-fragment>` element so that
 * it can be parsed as a document; afterwards the child nodes of that
 * element are appended to a fragment created on the same document.
 *
 * @see LoadableFragment::loadFragment
 * @param string $source
 * @param string $contentType
 * @param array|\Traversable|Options $options
 * @return DocumentFragment|NULL NULL if the content type is not supported
 */
public function loadFragment($source, $contentType, $options = []) {
  if (!$this->supports($contentType)) {
    return NULL;
  }
  $settings = $this->getOptions($options);
  $errors = new Libxml\Errors();
  // presumably capture() collects libxml errors and returns the callback
  // result - confirm in Libxml\Errors
  return $errors->capture(
    function() use ($source, $settings) {
      $dom = new Document();
      $result = $dom->createDocumentFragment();
      $html = $this->ensureEncodingPI(
        '<html-fragment>'.$source.'</html-fragment>',
        $settings[Options::ENCODING],
        $settings[Options::FORCE_ENCODING]
      );
      $dom->loadHTML($html, $settings[Options::LIBXML_OPTIONS]);
      // move the children of the temporary wrapper into the fragment
      foreach ($dom->evaluate('//html-fragment[1]/node()') as $child) {
        $result->append($child);
      }
      return $result;
    }
  );
}
148
|
|
|
|
149
|
11 |
|
/**
 * Decide whether the source has to be handled as an HTML fragment.
 *
 * That is the case for the two fragment content types, or if the
 * IS_FRAGMENT option holds a truthy value. The content type is compared
 * strictly, so non-string values (TRUE, 0) never count as a fragment type.
 *
 * @param string $contentType
 * @param array|\ArrayAccess $options anything readable with `[]`
 * @return bool
 */
private function isFragment($contentType, $options) {
  if (in_array($contentType, ['html-fragment', 'text/html-fragment'], TRUE)) {
    return TRUE;
  }
  return (bool)$options[self::IS_FRAGMENT];
}
156
|
|
|
|
157
|
8 |
|
/**
 * Parse an HTML fragment and append its nodes to the given document.
 *
 * The source is wrapped into a temporary `<html-fragment>` element and
 * parsed in a separate document; the child nodes of that element are then
 * imported (deep copy) into $document and appended to it directly.
 *
 * @param \DOMDocument $document target document, modified in place
 * @param string $source
 * @param array|Options $settings read with `[]` for encoding, force
 *   encoding and libxml options
 */
private function loadFragmentIntoDom(\DOMDocument $document, $source, $settings) {
  $parser = new Document();
  $html = $this->ensureEncodingPI(
    '<html-fragment>'.$source.'</html-fragment>',
    $settings[Options::ENCODING],
    $settings[Options::FORCE_ENCODING]
  );
  $parser->loadHTML($html, $settings[Options::LIBXML_OPTIONS]);
  foreach ($parser->evaluate('//html-fragment[1]/node()') as $child) {
    $copy = $document->importNode($child, TRUE);
    if (!$copy) {
      // nothing to append if the import did not yield a node
      continue;
    }
    $document->appendChild($copy);
  }
}
174
|
|
|
} |
175
|
|
|
} |
This check marks calls to methods that do not seem to exist on an object.
This is most likely the result of a method being renamed without all references to it being renamed likewise.