1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* Load a DOM document from an HTML string or file |
4
|
|
|
* |
5
|
|
|
* @license http://www.opensource.org/licenses/mit-license.php The MIT License |
6
|
|
|
* @copyright Copyright (c) 2009-2017 FluentDOM Contributors |
7
|
|
|
*/ |
8
|
|
|
|
9
|
|
|
namespace FluentDOM\Loader { |
10
|
|
|
|
11
|
|
|
use FluentDOM\DOM\Document; |
12
|
|
|
use FluentDOM\DOM\DocumentFragment; |
13
|
|
|
use FluentDOM\DOM\ProcessingInstruction; |
14
|
|
|
use FluentDOM\Loadable; |
15
|
|
|
|
16
|
|
|
/**
 * Load a DOM document from an HTML string or file.
 *
 * Handles the content types returned by getSupported(): complete documents
 * ('html', 'text/html') and fragments ('html-fragment', 'text/html-fragment').
 */
class Html implements Loadable {

  use Supports\Libxml;

  /**
   * Option key: when the option is truthy the source is loaded as a fragment,
   * regardless of the content type.
   */
  const IS_FRAGMENT = 'is_fragment';

  /**
   * List the content type identifiers this loader handles.
   *
   * @return string[]
   */
  public function getSupported(): array {
    return ['html', 'text/html', 'html-fragment', 'text/html-fragment'];
  }

  /**
   * Load HTML into a new document and wrap it in a Result.
   *
   * Fragment content types (or a truthy IS_FRAGMENT option) load the source as
   * a fragment and select the top level nodes of the document. Otherwise the
   * source type is determined from the options and the source is loaded
   * either as a file or as a string.
   *
   * @see Loadable::load
   * @param string $source
   * @param string $contentType
   * @param array|\Traversable|Options $options
   * @return Document|Result|NULL NULL if the content type is not supported
   * @throws \FluentDOM\Exceptions\InvalidSource\TypeString
   * @throws \FluentDOM\Exceptions\InvalidSource\TypeFile
   */
  public function load($source, string $contentType, $options = []) {
    if (!$this->supports($contentType)) {
      return NULL;
    }
    // the actual work is executed through Libxml\Errors::capture()
    return (new Libxml\Errors())->capture(
      function() use ($source, $contentType, $options) {
        $selection = FALSE;
        $document = new Document();
        $settings = $this->getOptions($options);
        if ($this->isFragment($contentType, $settings)) {
          $this->loadFragmentIntoDom($document, $source, $settings);
          // a fragment has no single root, select all top level nodes
          $selection = $document->evaluate('/node()');
        } else {
          // the return value of isAllowed() is not inspected here - presumably
          // it throws for disallowed source types (see @throws), TODO confirm
          $settings->isAllowed($sourceType = $settings->getSourceType($source));
          switch ($sourceType) {
          case Options::IS_FILE :
            $document->loadHTMLFile($source, $settings[Options::LIBXML_OPTIONS]);
            break;
          case Options::IS_STRING :
          default :
            $document->loadHTML(
              $this->ensureEncodingPI($source, $settings[Options::ENCODING], $settings[Options::FORCE_ENCODING]),
              $settings[Options::LIBXML_OPTIONS]
            );
          }
        }
        // remove the first processing instruction found in the document -
        // presumably the XML declaration added by ensureEncodingPI()
        /** @var ProcessingInstruction $pi */
        if ($pi = $document->xpath()->firstOf('//processing-instruction()')) {
          $pi->remove();
        }
        return new Result($document, 'text/html', $selection);
      }
    );
  }

  /**
   * Make sure the source carries an XML declaration with an encoding.
   *
   * - Unless $force is set, a charset found in a meta tag of the source
   *   replaces the provided $encoding.
   * - If the source contains no XML declaration, one is prepended.
   * - If it contains one and $force is set, the first declaration is
   *   replaced with the generated one.
   * - Otherwise the source is returned unchanged.
   *
   * @param string $source
   * @param string|NULL $encoding
   * @param bool|NULL $force
   * @return string
   */
  private function ensureEncodingPI(string $source, string $encoding = NULL, bool $force = NULL): string {
    $hasXmlPi = preg_match('(<\\?xml\\s)', $source);
    if (!$force && ($charset = $this->getCharsetFromMetaTag($source))) {
      $encoding = (string)$charset;
    }
    // cast: $encoding may be NULL, passing NULL to htmlspecialchars() is deprecated
    $pi = '<?xml version="1.0" encoding="'.htmlspecialchars((string)$encoding).'"?>';
    if (!$hasXmlPi) {
      return $pi.$source;
    }
    if ($force) {
      // match everything up to the closing '>' of the declaration, this
      // includes the '?' of a well-formed '?>' terminator
      $replaced = preg_replace('(<\\?xml\\s[^>]*>)', $pi, $source, 1);
      // preg_replace() returns NULL on a PCRE error, keep the source in that case
      return is_string($replaced) ? $replaced : $source;
    }
    return $source;
  }

  /**
   * Look for a charset declaration in the meta tags of the source.
   *
   * Checks for a meta tag with a quoted charset attribute first, then for a
   * meta tag with http-equiv="content-type" and a charset inside its
   * content attribute.
   *
   * @param string $source
   * @return string|bool the charset name or FALSE if none was found
   */
  private function getCharsetFromMetaTag(string $source) {
    $hasMetaTag = preg_match(
      '(<meta\\s+[^>]*charset=["\']\s*(?<charset>[^\\s\'">]+)\s*["\'])i',
      $source,
      $match
    );
    if ($hasMetaTag) {
      return $match['charset'];
    }
    $hasMetaTag = preg_match(
      '(<meta\\s+[^>]*http-equiv=["\']content-type["\'][^>]*>)i',
      $source,
      $match
    );
    if ($hasMetaTag) {
      // the charset value ends at the first whitespace, quote or '>'
      preg_match(
        '(content=["\']\s*[^#\']+;\s*charset\s*=\s*(?<encoding>[^\s\'">]+))i',
        $match[0],
        $contentMatch
      );
      return $contentMatch['encoding'] ?? FALSE;
    }
    return FALSE;
  }

  /**
   * Load an HTML fragment and return it as a document fragment.
   *
   * The source is wrapped into an <html-fragment> element before it is
   * loaded, the child nodes of that element are appended to the fragment.
   *
   * @see LoadableFragment::loadFragment
   * @param string $source
   * @param string $contentType
   * @param array|\Traversable|Options $options
   * @return DocumentFragment|NULL NULL if the content type is not supported
   */
  public function loadFragment($source, string $contentType, $options = []) {
    if (!$this->supports($contentType)) {
      return NULL;
    }
    $options = $this->getOptions($options);
    return (new Libxml\Errors())->capture(
      function() use ($source, $options) {
        $document = new Document();
        // NOTE: the fragment is created before loadHTML() is called - keep this order
        $fragment = $document->createDocumentFragment();
        $document->loadHTML(
          $this->ensureEncodingPI(
            '<html-fragment>'.$source.'</html-fragment>',
            $options[Options::ENCODING],
            $options[Options::FORCE_ENCODING]
          ),
          $options[Options::LIBXML_OPTIONS]
        );
        $nodes = $document->evaluate('//html-fragment[1]/node()');
        foreach ($nodes as $node) {
          $fragment->append($node);
        }
        return $fragment;
      }
    );
  }

  /**
   * Decide if the source has to be loaded as a fragment.
   *
   * TRUE for the fragment content types, otherwise the IS_FRAGMENT option decides.
   *
   * @param string $contentType
   * @param array|\ArrayAccess $options
   * @return bool
   */
  private function isFragment(string $contentType, $options) {
    return (
      $contentType === 'html-fragment' ||
      $contentType === 'text/html-fragment' ||
      $options[self::IS_FRAGMENT]
    );
  }

  /**
   * Parse the fragment in a separate document and import its nodes
   * as top level nodes into the target document.
   *
   * @param \DOMDocument $document target document
   * @param string $source
   * @param array|\ArrayAccess $settings
   */
  private function loadFragmentIntoDom(\DOMDocument $document, string $source, $settings) {
    $htmlDom = new Document();
    $htmlDom->loadHTML(
      $this->ensureEncodingPI(
        '<html-fragment>'.$source.'</html-fragment>',
        $settings[Options::ENCODING],
        $settings[Options::FORCE_ENCODING]
      ),
      $settings[Options::LIBXML_OPTIONS]
    );
    $nodes = $htmlDom->evaluate('//html-fragment[1]/node()');
    foreach ($nodes as $node) {
      /** @var \DOMNode $node */
      if ($importedNode = $document->importNode($node, TRUE)) {
        $document->appendChild($importedNode);
      }
    }
  }
}
180
|
|
|
} |