1 | <?php |
||
2 | /** |
||
3 | * Copyright (c) 2017–2019 Ryan Parman <http://ryanparman.com>. |
||
4 | * Copyright (c) 2017–2019 Contributors. |
||
5 | * |
||
6 | * http://opensource.org/licenses/Apache2.0 |
||
7 | */ |
||
8 | |||
9 | declare(strict_types=1); |
||
10 | |||
11 | namespace SimplePie\Parser; |
||
12 | |||
13 | use DOMComment; |
||
14 | use DOMDocument; |
||
15 | use DOMNode; |
||
16 | use DOMText; |
||
17 | use DOMXPath; |
||
18 | use Psr\Http\Message\StreamInterface; |
||
19 | use Psr\Log\LoggerInterface; |
||
20 | use SimplePie\Enum as E; |
||
21 | use SimplePie\HandlerStackInterface; |
||
22 | use SimplePie\Mixin as Tr; |
||
23 | use SimplePie\SimplePie; |
||
24 | use SimplePie\Type\Feed; |
||
25 | use SimplePie\Util\Ns; |
||
26 | |||
27 | /** |
||
28 | * The core parser for all XML content. |
||
29 | */ |
||
class Xml extends AbstractParser
{
    use Tr\DomDocumentTrait;
    use Tr\LoggerTrait;
    use Tr\RawDocumentTrait;

    /**
     * The object which contains the parsed results.
     *
     * @var Feed
     */
    protected $feed;

    /**
     * Bitwise libxml options to use for parsing XML.
     *
     * @var int
     */
    protected $libxml;

    /**
     * The handler stack which contains registered middleware.
     *
     * @var HandlerStackInterface
     */
    protected $middleware;

    /**
     * The XML namespace handler.
     *
     * @var Ns
     */
    protected $ns;

    /**
     * Constructs a new instance of this class.
     *
     * Reads the stream into the raw document, optionally injects the entity definitions from
     * `entities.dtd`, parses the result into the `DOMDocument`, registers namespaces with the
     * middleware stack, creates the `Feed` object, and finally invokes the middleware.
     *
     * @param StreamInterface       $stream                  A PSR-7 `StreamInterface` which is typically returned by
     *                                                       the `getBody()` method of a `ResponseInterface` class.
     * @param LoggerInterface       $logger                  The PSR-3 logger.
     * @param HandlerStackInterface $handlerStack            The handler stack which contains registered middleware.
     * @param int                   $libxml                  The libxml value to use for parsing XML.
     * @param bool                  $handleHtmlEntitiesInXml Whether or not SimplePie should pre-parse the XML as HTML
     *                                                       to resolve the entities. A value of `true` means that
     *                                                       SimplePie should inject the entity definitions. A value of
     *                                                       `false` means that SimplePie should NOT inject the entity
     *                                                       definitions. The default value is `false`.
     *
     * @throws Error
     * @throws TypeError
     *
     * @phpcs:disable Generic.Functions.OpeningFunctionBraceBsdAllman.BraceOnSameLine
     */
    public function __construct(
        StreamInterface $stream,
        LoggerInterface $logger,
        HandlerStackInterface $handlerStack,
        int $libxml,
        bool $handleHtmlEntitiesInXml
    ) {
        // @phpcs:enable

        // Logger
        $this->logger = $logger;

        // Middleware
        $this->middleware = $handlerStack;

        // Libxml2
        $this->libxml = $libxml;

        // Raw stream
        $this->rawDocument = $this->readStream($stream);

        // DOMDocument
        $this->domDocument = new DOMDocument('1.0', 'utf-8');

        // Don't barf errors all over the output.
        // NOTE(review): this switches libxml error handling for the whole process and the previous
        // setting is never restored — confirm that is intended.
        \libxml_use_internal_errors(true);

        // DOMDocument configuration.
        // NOTE(review): `resolveExternals` + `substituteEntities` are enabled while parsing content
        // that presumably comes from remote feeds. That combination permits external DTD/entity
        // resolution; confirm the `$libxml` flags supplied by callers restrict it (e.g. LIBXML_NONET).
        $this->domDocument->recover             = true;
        $this->domDocument->formatOutput        = false;
        $this->domDocument->preserveWhiteSpace  = false;
        $this->domDocument->resolveExternals    = true;
        $this->domDocument->substituteEntities  = true;
        $this->domDocument->strictErrorChecking = false;
        $this->domDocument->validateOnParse     = false;

        // If enabled, force-inject the contents of `entities.dtd` into the feed.
        if ($handleHtmlEntitiesInXml) {
            $this->getLogger()->debug('Enabled handing HTML entities in XML.');
            $this->injectEntityDefinitions();
        }

        // Parse the XML document with the configured libxml options
        $this->domDocument->loadXML($this->rawDocument, $this->libxml);

        // Register the namespace handler.
        $this->ns = (new Ns($this->domDocument))
            ->setLogger($this->getLogger());

        // Look at which namespaces the registered middleware understands.
        $this->middleware->registerNamespaces($this->ns);

        // Instantiate a new write-to feed object.
        $this->feed = (new Feed($this->getNamespaceAlias() ?? ''))
            ->setLogger($this->getLogger());

        // Invoke the registered middleware.
        $this->middleware->invoke(
            E\FeedType::XML,
            $this->getFeed()->getRoot(),
            $this->getNamespaceAlias(),
            $this->xpath()
        );

        // Clear the libxml errors to avoid excessive memory usage
        \libxml_clear_errors();
    }

    /**
     * Get the XML namespace handler.
     */
    public function getNs(): Ns
    {
        return $this->ns;
    }

    /**
     * Get the preferred namespace alias for the namespace URI of the document's root element.
     *
     * When the document has no root element (e.g., nothing could be parsed) or the root element
     * has no namespace, `null` is handed to the namespace handler instead of dereferencing a
     * missing element.
     */
    public function getNamespaceAlias(): ?string
    {
        // `??` keeps this consistent with `xpath()` and avoids a notice/warning when
        // `documentElement` is null.
        $namespaceUri = $this->domDocument->documentElement->namespaceURI ?? null;

        return $this->getNs()->getPreferredNamespaceAlias($namespaceUri);
    }

    /**
     * Gets a reference to the `DOMXPath` object, with the default namespace
     * already registered.
     *
     * A new `DOMXPath` instance is created on every call.
     *
     * @return DOMXPath
     */
    public function xpath()
    {
        $ns    = $this->getNamespaceAlias();
        $xpath = new DOMXPath($this->domDocument);

        // Without an alias there is nothing to register.
        if (null === $ns) {
            return $xpath;
        }

        // Register the namespace alias with the XPath instance
        $xpath->registerNamespace(
            $ns,
            $this->domDocument->documentElement->namespaceURI ?? ''
        );

        return $xpath;
    }

    /**
     * Some DOMNode names are `#comment` or `#text`. This method will move the
     * pointer to the next node, then the next until it finds a node which is
     * neither a comment nor a text node.
     *
     * @param DOMNode $node The `DOMNode` element to evaluate.
     *
     * @throws TypeError When `$node` and all of its following siblings are comment or text nodes,
     *                   because there is then no node to return.
     */
    public function findNextRealNode(DOMNode $node): DOMNode
    {
        $n = $node;

        // `instanceof` is already false for null, so the loop stops on its own once the
        // siblings are exhausted.
        while ($n instanceof DOMComment || $n instanceof DOMText) {
            $n = $n->nextSibling;
        }

        return $n;
    }

    /**
     * Retrieves the object which represents the top-level feed.
     */
    public function getFeed(): Feed
    {
        return $this->feed;
    }

    /**
     * Reads `entities.dtd` and inserts its contents into the raw document, directly in front of
     * the opening tag of the root element (`<feed`, `<rss`, etc.).
     *
     * The raw document is pre-parsed to discover the name of the root element. The definitions are
     * inserted exactly once. When the root element cannot be determined, the definitions file
     * cannot be read, or the opening tag cannot be located, a warning is logged and the raw
     * document is left untouched so that the regular parse can still proceed.
     */
    private function injectEntityDefinitions(): void
    {
        // Pre-parse so that we can ask the DOM which element is the root.
        $this->domDocument->loadXML($this->rawDocument, $this->libxml);

        // `documentElement` skips comments, processing instructions and the doctype,
        // and is null when no root element could be parsed.
        $root = $this->domDocument->documentElement;

        if (null === $root) {
            $this->getLogger()->warning('Unable to inject HTML entity definitions: no root element was found.');

            return;
        }

        $definitions = \file_get_contents(\dirname(SIMPLEPIE_ROOT) . '/resources/entities.dtd');

        if (false === $definitions) {
            $this->getLogger()->warning('Unable to inject HTML entity definitions: entities.dtd could not be read.');

            return;
        }

        // Match the root's opening tag only: the name must be followed by whitespace, `/` or `>`,
        // so `<feed` does not match `<feedburner:info`.
        $pattern = \sprintf('/<%s(?=[\s\/>])/', \preg_quote($root->nodeName, '/'));

        if (1 !== \preg_match($pattern, $this->rawDocument, $match, \PREG_OFFSET_CAPTURE)) {
            $this->getLogger()->warning('Unable to inject HTML entity definitions: root tag not found in raw document.');

            return;
        }

        // Insert (zero-length replace) at the first occurrence only.
        $this->rawDocument = \substr_replace(
            $this->rawDocument,
            \trim($definitions),
            $match[0][1],
            0
        );
    }
}
||
237 |
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows: dependency_paths: ["lib/*"]. For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths