simplepie /
simplepie-ng
| 1 | <?php |
||
| 2 | /** |
||
| 3 | * Copyright (c) 2017–2019 Ryan Parman <http://ryanparman.com>. |
||
| 4 | * Copyright (c) 2017–2019 Contributors. |
||
| 5 | * |
||
| 6 | * http://opensource.org/licenses/Apache2.0 |
||
| 7 | */ |
||
| 8 | |||
| 9 | declare(strict_types=1); |
||
| 10 | |||
| 11 | namespace SimplePie\Parser; |
||
| 12 | |||
| 13 | use DOMComment; |
||
| 14 | use DOMDocument; |
||
| 15 | use DOMNode; |
||
| 16 | use DOMText; |
||
| 17 | use DOMXPath; |
||
| 18 | use Psr\Http\Message\StreamInterface; |
||
| 19 | use Psr\Log\LoggerInterface; |
||
| 20 | use SimplePie\Enum as E; |
||
| 21 | use SimplePie\HandlerStackInterface; |
||
| 22 | use SimplePie\Mixin as Tr; |
||
| 23 | use SimplePie\SimplePie; |
||
| 24 | use SimplePie\Type\Feed; |
||
| 25 | use SimplePie\Util\Ns; |
||
| 26 | |||
/**
 * The core parser for all XML content.
 *
 * Reads the raw document from a PSR-7 stream, parses it with `DOMDocument`,
 * registers the namespaces understood by the middleware, and then invokes the
 * middleware against a fresh `Feed` object.
 */
class Xml extends AbstractParser
{
    use Tr\DomDocumentTrait;
    use Tr\LoggerTrait;
    use Tr\RawDocumentTrait;

    /**
     * The object which contains the parsed results.
     *
     * @var Feed
     */
    protected $feed;

    /**
     * Bitwise libxml options to use for parsing XML.
     *
     * @var int
     */
    protected $libxml;

    /**
     * The handler stack which contains registered middleware.
     *
     * @var HandlerStackInterface
     */
    protected $middleware;

    /**
     * The XML namespace handler.
     *
     * @var Ns
     */
    protected $ns;

    /**
     * Constructs a new instance of this class.
     *
     * @param StreamInterface       $stream                  A PSR-7 `StreamInterface` which is typically returned by
     *                                                       the `getBody()` method of a `ResponseInterface` class.
     * @param LoggerInterface       $logger                  The PSR-3 logger.
     * @param HandlerStackInterface $handlerStack            The handler stack which contains registered middleware.
     * @param int                   $libxml                  The libxml value to use for parsing XML.
     * @param bool                  $handleHtmlEntitiesInXml Whether or not SimplePie should pre-parse the XML as HTML
     *                                                       to resolve the entities. A value of `true` means that
     *                                                       SimplePie should inject the entity definitions. A value of
     *                                                       `false` means that SimplePie should NOT inject the entity
     *                                                       definitions.
     *
     * @throws \Error
     * @throws \TypeError
     *
     * @phpcs:disable Generic.Functions.OpeningFunctionBraceBsdAllman.BraceOnSameLine
     */
    public function __construct(
        StreamInterface $stream,
        LoggerInterface $logger,
        HandlerStackInterface $handlerStack,
        int $libxml,
        bool $handleHtmlEntitiesInXml
    ) {
        // @phpcs:enable

        // Logger
        $this->logger = $logger;

        // Middleware
        $this->middleware = $handlerStack;

        // Libxml2
        $this->libxml = $libxml;

        // Raw stream
        $this->rawDocument = $this->readStream($stream);

        // DOMDocument
        $this->domDocument = new DOMDocument('1.0', 'utf-8');

        // Don't barf errors all over the output
        \libxml_use_internal_errors(true);

        // DOMDocument configuration
        // NOTE(review): `resolveExternals` together with `substituteEntities` lets libxml load
        // external DTDs/entities named by the document being parsed. Feeds are untrusted input,
        // so confirm that the `$libxml` flags supplied by callers mitigate XXE/network access.
        $this->domDocument->recover             = true;
        $this->domDocument->formatOutput        = false;
        $this->domDocument->preserveWhiteSpace  = false;
        $this->domDocument->resolveExternals    = true;
        $this->domDocument->substituteEntities  = true;
        $this->domDocument->strictErrorChecking = false;
        $this->domDocument->validateOnParse     = false;

        // If enabled, force-inject the contents of `entities.dtd` into the feed.
        if ($handleHtmlEntitiesInXml) {
            $this->getLogger()->debug('Enabled handing HTML entities in XML.');
            $this->rawDocument = $this->injectEntityDefinitions($this->rawDocument);
        }

        // Parse the XML document with the configured libxml options
        $this->domDocument->loadXML($this->rawDocument, $this->libxml);

        // Register the namespace handler.
        $this->ns = (new Ns($this->domDocument))
            ->setLogger($this->getLogger());

        // Look at which namespaces the registered middleware understands.
        $this->middleware->registerNamespaces($this->ns);

        // Instantiate a new write-to feed object.
        $this->feed = (new Feed($this->getNamespaceAlias() ?? ''))
            ->setLogger($this->getLogger());

        // Invoke the registered middleware.
        $this->middleware->invoke(
            E\FeedType::XML,
            $this->getFeed()->getRoot(),
            $this->getNamespaceAlias(),
            $this->xpath()
        );

        // Clear the libxml errors to avoid excessive memory usage
        \libxml_clear_errors();
    }

    /**
     * Get the XML namespace handler.
     */
    public function getNs(): Ns
    {
        return $this->ns;
    }

    /**
     * Get the preferred namespace alias for the namespace URI of the document's
     * root element.
     */
    public function getNamespaceAlias(): ?string
    {
        // `documentElement` is null when the document has no root element (e.g., it could not
        // be parsed). The `??` keeps that case from raising a property-on-null warning and
        // mirrors the guard already used in `xpath()`.
        $namespaceUri = $this->domDocument->documentElement->namespaceURI ?? null;

        return $this->getNs()->getPreferredNamespaceAlias($namespaceUri);
    }

    /**
     * Gets a reference to the `DOMXPath` object, with the default namespace
     * already registered.
     *
     * @return DOMXPath
     */
    public function xpath()
    {
        $ns    = $this->getNamespaceAlias();
        $xpath = new DOMXPath($this->domDocument);

        // Register the namespace alias with the XPath instance
        if (null !== $ns) {
            $xpath->registerNamespace(
                $ns,
                $this->domDocument->documentElement->namespaceURI ?? ''
            );
        }

        return $xpath;
    }

    /**
     * Some DOMNode names are `#comment` or `#text`. This method will move the
     * pointer to the next node, then the next until it finds a node which is
     * neither a comment nor text.
     *
     * @param DOMNode $node The `DOMNode` element to evaluate.
     *
     * @throws \TypeError If the siblings run out before such a node is found, because `null`
     *                    cannot satisfy the declared `DOMNode` return type.
     */
    public function findNextRealNode(DOMNode $node): DOMNode
    {
        $n = $node;

        // `instanceof` is already false for `null`, so no separate null check is required.
        while ($n instanceof DOMComment || $n instanceof DOMText) {
            $n = $n->nextSibling;
        }

        return $n;
    }

    /**
     * Retrieves the object which represents the top-level feed.
     */
    public function getFeed(): Feed
    {
        return $this->feed;
    }

    /**
     * Injects the contents of `resources/entities.dtd` immediately before the start tag of the
     * root element, so that a later parse can resolve the entities it defines.
     *
     * The definitions are inserted exactly once, at the first `<rootName` that is followed by
     * whitespace, `/` or `>`. A plain replace of `<rootName` would also rewrite any element whose
     * name merely starts with the root name (e.g., `<feedburner:info>` inside `<feed>`), as well
     * as every repeated occurrence.
     *
     * @param string $rawDocument The unparsed XML document.
     *
     * @return string The document with the definitions injected, or the unmodified document when
     *                the root element or the definitions file could not be determined/read.
     */
    private function injectEntityDefinitions(string $rawDocument): string
    {
        // Pre-parse only to discover the qualified name of the root element: feed, rss, etc.
        $this->domDocument->loadXML($rawDocument, $this->libxml);

        // `documentElement` skips comments, processing instructions and the doctype, all of
        // which may legitimately precede the root element.
        $rootElement = $this->domDocument->documentElement;

        if (null === $rootElement) {
            $this->getLogger()->warning(
                'Could not determine the root XML element. Skipping the injection of HTML entity definitions.'
            );

            return $rawDocument;
        }

        // Read the entity definition file
        $entities = \file_get_contents(\dirname(SIMPLEPIE_ROOT) . '/resources/entities.dtd');

        if (false === $entities) {
            $this->getLogger()->error(
                'Could not read `resources/entities.dtd`. Skipping the injection of HTML entity definitions.'
            );

            return $rawDocument;
        }

        // <feed, <rss, etc. — but only as a complete tag name.
        $pattern = \sprintf('/<%s(?=[\s\/>])/', \preg_quote((string) $rootElement->nodeName, '/'));

        if (1 !== \preg_match($pattern, $rawDocument, $match, \PREG_OFFSET_CAPTURE)) {
            return $rawDocument;
        }

        // Zero-length replacement == insertion at the byte offset of the root start tag.
        return \substr_replace($rawDocument, \trim($entities), $match[0][1], 0);
    }
}
||
| 237 |
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:
filter:
    dependency_paths: ["lib/*"]
For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths