Completed
Push — master ( e74d94...4f7e5e )
by Thomas
03:05
created

src/FluentDOM/Loader/Html.php (2 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * Load a DOM document from an HTML string or file
4
 *
5
 * @license http://www.opensource.org/licenses/mit-license.php The MIT License
6
 * @copyright Copyright (c) 2009-2017 FluentDOM Contributors
7
 */
8
9
namespace FluentDOM\Loader {
10
11
  use FluentDOM\DOM\Document;
12
  use FluentDOM\DOM\DocumentFragment;
13
  use FluentDOM\DOM\ProcessingInstruction;
14
  use FluentDOM\Loadable;
15
16
  /**
17
   * Load a DOM document from an HTML string or file
18
   */
19
  class Html implements Loadable {
20
21
    use Supports\Libxml;
22
23
    const IS_FRAGMENT = 'is_fragment';
24
25
    /**
     * List the content type identifiers this loader accepts.
     *
     * @return string[]
     */
    public function getSupported(): array {
      $contentTypes = ['html', 'text/html', 'html-fragment', 'text/html-fragment'];
      return $contentTypes;
    }
31
32
    /**
     * Load an HTML source (document or fragment) into a new DOM document.
     *
     * Returns NULL if the content type is not supported by this loader. Otherwise
     * the parsing runs through Libxml\Errors::capture(). The first processing
     * instruction found in the loaded document is removed before the result
     * is returned.
     *
     * @see Loadable::load
     * @param string $source
     * @param string $contentType
     * @param array|\Traversable|Options $options
     * @return Document|Result|NULL
     * @throws \FluentDOM\Exceptions\InvalidSource\TypeString
     * @throws \FluentDOM\Exceptions\InvalidSource\TypeFile
     */
    public function load($source, string $contentType, $options = []) {
      if (!$this->supports($contentType)) {
        return NULL;
      }
      $errors = new Libxml\Errors();
      $loadDocument = function() use ($source, $contentType, $options) {
        $selection = FALSE;
        $document = new Document();
        $settings = $this->getOptions($options);
        if ($this->isFragment($contentType, $settings)) {
          // fragments are parsed separately and imported, all top level nodes get selected
          $this->loadFragmentIntoDom($document, $source, $settings);
          $selection = $document->evaluate('/node()');
        } else {
          $sourceType = $settings->getSourceType($source);
          $settings->isAllowed($sourceType);
          // loose comparison on purpose, it mirrors the semantics of a switch statement
          if ($sourceType == Options::IS_FILE) {
            $document->loadHTMLFile($source, $settings[Options::LIBXML_OPTIONS]);
          } else {
            // Options::IS_STRING and any other source type are parsed as a string
            $html = $this->ensureEncodingPI(
              $source, $settings[Options::ENCODING], $settings[Options::FORCE_ENCODING]
            );
            $document->loadHTML($html, $settings[Options::LIBXML_OPTIONS]);
          }
        }
        /** @var ProcessingInstruction $pi */
        $pi = $document->xpath()->firstOf('//processing-instruction()');
        if ($pi) {
          $pi->remove();
        }
        return new Result($document, 'text/html', $selection);
      };
      return $errors->capture($loadDocument);
    }
75
76 12
    /**
     * Make sure the source carries an XML declaration that announces the encoding.
     *
     * Unless the encoding is forced, a charset found in a meta tag of the source
     * takes precedence over the provided encoding. If the source has no XML
     * declaration, one is put in front of it. An existing declaration is only
     * replaced if the encoding is forced, otherwise the source is returned
     * unchanged.
     *
     * @param string $source
     * @param string|NULL $encoding
     * @param bool|NULL $force NULL is treated like FALSE
     * @return string
     */
    private function ensureEncodingPI(string $source, string $encoding = NULL, bool $force = NULL): string {
      // normalize NULL|bool once, so the checks below are explicit
      $isForced = ($force === TRUE);
      $hasXmlPi = (bool)preg_match('(<\\?xml\\s)', $source);
      if (!$isForced && ($charset = $this->getCharsetFromMetaTag($source))) {
        $encoding = (string)$charset;
      }
      // cast so that a missing encoding never hands NULL to htmlspecialchars()
      $pi = '<?xml version="1.0" encoding="'.htmlspecialchars((string)$encoding).'"?>';
      if (!$hasXmlPi) {
        return $pi.$source;
      }
      if ($isForced) {
        /*
         * The question mark in front of the closing bracket has to be matched
         * literally (and optionally, for declarations closed by a bracket only).
         * A callback is used so the declaration is inserted verbatim, without
         * backreference interpretation.
         */
        $replaced = preg_replace_callback(
          '(<\\?xml\\s[^?>]*\\??>)',
          function() use ($pi) {
            return $pi;
          },
          $source,
          1
        );
        // preg_replace_callback() returns NULL on a PCRE error, keep the source in that case
        return $replaced ?? $source;
      }
      return $source;
    }
90
91
    /**
     * Look for a charset declaration in the meta tags of an HTML source.
     *
     * A meta tag with a quoted charset attribute is checked first. After that
     * the charset parameter inside the content attribute of a meta tag with
     * http-equiv="content-type" is used.
     *
     * @param string $source
     * @return string|bool the charset name, FALSE if none was found
     */
    private function getCharsetFromMetaTag(string $source) {
      $hasCharsetAttribute = preg_match(
        '(<meta\\s+[^>]*charset=["\']\s*(?<charset>[^\\s\'">]+)\s*["\'])i',
        $source,
        $match
      );
      if ($hasCharsetAttribute) {
        return $match['charset'];
      }
      $hasContentTypeTag = preg_match(
        '(<meta\\s+[^>]*http-equiv=["\']content-type["\'][^>]*>)i',
        $source,
        $metaTag
      );
      if ($hasContentTypeTag) {
        // the charset value ends at whitespace, a quote or the end of the tag
        preg_match(
          '(content=["\']\s*[^#\']+;\s*charset\s*=\s*(?<encoding>[^\s\'">]+))i',
          $metaTag[0],
          $contentMatch
        );
        return $contentMatch['encoding'] ?? FALSE;
      }
      return FALSE;
    }
119
120
    /**
     * Parse an HTML fragment and return its nodes in a document fragment.
     *
     * The source is wrapped into an html-fragment element before it is parsed,
     * the child nodes of that element are moved into the returned fragment.
     * Returns NULL if the content type is not supported by this loader.
     *
     * @see LoadableFragment::loadFragment
     * @param string $source
     * @param string $contentType
     * @param array|\Traversable|Options $options
     * @return DocumentFragment|NULL
     */
    public function loadFragment($source, string $contentType, $options = []) {
      if (!$this->supports($contentType)) {
        return NULL;
      }
      $options = $this->getOptions($options);
      $errors = new Libxml\Errors();
      $parseFragment = function() use ($source, $options) {
        $document = new Document();
        $fragment = $document->createDocumentFragment();
        $wrappedSource = $this->ensureEncodingPI(
          '<html-fragment>'.$source.'</html-fragment>',
          $options[Options::ENCODING],
          $options[Options::FORCE_ENCODING]
        );
        $document->loadHTML($wrappedSource, $options[Options::LIBXML_OPTIONS]);
        foreach ($document->evaluate('//html-fragment[1]/node()') as $node) {
          $fragment->append($node);
        }
        return $fragment;
      };
      return $errors->capture($parseFragment);
    }
152
153 13
    /**
     * Decide if the source has to be treated as a fragment, either because of
     * the content type or because the IS_FRAGMENT option is set.
     *
     * @param string $contentType
     * @param array|\ArrayAccess $options
     * @return bool
     */
    private function isFragment(string $contentType, $options) {
      if ($contentType === 'html-fragment' || $contentType === 'text/html-fragment') {
        // the content type decides on its own, the option is not read
        return TRUE;
      }
      return (bool)$options[self::IS_FRAGMENT];
    }
160
161 2
    /**
     * Parse an HTML fragment in a separate document and import its nodes
     * as top level nodes into the provided document.
     *
     * The source is wrapped into an html-fragment element for parsing, only
     * the child nodes of that element are imported.
     *
     * @param \DOMDocument $document target document, receives the imported nodes
     * @param string $source
     * @param array|\ArrayAccess $settings
     */
    private function loadFragmentIntoDom(\DOMDocument $document, string $source, $settings) {
      $htmlDom = new Document();
      $htmlDom->loadHTML(
        $this->ensureEncodingPI(
          '<html-fragment>'.$source.'</html-fragment>',
          $settings[Options::ENCODING],
          $settings[Options::FORCE_ENCODING]
        ),
        $settings[Options::LIBXML_OPTIONS]
      );
      $nodes = $htmlDom->evaluate('//html-fragment[1]/node()');
      if (!($nodes instanceof \Traversable || is_array($nodes))) {
        // evaluate() can return a scalar, in that case there is nothing to import
        return;
      }
      foreach ($nodes as $node) {
        /** @var \DOMNode $node */
        $importedNode = $document->importNode($node, TRUE);
        if ($importedNode) {
          $document->appendChild($importedNode);
        }
      }
    }
179
  }
180
}