Completed
Push — master ( e74d94...4f7e5e )
by Thomas
03:05
created

src/FluentDOM/Loader/Html.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
/**
3
 * Load a DOM document from an HTML string or file
4
 *
5
 * @license http://www.opensource.org/licenses/mit-license.php The MIT License
6
 * @copyright Copyright (c) 2009-2017 FluentDOM Contributors
7
 */
8
9
namespace FluentDOM\Loader {
10
11
  use FluentDOM\DOM\Document;
12
  use FluentDOM\DOM\DocumentFragment;
13
  use FluentDOM\DOM\ProcessingInstruction;
14
  use FluentDOM\Loadable;
15
16
  /**
17
   * Load a DOM document from an HTML string or file
18
   */
19
  class Html implements Loadable {
20
21
    use Supports\Libxml;
22
23
    const IS_FRAGMENT = 'is_fragment';
24
25
    /**
     * List the content type identifiers this loader accepts: the identifiers
     * for complete HTML documents followed by the ones for HTML fragments.
     *
     * @return string[]
     */
    public function getSupported(): array {
      $documentTypes = ['html', 'text/html'];
      $fragmentTypes = ['html-fragment', 'text/html-fragment'];
      return array_merge($documentTypes, $fragmentTypes);
    }
31
32
    /**
     * Load an HTML source into a new document and wrap it in a Result.
     *
     * Unsupported content types yield NULL. Fragment content types (or the
     * is_fragment option) are parsed as a fragment; in that case the top level
     * nodes of the document become the selection of the result. Otherwise the
     * source is read as a file or as a string, depending on the source type
     * reported by the options. The parsing runs inside Libxml\Errors::capture().
     *
     * @see Loadable::load
     * @param string $source
     * @param string $contentType
     * @param array|\Traversable|Options $options
     * @return Document|Result|NULL
     * @throws \FluentDOM\Exceptions\InvalidSource\TypeString
     * @throws \FluentDOM\Exceptions\InvalidSource\TypeFile
     */
    public function load($source, string $contentType, $options = []) {
      if (!$this->supports($contentType)) {
        return NULL;
      }
      $errors = new Libxml\Errors();
      return $errors->capture(
        function() use ($source, $contentType, $options) {
          $selection = FALSE;
          $document = new Document();
          $settings = $this->getOptions($options);
          if ($this->isFragment($contentType, $settings)) {
            $this->loadFragmentIntoDom($document, $source, $settings);
            $selection = $document->evaluate('/node()');
          } else {
            $sourceType = $settings->getSourceType($source);
            $settings->isAllowed($sourceType);
            // loose comparison on purpose, it mirrors the semantics of a switch statement
            if ($sourceType == Options::IS_FILE) {
              $document->loadHTMLFile($source, $settings[Options::LIBXML_OPTIONS]);
            } else {
              // strings and any other source type are parsed as an HTML string
              $html = $this->ensureEncodingPI(
                $source, $settings[Options::ENCODING], $settings[Options::FORCE_ENCODING]
              );
              $document->loadHTML($html, $settings[Options::LIBXML_OPTIONS]);
            }
          }
          // drop the first processing instruction found in the loaded document
          /** @var ProcessingInstruction $pi */
          $pi = $document->xpath()->firstOf('//processing-instruction()');
          if ($pi) {
            $pi->remove();
          }
          return new Result($document, 'text/html', $selection);
        }
      );
    }
75
76 12
    /**
     * Make sure the source starts with/contains an XML declaration that
     * carries an encoding.
     *
     * - Without an existing declaration, a new one is prepended.
     * - With an existing declaration and $force set, the first declaration is
     *   replaced by one using $encoding.
     * - Otherwise the source is returned unchanged.
     *
     * Unless $force is set, a charset found in a meta tag of the source takes
     * precedence over $encoding.
     *
     * @param string $source
     * @param string|NULL $encoding encoding to declare
     * @param bool|NULL $force NULL is treated like FALSE
     * @return string
     */
    private function ensureEncodingPI(string $source, string $encoding = NULL, bool $force = NULL): string {
      // normalize the nullable flag once, so NULL and FALSE are handled explicitly alike
      $forceEncoding = ($force === TRUE);
      $hasXmlPi = (bool)preg_match('(<\\?xml\\s)', $source);
      if (!$forceEncoding && ($charset = $this->getCharsetFromMetaTag($source))) {
        $encoding = (string)$charset;
      }
      $pi = '<?xml version="1.0" encoding="'.htmlspecialchars((string)$encoding).'"?>';
      if (!$hasXmlPi) {
        return $pi.$source;
      }
      if ($forceEncoding) {
        // Match up to the first ">" so the regular "?>" terminator is included.
        // The former pattern "[^?>]*?>" used "*?" as a lazy quantifier and
        // never matched a declaration ending in "?>".
        // preg_replace() returns NULL on a PCRE error, keep the source in that case.
        return preg_replace('(<\\?xml\\s[^>]*>)', $pi, $source, 1) ?? $source;
      }
      return $source;
    }
90
91
    /**
     * Look for a charset declaration in the meta tags of the source.
     *
     * A meta tag with a quoted charset attribute is checked first, then a
     * meta tag with http-equiv="content-type" whose content attribute
     * contains a "charset=..." part.
     *
     * @param string $source
     * @return string|bool the charset name, FALSE if none was found
     */
    private function getCharsetFromMetaTag(string $source) {
      $hasCharsetAttribute = preg_match(
        '(<meta\\s+[^>]*charset=["\']\s*(?<charset>[^\\s\'">]+)\s*["\'])i',
        $source,
        $charsetMatch
      );
      if ($hasCharsetAttribute) {
        return $charsetMatch['charset'];
      }
      $hasContentTypeTag = preg_match(
        '(<meta\\s+[^>]*http-equiv=["\']content-type["\'][^>]*>)i',
        $source,
        $tagMatch
      );
      if ($hasContentTypeTag) {
        // The capture group has to exclude whitespace (\s). The former class
        // used \S inside the negation and could only match whitespace, so the
        // charset was never extracted. The pattern is case-insensitive like
        // the one that found the tag; ";" ends the value as a parameter separator.
        $hasEncoding = preg_match(
          '(content=["\']\s*[^#\']+;\s*charset\s*=\s*(?<encoding>[^\s\'">;]+))i',
          $tagMatch[0],
          $contentMatch
        );
        return $hasEncoding ? $contentMatch['encoding'] : FALSE;
      }
      return FALSE;
    }
119
120
    /**
     * Parse an HTML fragment and return its nodes in a document fragment.
     *
     * The source is wrapped into an html-fragment element before parsing; the
     * child nodes of that element are appended to a fragment created by a new
     * document. Unsupported content types yield NULL.
     *
     * @see LoadableFragment::loadFragment
     * @param string $source
     * @param string $contentType
     * @param array|\Traversable|Options $options
     * @return DocumentFragment|NULL
     */
    public function loadFragment($source, string $contentType, $options = []) {
      if (!$this->supports($contentType)) {
        return NULL;
      }
      $options = $this->getOptions($options);
      $parseFragment = function() use ($source, $options) {
        $document = new Document();
        $fragment = $document->createDocumentFragment();
        $html = $this->ensureEncodingPI(
          '<html-fragment>'.$source.'</html-fragment>',
          $options[Options::ENCODING],
          $options[Options::FORCE_ENCODING]
        );
        $document->loadHTML($html, $options[Options::LIBXML_OPTIONS]);
        // move the children of the wrapper element into the fragment
        foreach ($document->evaluate('//html-fragment[1]/node()') as $node) {
          $fragment->append($node);
        }
        return $fragment;
      };
      return (new Libxml\Errors())->capture($parseFragment);
    }
152
153 13
    /**
     * Decide if the source has to be treated as an HTML fragment: either the
     * content type is one of the fragment types or the is_fragment option is set.
     *
     * @param string $contentType
     * @param array|\ArrayAccess $options read with the IS_FRAGMENT key
     * @return bool
     */
    private function isFragment(string $contentType, $options) {
      if (in_array($contentType, ['html-fragment', 'text/html-fragment'], TRUE)) {
        // the option is not read if the content type already decides it
        return TRUE;
      }
      return (bool)$options[self::IS_FRAGMENT];
    }
160
161 2
    /**
     * Parse the source as an HTML fragment and append copies of its nodes to
     * the target document.
     *
     * The source is wrapped into an html-fragment element and parsed in a
     * separate document; each child node of the wrapper is imported (deep
     * copy) into the target document and appended at its top level.
     *
     * @param \DOMDocument $document target document
     * @param string $source
     * @param array|\ArrayAccess $settings read with the Options::* keys
     */
    private function loadFragmentIntoDom(\DOMDocument $document, string $source, $settings) {
      $wrappedSource = $this->ensureEncodingPI(
        '<html-fragment>'.$source.'</html-fragment>',
        $settings[Options::ENCODING],
        $settings[Options::FORCE_ENCODING]
      );
      $parser = new Document();
      $parser->loadHTML($wrappedSource, $settings[Options::LIBXML_OPTIONS]);
      foreach ($parser->evaluate('//html-fragment[1]/node()') as $child) {
        /** @var \DOMNode $child */
        $copy = $document->importNode($child, TRUE);
        if ($copy) {
          $document->appendChild($copy);
        }
      }
    }
179
  }
180
}