This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
| 1 | <?php |
||
| 2 | |||
| 3 | namespace eNTiDi\Autotoc; |
||
| 4 | |||
| 5 | use DOMDocument; |
||
| 6 | use DOMElement; |
||
| 7 | use DOMXPath; |
||
| 8 | |||
| 9 | /* |
||
| 10 | * Tocifier is intentionally decoupled from SilverStripe to be able to |
||
| 11 | * test it without needing to put all the test infrastructure up. |
||
| 12 | */ |
||
class Tocifier
{
    /**
     * Prefix to prepend to every URL fragment. The root node of the
     * tree uses the bare prefix as its id; headings get the prefix
     * followed by a 1-based counter.
     *
     * @var string
     */
    public static $prefix = 'TOC-';

    /**
     * The original HTML, exactly as passed to the constructor.
     *
     * @var mixed
     */
    private $raw_html = '';

    /**
     * $raw_html augmented for proper navigation. Filled by process().
     *
     * @var string
     */
    private $html = '';

    /**
     * Root node of the most recently generated TOC tree. It stays
     * null until process() has successfully run.
     *
     * @var array|null
     */
    private $tree;

    /**
     * References to the potential parents, indexed by nesting level.
     *
     * @var array
     */
    private $dangling = [];

    /**
     * Callback for augmenting a single DOMElement.
     *
     * @var callable
     */
    private $augment_callback;


    /**
     * Get the TOC node closest to a given nesting level.
     *
     * The dangling parents are scanned from $level - 1 down to 0 and
     * the first one found is returned by reference, so that the
     * caller can append children to it.
     *
     * @param  int   $level The requested nesting level.
     * @return array
     * @throws \LogicException If no parent is registered. This is not
     *                         expected to happen because
     *                         processDocument() registers the root
     *                         node at level 0 before any lookup.
     */
    private function &getParent($level)
    {
        for ($candidate = $level - 1; $candidate >= 0; --$candidate) {
            if (isset($this->dangling[$candidate])) {
                return $this->dangling[$candidate];
            }
        }

        // A function returning by reference must not fall off its end:
        // fail loudly even when assertions are disabled.
        throw new \LogicException('No parent TOC node found for level '.$level);
    }

    /**
     * Get the plain text content from a DOM element.
     *
     * Any <small> descendant is dropped before the text is extracted.
     * The element passed in is left untouched.
     *
     * @param  DOMElement $tag The DOM element to inspect.
     * @return string
     */
    private function getPlainText(DOMElement $tag)
    {
        // Work on a deep copy, so the real document is not altered
        $clone = $tag->cloneNode(true);

        // The list returned by getElementsByTagName() is live: removing
        // its first item shrinks it, so the loop ends when no <small>
        // is left.
        $smalls = $clone->getElementsByTagName('small');
        while ($smalls->length > 0) {
            $small = $smalls->item(0);
            $small->parentNode->removeChild($small);
        }

        return $clone->textContent;
    }

    /**
     * Create a new TOC node.
     *
     * The node is also registered as the dangling parent for $level
     * and every dangling parent deeper than $level is forgotten.
     *
     * @param  string $id    Node id, used for anchoring
     * @param  string $text  Title text
     * @param  int    $level The nesting level of the node
     * @return array         The new node, returned by reference
     */
    private function &newNode($id, $text, $level)
    {
        $node = [
            'id'    => $id,
            'title' => $text
        ];

        // Clear the dangling parents deeper than $level, if any
        foreach (array_keys($this->dangling) as $key) {
            if ($key > $level) {
                unset($this->dangling[$key]);
            }
        }

        // Consider this node a potential dangling parent. It must be
        // bound by reference: children are appended to it later on.
        $this->dangling[$level] = & $node;

        return $node;
    }

    /**
     * Process the specific document.
     *
     * Every <h1>...<h6> element lacking the data-hide-from-toc
     * attribute becomes a node of the TOC tree and is handed to the
     * augment callback. The resulting markup is stored for getHtml().
     *
     * @param DOMDocument $doc The document to process.
     */
    private function processDocument(DOMDocument $doc)
    {
        // The root node lives at level 0, so it is the fallback parent
        // of any heading.
        $this->tree = & $this->newNode(self::$prefix, '', 0);
        $counter = 0;

        // translate() maps the digits 1-6 to a dot, so only element
        // names made by an "h" followed by one of those digits match.
        $xpath = new DOMXPath($doc);
        $query = '//*[translate(name(), "123456", "......") = "h."][not(@data-hide-from-toc)]';

        foreach ($xpath->query($query) as $heading) {
            $title = $this->getPlainText($heading);
            $level = (int) substr($heading->tagName, 1);
            $id    = self::$prefix.(++$counter);

            // Build the tree. The parent must be looked up before the
            // new node is created because newNode() changes the
            // dangling parents. Both variables are references.
            $parent = & $this->getParent($level);
            $node   = & $this->newNode($id, $title, $level);
            if (!isset($parent['children'])) {
                $parent['children'] = [];
            }
            $parent['children'][] = & $node;

            // call_user_func() also accepts 'Class::method' strings on
            // PHP versions where they cannot be invoked directly.
            call_user_func($this->augment_callback, $doc, $heading, $id);
        }

        // loadHTML() wraps the chunk into <html><body>: serialize only
        // the body and strip its own tags.
        // NOTE(review): if the document has no <body>, item(0) is null
        // and saveHTML() presumably serializes the whole document --
        // behavior left unchanged, confirm whether that can happen.
        $body = $doc->getElementsByTagName('body')->item(0);
        $this->html = str_replace(["<body>\n", '<body>', '</body>'], '', $doc->saveHTML($body));
    }

    /**
     * Debug function for dumping a TOC node and its children.
     *
     * One line per node is echoed: the title, prefixed by $indent.
     * Children are indented by one more tab. A node without a title
     * (e.g. when no tree has been built yet) prints an empty title.
     *
     * @param array|null $node   The TOC node to dump
     * @param string     $indent Indentation string.
     */
    private function dumpBranch($node, $indent = '')
    {
        $title = isset($node['title']) ? $node['title'] : '';
        echo $indent.$title."\n";

        if (isset($node['children'])) {
            foreach ($node['children'] as $child) {
                $this->dumpBranch($child, "$indent\t");
            }
        }
    }


    /**
     * Create a new TOCifier instance.
     *
     * A string containing the HTML to parse for TOC must be passed
     * in. The real processing will be triggered by the process()
     * method. The augment callback is initially set to setId().
     *
     * Parsing a file can be easily performed by using
     * file_get_contents():
     *
     * <code>
     * $tocifier = new Tocifier(@file_get_contents($file));
     * </code>
     *
     * @param string $html A chunk of valid HTML (UTF-8 encoded).
     */
    public function __construct($html)
    {
        $this->raw_html = $html;
        $this->setAugmentCallback([static::class, 'setId']);
    }

    /**
     * Change the augment method used by this Tocifier instance.
     *
     * By default the HTML is augmented by directly setting the ID on
     * every valid destination element (see Tocifier::setId()). This
     * behavior can be changed by using Tocifier::prependAnchor()
     * (that prepends an anchor before the destination elements) or
     * by providing your own callback.
     *
     * The signature of the callback to pass in should be compatible
     * with:
     *
     *     function callback(DOMDocument $dom, DOMElement $element, $id)
     *
     * @param callable $callback The new function to call for
     *                           augmenting DOMElement
     */
    public function setAugmentCallback($callback)
    {
        $this->augment_callback = $callback;
    }

    /**
     * Parse and process the HTML chunk.
     *
     * The parsing phase involves picking up all the HTML header
     * elements (from <h1> to <h6>). If the chunk provided to the
     * constructor is not a non-empty string or it cannot be loaded,
     * this function will fail.
     *
     * @return boolean true on success, false on errors.
     */
    public function process()
    {
        // Only a non-empty string can be parsed. A strict comparison is
        // used because empty() would also reject the valid chunk "0".
        if (!is_string($this->raw_html) || $this->raw_html === '') {
            return false;
        }

        // loadHTML() does not assume UTF-8 for a chunk without charset
        // information, so any non-ASCII character is converted to a
        // numeric entity before parsing. mb_encode_numericentity() is
        // used because the 'HTML-ENTITIES' pseudo-encoding of
        // mb_convert_encoding() is deprecated since PHP 8.2.
        $html = mb_encode_numericentity(
            $this->raw_html,
            [0x80, 0x10FFFF, 0, 0x1FFFFF],
            'UTF-8'
        );

        // Parse the HTML into a DOMDocument tree, collecting the libxml
        // warnings internally instead of silencing them with "@". The
        // previous error handling mode is restored afterwards.
        $doc = new DOMDocument();
        $previous = libxml_use_internal_errors(true);
        $loaded = $doc->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        if (!$loaded) {
            return false;
        }

        // Process the doc
        $this->processDocument($doc);
        return true;
    }

    /**
     * Get the TOC (Table Of Contents) from the provided HTML.
     *
     * The HTML must be provided through the constructor and process()
     * must have been called.
     *
     * The TOC is represented in the form of:
     *
     *     [
     *         [
     *             'id'       => 'TOC-1',
     *             'title'    => 'Item 1',
     *             'children' => [
     *                 [
     *                     'id'    => 'TOC-2',
     *                     'title' => 'Subitem 1.1'
     *                 ], [
     *                     'id'       => 'TOC-3',
     *                     'title'    => 'Subitem 1.2',
     *                     'children' => [
     *                         [
     *                             'id'    => 'TOC-4',
     *                             'title' => 'Subsubitem 1.2.1'
     *                         ]
     *                     ]
     *                 ]
     *             ]
     *         ], [
     *             'id'       => 'TOC-5',
     *             'title'    => 'Item 2',
     *             'children' => [
     *                 [
     *                     'id'    => 'TOC-6',
     *                     'title' => 'Subitem 2.1'
     *                 ], [
     *                     'id'    => 'TOC-7',
     *                     'title' => 'Subitem 2.2'
     *                 ]
     *             ]
     *         ]
     *     ]
     *
     * The tree is built by process(): this method only returns it, so
     * subsequent calls will return the same tree.
     *
     * @return array An array representing the TOC. A valid array is
     *               always returned: it is empty when no tree has
     *               been built or no heading has been found.
     */
    public function getTOC()
    {
        if (isset($this->tree['children'])) {
            return $this->tree['children'];
        }

        return [];
    }

    /**
     * Get the HTML augmented for proper navigation.
     *
     * The HTML must be provided through the constructor. The returned
     * string is generated by process(), so subsequent calls will
     * return the same string without further processing. An empty
     * string is returned if process() has not been run successfully.
     *
     * @return string The augmented HTML.
     */
    public function getHtml()
    {
        return $this->html;
    }

    /**
     * Dump the TOC to stdout for debugging purpose.
     *
     * The first line is the (empty) title of the root node.
     */
    public function dumpTOC()
    {
        $this->dumpBranch($this->tree);
    }

    /**
     * Augment a DOMElement by prepending an anchor.
     *
     * An HTML fragment such as:
     *
     *     <h1>First</h1>
     *     <h2>Second</h2>
     *
     * will become:
     *
     *     <a id="TOC-1" class="anchor"></a><h1>First</h1>
     *     <a id="TOC-2" class="anchor"></a><h2>Second</h2>
     *
     * @param DOMDocument $dom     The DOM owning $element
     * @param DOMElement  $element The element to augment
     * @param string      $id      The destination ID
     */
    public static function prependAnchor(DOMDocument $dom, DOMElement $element, $id)
    {
        $anchor = $dom->createElement('a');
        $anchor->setAttribute('id', $id);
        $anchor->setAttribute('class', 'anchor');
        $element->parentNode->insertBefore($anchor, $element);
    }

    /**
     * Augment a DOMElement by setting its ID.
     *
     * An HTML fragment such as:
     *
     *     <h1>First</h1>
     *     <h2>Second</h2>
     *
     * will become:
     *
     *     <h1 id="TOC-1" class="anchor">First</h1>
     *     <h2 id="TOC-2" class="anchor">Second</h2>
     *
     * Any ID already present on the element is replaced. The classes
     * already present are kept: "anchor" is appended to them, unless
     * it is already there.
     *
     * @param DOMDocument $dom     The DOM owning $element. Not used:
     *                             present only to match the signature
     *                             required by setAugmentCallback()
     * @param DOMElement  $element The element to augment
     * @param string      $id      The destination ID
     */
    public static function setId(DOMDocument $dom, DOMElement $element, $id)
    {
        $element->setAttribute('id', $id);

        // getAttribute() returns an empty string for a missing
        // attribute, which splits into an empty list.
        $classes = preg_split('/\s+/', $element->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);
        if (!in_array('anchor', $classes, true)) {
            $classes[] = 'anchor';
        }
        $element->setAttribute('class', implode(' ', $classes));
    }
}
||
| 345 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.