This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | namespace eNTiDi\Autotoc; |
||
4 | |||
5 | use DOMDocument; |
||
6 | use DOMElement; |
||
7 | use DOMXPath; |
||
8 | |||
/*
 * Tocifier is intentionally decoupled from SilverStripe to be able to
 * test it without needing to put all the test infrastructure up.
 *
 * Typical usage: build an instance from a chunk of HTML, call process()
 * and then read the results back with getTOC() and getHtml().
 */
class Tocifier
{
    // Prefix to prepend to every URL fragment
    public static $prefix = 'TOC-';

    // The original HTML
    private $raw_html = '';

    // $raw_html augmented for proper navigation
    private $html = '';

    // The most recently generated TOC tree.
    private $tree;

    // Array of references to the potential parents, indexed by nesting
    // level (0 is the root node created by processDocument())
    private $dangling = [];

    // Callback for augmenting a single DOMElement
    private $augment_callback;


    /**
     * Get the TOC node closest to a given nesting level.
     *
     * The dangling parents are scanned from $level - 1 down to 0 and
     * the first one found is returned by reference, so the caller can
     * attach children directly to it.
     *
     * @param  int $level The requested nesting level.
     * @return array
     */
    private function &getParent($level)
    {
        while (--$level >= 0) {
            if (isset($this->dangling[$level])) {
                return $this->dangling[$level];
            }
        }
        // This should never be reached: processDocument() registers the
        // root node at level 0 before any heading is looked up
        assert(false);
    }

    /**
     * Get the plain text content from a DOM element.
     *
     * Every <small> descendant is dropped before reading the text. The
     * removal is performed on a deep copy, so $tag is left untouched.
     *
     * @param  DOMElement $tag The DOM element to inspect.
     * @return string
     */
    private function getPlainText(DOMElement $tag)
    {
        // Work on a copy
        $clone = $tag->cloneNode(true);

        // Strip unneeded tags (<small>)
        while (($tag = $clone->getElementsByTagName('small')) && $tag->length) {
            $tag->item(0)->parentNode->removeChild($tag->item(0));
        }

        return $clone->textContent;
    }

    /**
     * Create a new TOC node.
     *
     * The node is also registered as the dangling parent for $level,
     * after dropping any dangling parent registered at a deeper level.
     *
     * @param  string $id    Node id, used for anchoring
     * @param  string $text  Title text
     * @param  int    $level The nesting level of the node
     * @return array
     */
    private function &newNode($id, $text, $level)
    {
        $node = [
            'id'    => $id,
            'title' => $text
        ];

        // Clear the trailing dangling parents after level, if any
        end($this->dangling);
        $last = key($this->dangling);
        for ($n = $level+1; $n <= $last; ++$n) {
            unset($this->dangling[$n]);
        }

        // Consider this node a potential dangling parent
        $this->dangling[$level] = & $node;

        return $node;
    }

    /**
     * Process the specific document.
     *
     * Every <h1>...<h6> element not carrying a data-hide-from-toc
     * attribute gets a progressive ID (prefix followed by a counter
     * starting from 1), is appended to the TOC tree and is handed over
     * to the augment callback. The content of <body> is then serialized
     * into $this->html.
     *
     * @param DOMDocument $doc The document to process.
     */
    private function processDocument($doc)
    {
        $this->tree = & $this->newNode(self::$prefix, '', 0);
        $n = 1;

        $xpath = new DOMXPath($doc);
        // Map the digits 1..6 to "." so all the headings match "h."
        $query = '//*[translate(name(), "123456", "......") = "h."][not(@data-hide-from-toc)]';

        foreach ($xpath->query($query) as $h) {
            $text = $this->getPlainText($h);
            $level = (int) substr($h->tagName, 1);
            $id = self::$prefix.$n;
            ++$n;

            // Build the tree: the parent must be looked up before the
            // new node is created because newNode() alters $this->dangling
            $parent = & $this->getParent($level);
            $node = & $this->newNode($id, $text, $level);
            if (!isset($parent['children'])) {
                $parent['children'] = [];
            }
            $parent['children'][] = & $node;

            call_user_func($this->augment_callback, $doc, $h, $id);
        }

        // Serialize the body and strip its own start and end tags
        $body = $doc->getElementsByTagName('body')->item(0);
        $this->html = str_replace(["<body>\n", '<body>', '</body>'], '', $doc->saveHTML($body));
    }

    /**
     * Debug function for dumping a TOC node and its children.
     *
     * The title of the node is echoed on its own line, then the
     * children are dumped recursively with one more tab of indentation.
     *
     * @param array  $node   The TOC node to dump
     * @param string $indent Indentation string.
     */
    private function dumpBranch($node, $indent = '')
    {
        echo $indent.$node['title']."\n";
        if (isset($node['children'])) {
            // Read-only traversal: no need to iterate by reference
            foreach ($node['children'] as $child) {
                $this->dumpBranch($child, "$indent\t");
            }
        }
    }


    /**
     * Create a new TOCifier instance.
     *
     * A string containing the HTML to parse for TOC must be passed
     * in. The real processing will be triggered by the process()
     * method. The augment callback is initially set to setId().
     *
     * Parsing a file can be easily performed by using
     * file_get_contents():
     *
     * <code>
     * $tocifier = new Tocifier(@file_get_contents($file));
     * </code>
     *
     * @param string $html A chunk of valid HTML (UTF-8 encoded).
     */
    public function __construct($html)
    {
        $this->raw_html = $html;
        $this->setAugmentCallback([static::class, 'setId']);
    }

    /**
     * Change the augment method used by this Tocifier instance.
     *
     * By default the HTML is augmented by setting the ID directly on
     * every valid destination (see Tocifier::setId()). This behavior
     * can be changed by using Tocifier::prependAnchor() (that prepends
     * an anchor element before every destination) or by providing your
     * own callback.
     *
     * The signature of the callback to pass in should be compatible
     * with:
     *
     *     function callback(DOMDocument $dom, DOMElement $element, $id)
     *
     * @param callable $callback The new function to call for
     *                           augmenting DOMElement
     */
    public function setAugmentCallback($callback)
    {
        $this->augment_callback = $callback;
    }

    /**
     * Parse and process the HTML chunk.
     *
     * The parsing phase involves picking up all the HTML header
     * elements (from <h1> to <h6>). If the HTML passed to the
     * constructor is not a string, is an empty string or cannot be
     * loaded by DOMDocument, this function will fail.
     *
     * @return boolean true on success, false on errors.
     */
    public function process()
    {
        // Check if $this->raw_html is valid. The comparison is done
        // against '' because empty() would also reject the string "0"
        if (!is_string($this->raw_html) || $this->raw_html === '') {
            return false;
        }

        // loadHTML() does not treat its input as UTF-8 by default, so
        // every non-ASCII character is escaped as a numeric entity. The
        // 'HTML-ENTITIES' pseudo-encoding previously used for this
        // purpose is deprecated since PHP 8.2.
        $html = mb_encode_numericentity(
            $this->raw_html,
            [0x80, 0x10FFFF, 0, 0x1FFFFF],
            'UTF-8'
        );

        // Parse the HTML into a DOMDocument tree, collecting the libxml
        // errors internally instead of silencing them with @
        $doc = new DOMDocument();
        $previous = libxml_use_internal_errors(true);
        $loaded = $doc->loadHTML($html);
        if (!$previous) {
            // The buffer is wiped only when the buffering has been
            // enabled here, to avoid discarding errors of the caller
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);
        if (!$loaded) {
            return false;
        }

        // Process the doc
        $this->processDocument($doc);
        return true;
    }

    /**
     * Get the TOC (Table Of Contents) from the provided HTML.
     *
     * The HTML must be provided through the constructor and process()
     * must have been called.
     *
     * The TOC is represented in the form of:
     *
     *     [
     *         [
     *             'id'       => 'TOC-1',
     *             'title'    => 'Item 1',
     *             'children' => [
     *                 [
     *                     'id'       => 'TOC-2',
     *                     'title'    => 'Subitem 1.1'
     *                 ], [
     *                     'id'       => 'TOC-3',
     *                     'title'    => 'Subitem 1.2',
     *                     'children' => [
     *                         [
     *                             'id'    => 'TOC-4',
     *                             'title' => 'Subsubitem 1.2.1'
     *                         ]
     *                     ]
     *                 ]
     *             ],
     *         ], [
     *             'id'       => 'TOC-5',
     *             'title'    => 'Item 2',
     *             'children' => [
     *                 [
     *                     'id'    => 'TOC-6',
     *                     'title' => 'Subitem 2.1'
     *                 ], [
     *                     'id'    => 'TOC-7',
     *                     'title' => 'Subitem 2.2'
     *                 ]
     *             ]
     *         ]
     *     ]
     *
     * The TOC is cached, so subsequent calls will return the same tree.
     *
     * @return Array An array representing the TOC. A valid array is
     *               always returned: it is empty when no tree has been
     *               generated or no heading has been found.
     */
    public function getTOC()
    {
        return isset($this->tree['children']) ? $this->tree['children'] : [];
    }

    /**
     * Get the HTML augmented for proper navigation.
     *
     * The HTML must be provided through the constructor. The string is
     * generated by process(): before that, an empty string is
     * returned. The returned string is cached, so subsequent calls
     * will return the same string without further processing.
     *
     * @return String The augmented HTML.
     */
    public function getHtml()
    {
        return $this->html;
    }

    /**
     * Dump the TOC to stdout for debugging purpose.
     */
    public function dumpTOC()
    {
        $this->dumpBranch($this->tree);
    }

    /**
     * Augment a DOMElement by prepending an anchor.
     *
     * An HTML fragment such as:
     *
     *     <h1>First</h1>
     *     <h2>Second</h2>
     *
     * will become:
     *
     *     <a id="TOC-1" class="anchor"></a><h1>First</h1>
     *     <a id="TOC-2" class="anchor"></a><h2>Second</h2>
     *
     * @param DOMDocument $dom     The DOM owning $element
     * @param DOMElement  $element The element to augment
     * @param string      $id      The destination ID
     */
    public static function prependAnchor(DOMDocument $dom, DOMElement $element, $id)
    {
        $anchor = $dom->createElement('a');
        $anchor->setAttribute('id', $id);
        $anchor->setAttribute('class', 'anchor');
        $element->parentNode->insertBefore($anchor, $element);
    }

    /**
     * Augment a DOMElement by setting its ID.
     *
     * An HTML fragment such as:
     *
     *     <h1>First</h1>
     *     <h2 class="title">Second</h2>
     *
     * will become:
     *
     *     <h1 id="TOC-1" class="anchor">First</h1>
     *     <h2 class="title anchor" id="TOC-2">Second</h2>
     *
     * Any ID already present on the element is overwritten. The
     * "anchor" class is added to the classes already present on the
     * element, if any, instead of replacing them.
     *
     * @param DOMDocument $dom     The DOM owning $element. Not used
     *                             here: it is kept for compatibility
     *                             with the augment callback signature.
     * @param DOMElement  $element The element to augment
     * @param string      $id      The destination ID
     */
    public static function setId(DOMDocument $dom, DOMElement $element, $id)
    {
        $element->setAttribute('id', $id);

        // Preserve the existing classes and avoid duplicating "anchor"
        $classes = preg_split('/\s+/', $element->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);
        if (!in_array('anchor', $classes, true)) {
            $classes[] = 'anchor';
        }
        $element->setAttribute('class', implode(' ', $classes));
    }
}
||
345 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.