Tocifier::prependAnchor()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 7
ccs 6
cts 6
cp 1
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 5
nc 1
nop 3
crap 1
1
<?php
2
3
namespace eNTiDi\Autotoc;
4
5
use DOMDocument;
6
use DOMElement;
7
use DOMXPath;
8
9
/*
10
 * Tocifier is intentionally decoupled from SilverStripe to be able to
11
 * test it without needing to put all the test infrastructure up.
12
 */
13
class Tocifier
{
    // Prefix to prepend to every URL fragment
    public static $prefix = 'TOC-';

    // The original HTML, exactly as passed to the constructor
    private $raw_html = '';

    // $raw_html augmented for proper navigation (set by process())
    private $html = '';

    // The most recently generated TOC tree (root node); null until
    // process() succeeds
    private $tree;

    // Array of references to the potential parents, indexed by
    // nesting level (0 is the root node)
    private $dangling = [];

    // Callback for augmenting a single DOMElement
    private $augment_callback;


    /**
     * Get the TOC node closest to a given nesting level.
     *
     * Walks the dangling parents from $level - 1 down to 0 and returns
     * (by reference) the first one found. The root node is registered
     * at level 0 before any heading is handled, so a parent is always
     * available for levels >= 1.
     *
     * @param  int $level  The requested nesting level.
     * @return array       Reference to the parent TOC node.
     * @throws \LogicException if no parent is registered below $level.
     */
    private function &getParent($level)
    {
        for ($candidate = $level - 1; $candidate >= 0; --$candidate) {
            if (isset($this->dangling[$candidate])) {
                return $this->dangling[$candidate];
            }
        }

        // This should never be reached: fail loudly instead of falling
        // off the end of a by-reference function.
        throw new \LogicException("No TOC parent available for level $level");
    }

    /**
     * Get the plain text content from a DOM element.
     *
     * The element is cloned first, so the document is left untouched.
     * Any <small> descendant is dropped from the clone before the text
     * is extracted.
     *
     * @param  DOMElement $tag  The DOM element to inspect.
     * @return string
     */
    private function getPlainText(DOMElement $tag)
    {
        // Work on a copy
        $clone = $tag->cloneNode(true);

        // Strip unneeded tags (<small>). The list is queried again on
        // every iteration because removing a node changes it.
        while (($small = $clone->getElementsByTagName('small')->item(0)) !== null) {
            $small->parentNode->removeChild($small);
        }

        return $clone->textContent;
    }

    /**
     * Create a new TOC node.
     *
     * The new node is also registered as the dangling parent for its
     * level, after dropping every dangling parent of a deeper level.
     *
     * @param  string $id     Node id, used for anchoring
     * @param  string $text   Title text
     * @param  int    $level  The nesting level of the node
     * @return array          Reference to the newly created node.
     */
    private function &newNode($id, $text, $level)
    {
        $node = [
            'id'    => $id,
            'title' => $text
        ];

        // Clear the trailing dangling parents after level, if any
        end($this->dangling);
        $last = key($this->dangling);
        for ($n = $level + 1; $n <= $last; ++$n) {
            unset($this->dangling[$n]);
        }

        // Consider this node a potential dangling parent. Binding by
        // reference lets children appended later show up in the tree.
        $this->dangling[$level] = & $node;

        return $node;
    }

    /**
     * Process the specific document.
     *
     * Builds the TOC tree from the <h1>..<h6> elements that do not
     * carry a data-hide-from-toc attribute, calls the augment callback
     * on each of them and stores the serialized <body> content as the
     * augmented HTML.
     *
     * @param  DOMDocument $doc  The document to process.
     */
    private function processDocument($doc)
    {
        // Root node: level 0, parent of every top-level heading
        $this->tree = & $this->newNode(self::$prefix, '', 0);
        $n = 1;

        $xpath = new DOMXPath($doc);
        $query = '//*[translate(name(), "123456", "......") = "h."][not(@data-hide-from-toc)]';

        foreach ($xpath->query($query) as $h) {
            $text = $this->getPlainText($h);
            // The nesting level is the digit in the tag name (h1 -> 1)
            $level = (int) substr($h->tagName, 1);
            $id = self::$prefix.$n;
            ++$n;

            // Build the tree
            $parent = & $this->getParent($level);
            $node = & $this->newNode($id, $text, $level);
            if (!isset($parent['children'])) {
                $parent['children'] = [];
            }
            $parent['children'][] = & $node;

            call_user_func($this->augment_callback, $doc, $h, $id);
        }

        // Serialize only the <body> and drop its wrapping tags
        $body = $doc->getElementsByTagName('body')->item(0);
        $this->html = str_replace(["<body>\n", '<body>', '</body>'], '', $doc->saveHTML($body));
    }

    /**
     * Debug function for dumping a TOC node and its children.
     *
     * Prints the title of the node followed by a newline, then
     * recurses on the children with one more tab of indentation.
     *
     * @param  array|null $node    The TOC node to dump (null when no
     *                             TOC has been generated yet)
     * @param  string     $indent  Indentation string.
     */
    private function dumpBranch($node, $indent = '')
    {
        // A missing title (e.g. no tree built yet) prints an empty line
        $title = isset($node['title']) ? $node['title'] : '';
        echo $indent.$title."\n";

        if (isset($node['children'])) {
            foreach ($node['children'] as $child) {
                $this->dumpBranch($child, "$indent\t");
            }
        }
    }


    /**
     * Create a new TOCifier instance.
     *
     * A string containing the HTML to parse for TOC must be passed
     * in. The real processing will be triggered by the process()
     * method.
     *
     * Parsing a file can be easily performed by using
     * file_get_contents():
     *
     * <code>
     * $tocifier = new Tocifier(@file_get_contents($file));
     * </code>
     *
     * The augment callback is initially set to Tocifier::setId().
     *
     * @param string $html A chunk of valid HTML (UTF-8 encoded).
     */
    public function __construct($html)
    {
        $this->raw_html = $html;
        $this->setAugmentCallback([static::class, 'setId']);
    }

    /**
     * Change the augment method used by this Tocifier instance.
     *
     * By default the HTML is augmented by directly setting the ID on
     * every valid destination element (see Tocifier::setId()). This
     * behavior can be changed by using Tocifier::prependAnchor() (that
     * prepends an anchor before the destination elements) or by
     * providing your own callback.
     *
     * The signature of the callback to pass in should be compatible
     * with:
     *
     *     function callback(DOMDocument $dom, DOMElement $element, $id)
     *
     * @param callable $callback  The new function to call for
     *                            augmenting DOMElement
     */
    public function setAugmentCallback($callback)
    {
        $this->augment_callback = $callback;
    }

    /**
     * Parse and process the HTML chunk.
     *
     * The parsing phase involves picking up all the HTML header
     * elements (from <h1> to <h6>), so if the HTML cannot be loaded
     * or any other error is encountered this function will fail.
     *
     * @return boolean true on success, false on errors.
     */
    public function process()
    {
        // Check if $this->raw_html is valid. An explicit comparison is
        // used because empty() would also reject the valid chunk "0".
        if (!is_string($this->raw_html) || $this->raw_html === '') {
            return false;
        }

        // DOMDocument::loadHTML() does not assume UTF-8 input, so every
        // non-ASCII character is turned into a numeric entity first.
        // This replaces the 'HTML-ENTITIES' pseudo-encoding of
        // mb_convert_encoding(), deprecated since PHP 8.2.
        $html = mb_encode_numericentity($this->raw_html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

        // Parse the HTML into a DOMDocument tree, collecting the parser
        // diagnostics internally instead of silencing them with @
        $doc = new DOMDocument();
        $previous = libxml_use_internal_errors(true);
        try {
            $loaded = $doc->loadHTML($html);
        } finally {
            // Do not discard errors the caller is collecting on its own
            if (!$previous) {
                libxml_clear_errors();
            }
            libxml_use_internal_errors($previous);
        }
        if (!$loaded) {
            return false;
        }

        // Process the doc
        $this->processDocument($doc);
        return true;
    }

    /**
     * Get the TOC (Table Of Contents) from the provided HTML.
     *
     * The HTML must be provided through the constructor and process()
     * must have been called.
     *
     * The TOC is represented in the form of:
     *
     *     [
     *         [
     *             'id'       => 'TOC-1',
     *             'title'    => 'Item 1',
     *             'children' => [
     *                 [
     *                     'id'       => 'TOC-2',
     *                     'title'    => 'Subitem 1.1'
     *                 ], [
     *                     'id'       => 'TOC-3',
     *                     'title'    => 'Subitem 1.2',
     *                     'children' => [
     *                         [
     *                             'id'       => 'TOC-4',
     *                             'title'    => 'Subsubitem 1.2.1'
     *                         ]
     *                     ]
     *                 ]
     *             ],
     *         ], [
     *             'id'       => 'TOC-5',
     *             'title'    => 'Item 2',
     *             'children' => [
     *                 [
     *                     'id'       => 'TOC-6',
     *                     'title'    => 'Subitem 2.1'
     *                 ], [
     *                     'id'       => 'TOC-7',
     *                     'title'    => 'Subitem 2.2'
     *                 ]
     *             ]
     *         ]
     *     ]
     *
     * The TOC is cached, so subsequent calls will return the same tree.
     *
     * @return array An array representing the TOC. A valid array is
     *               always returned (empty if there is no TOC).
     */
    public function getTOC()
    {
        return isset($this->tree['children']) ? $this->tree['children'] : [];
    }

    /**
     * Get the HTML augmented for proper navigation.
     *
     * The HTML must be provided through the constructor. The returned
     * string is generated by process() and cached, so subsequent calls
     * will return the same string without further processing. An empty
     * string is returned if process() has not succeeded yet.
     *
     * @return string The augmented HTML.
     */
    public function getHtml()
    {
        return $this->html;
    }

    /**
     * Dump the TOC to stdout for debugging purpose.
     */
    public function dumpTOC()
    {
        $this->dumpBranch($this->tree);
    }

    /**
     * Augment a DOMElement by prepending an anchor.
     *
     * An HTML fragment such as:
     *
     *     <h1>First</h1>
     *     <h2>Second</h2>
     *
     * will become:
     *
     *     <a id="TOC-1" class="anchor"></a><h1>First</h1>
     *     <a id="TOC-2" class="anchor"></a><h2>Second</h2>
     *
     * @param DOMDocument $dom      The DOM owning $element
     * @param DOMElement  $element  The element to augment
     * @param string      $id       The destination ID
     */
    public static function prependAnchor(DOMDocument $dom, DOMElement $element, $id)
    {
        $anchor = $dom->createElement('a');
        $anchor->setAttribute('id', $id);
        $anchor->setAttribute('class', 'anchor');
        $element->parentNode->insertBefore($anchor, $element);
    }

    /**
     * Augment a DOMElement by setting its ID.
     *
     * An HTML fragment such as:
     *
     *     <h1>First</h1>
     *     <h2>Second</h2>
     *
     * will become:
     *
     *     <h1 id="TOC-1" class="anchor">First</h1>
     *     <h2 id="TOC-2" class="anchor">Second</h2>
     *
     * Classes already present on the element are kept: "anchor" is
     * appended to them instead of replacing them.
     *
     * @param DOMDocument $dom      The DOM owning $element (unused
     *                              here, but part of the augment
     *                              callback signature)
     * @param DOMElement  $element  The element to augment
     * @param string      $id       The destination ID
     */
    public static function setId(DOMDocument $dom, DOMElement $element, $id)
    {
        $element->setAttribute('id', $id);

        // getAttribute() returns '' when the attribute is missing, so
        // an element without classes ends up with exactly "anchor"
        $classes = preg_split('/\s+/', $element->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);
        if (!in_array('anchor', $classes, true)) {
            $classes[] = 'anchor';
        }
        $element->setAttribute('class', implode(' ', $classes));
    }
}
345