Completed
Push — master ( 94943b...ea0a0b )
by Nicola
02:48
created

Tocifier::setId()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 5
rs 9.4285
cc 1
eloc 3
nc 1
nop 3
<?php

/*
 * Tocifier is intentionally decoupled from SilverStripe so that it can
 * be tested without having to set up the whole test infrastructure.
 */
class Tocifier
{
    // Prefix to prepend to every URL fragment
    public static $prefix = 'TOC-';

    // The original HTML
    private $_raw_html = '';

    // $_raw_html augmented for proper navigation
    private $_html = '';

    // The most recently generated TOC tree.
    private $_tree;

    // Array of references to the potential parents, indexed by level
    private $_dangling = array();

    // Callback for augmenting a single DOMElement
    private $_augment_callback;


    /**
     * Get the TOC node closest to a given nesting level.
     *
     * Walks from $level - 1 down to 0 and returns (by reference) the
     * first registered dangling parent found.
     *
     * @param  int $level  The requested nesting level.
     * @return array
     * @throws LogicException if no parent is registered, i.e. the
     *                        root node (level 0) was never created.
     */
    private function &_getParent($level)
    {
        while (--$level >= 0) {
            if (isset($this->_dangling[$level])) {
                return $this->_dangling[$level];
            }
        }

        // _processDocument() registers the root at level 0 before any
        // heading is handled, so this point should never be reached.
        // Throw instead of assert(): assertions may be disabled, and
        // falling off the end of a return-by-reference function would
        // silently hand back a bogus parent.
        throw new LogicException('No TOC parent available: the root node is missing');
    }

    /**
     * Get the plain text content from a DOM element.
     *
     * Any <small> descendant is dropped before the text is extracted.
     * The element passed in is left untouched.
     *
     * @param  DOMElement $tag  The DOM element to inspect.
     * @return string
     */
    private function _getPlainText(DOMElement $tag)
    {
        // Work on a copy
        $clone = $tag->cloneNode(true);

        // Strip unneeded tags (<small>). The node list is live, so it
        // shrinks as soon as an element is detached from the clone.
        $smalls = $clone->getElementsByTagName('small');
        while ($smalls->length > 0) {
            $small = $smalls->item(0);
            $small->parentNode->removeChild($small);
        }

        return $clone->textContent;
    }

    /**
     * Create a new TOC node.
     *
     * The new node is also registered as the potential parent for its
     * level, and every potential parent deeper than it is forgotten.
     *
     * @param  string $id     Node id, used for anchoring
     * @param  string $text   Title text
     * @param  int    $level  The nesting level of the node
     * @return array
     */
    private function &_newNode($id, $text, $level)
    {
        $node = array(
            'id'    => $id,
            'title' => $text
        );

        // Clear the dangling parents deeper than $level, if any
        foreach (array_keys($this->_dangling) as $key) {
            if ($key > $level) {
                unset($this->_dangling[$key]);
            }
        }

        // Consider this node a potential dangling parent
        $this->_dangling[$level] =& $node;

        return $node;
    }

    /**
     * Process the specific document.
     *
     * Builds the TOC tree from the <h1>..<h6> elements that do not
     * carry a data-hide-from-toc attribute, calls the augment callback
     * on each of them and stores the resulting HTML.
     *
     * @param  DOMDocument $doc  The document to process.
     */
    private function _processDocument($doc)
    {
        $this->_tree =& $this->_newNode(self::$prefix, '', 0);
        $n = 1;

        $xpath = new DOMXPath($doc);
        $query = '//*[translate(name(), "123456", "......") = "h."][not(@data-hide-from-toc)]';

        foreach ($xpath->query($query) as $h) {
            $text = $this->_getPlainText($h);
            $level = (int) substr($h->tagName, 1);
            $id = self::$prefix . $n;
            ++$n;

            // Build the tree
            $parent =& $this->_getParent($level);
            $node =& $this->_newNode($id, $text, $level);
            if (! isset($parent['children'])) {
                $parent['children'] = array();
            }
            $parent['children'][] =& $node;

            call_user_func($this->_augment_callback, $doc, $h, $id);
        }

        $body = $doc->getElementsByTagName('body')->item(0);
        if ($body === null) {
            // No <body>, hence no heading to augment. Passing null to
            // saveHTML() would dump the whole document, so hand back
            // the original chunk unchanged instead.
            $this->_html = $this->_raw_html;
            return;
        }

        // Strip only the <body> wrapper itself. The opening tag is
        // matched with or without attributes and trailing newline,
        // and both patterns are anchored so that nothing inside the
        // content can be removed by mistake.
        $html = $doc->saveHTML($body);
        $html = preg_replace('~\A<body[^>]*>\n?~', '', $html, 1);
        $html = preg_replace('~</body>(?=\s*\z)~', '', $html, 1);
        $this->_html = $html;
    }

    /**
     * Debug function for dumping a TOC node and its children.
     *
     * Echoes one title per line, indenting children by one tab.
     *
     * @param  array  $node    The TOC node to dump
     * @param  string $indent  Indentation string.
     */
    private function _dumpBranch($node, $indent = '')
    {
        echo $indent . $node['title'] . "\n";
        if (isset($node['children'])) {
            foreach ($node['children'] as $child) {
                $this->_dumpBranch($child, "$indent\t");
            }
        }
    }


    /**
     * Create a new TOCifier instance.
     *
     * A string containing the HTML to parse for TOC must be passed
     * in. The real processing will be triggered by the process()
     * method.
     *
     * Parsing a file can be easily performed by using
     * file_get_contents():
     *
     * <code>
     * $tocifier = new Tocifier(@file_get_contents($file));
     * </code>
     *
     * @param string $html A chunk of valid HTML (UTF-8 encoded).
     */
    public function __construct($html)
    {
        $this->_raw_html = $html;
        // Default augmenting method (kept for backward compatibility)
        $this->setAugmentCallback(array(__CLASS__, 'prependAnchor'));
    }

    /**
     * Change the augment method used by this Tocifier instance.
     *
     * By default the HTML is augmented prepending an anchor before
     * every valid destination. This behavior can be changed by using
     * Tocifier::setId() (that directly sets the ID on the destination
     * elements) or by providing your own callback.
     *
     * The signature of the callback to pass in should be compatible
     * with:
     *
     *     function callback(DOMDocument $dom, DOMElement $element, $id)
     *
     * @param callable $callback  The new function to call for
     *                            augmenting DOMElement
     */
    public function setAugmentCallback($callback)
    {
        $this->_augment_callback = $callback;
    }

    /**
     * Parse and process the HTML chunk.
     *
     * The parsing phase involves picking up all the HTML header
     * elements (from <h1> to <h6>), so if the HTML cannot be loaded
     * or is not a non-empty string this function will fail.
     *
     * @return bool true on success, false on errors.
     */
    public function process()
    {
        // Check if $this->_raw_html is valid. A strict comparison is
        // used because empty() would also reject the string '0'.
        if (! is_string($this->_raw_html) || $this->_raw_html === '') {
            return false;
        }

        // DOMDocument::loadHTML() does not assume UTF-8 input, so any
        // non-ASCII character is turned into a numeric entity first.
        // This replaces the 'HTML-ENTITIES' pseudo-encoding of
        // mb_convert_encoding(), deprecated since PHP 8.2.
        $convmap = array(0x80, 0x10FFFF, 0, 0x1FFFFF);
        $html = mb_encode_numericentity($this->_raw_html, $convmap, 'UTF-8');

        // Parse the HTML into a DOMDocument tree. Parser complaints
        // are collected internally instead of being silenced with @,
        // and the previous libxml setting is restored afterwards.
        $previous = libxml_use_internal_errors(true);
        $doc = new DOMDocument();
        $loaded = $doc->loadHTML($html);
        if (! $previous) {
            // Do not wipe errors the caller may be collecting
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);

        if (! $loaded) {
            return false;
        }

        // Process the doc
        $this->_processDocument($doc);
        return true;
    }

    /**
     * Get the TOC (Table Of Contents) from the provided HTML.
     *
     * The HTML must be provided through the constructor and process()
     * must have been called, otherwise an empty array is returned.
     *
     * The TOC is represented in the form of:
     *
     * <code>
     * array(
     *     array('id'       => 'TOC-1',
     *           'title'    => 'Item 1',
     *           'children' => array(
     *               array('id'       => 'TOC-2',
     *                     'title'    => 'Subitem 1.1'
     *               ),
     *               array('id'       => 'TOC-3',
     *                     'title'    => 'Subitem 1.2',
     *                     'children' => array(
     *                         array('id'       => 'TOC-4',
     *                               'title'    => 'Subsubitem 1.2.1'
     *     ))))),
     *     array('id'       => 'TOC-5',
     *           'title'    => 'Item 2',
     *           'children' => array(
     *               array('id'       => 'TOC-6',
     *                     'title'    => 'Subitem 2.1'
     *               ),
     *               array('id'       => 'TOC-7',
     *                     'title'    => 'Subitem 2.2'
     * ))));
     * </code>
     *
     * The TOC is cached, so subsequent calls will return the same tree.
     *
     * @return array An array representing the TOC. A valid array is
     *               always returned.
     */
    public function getTOC()
    {
        return isset($this->_tree['children']) ? $this->_tree['children'] : array();
    }

    /**
     * Get the HTML augmented for proper navigation.
     *
     * The HTML must be provided through the constructor. The string
     * is generated by process() and cached, so subsequent calls will
     * return the same string without further processing.
     *
     * @return string The augmented HTML.
     */
    public function getHtml()
    {
        return $this->_html;
    }

    /**
     * Dump the TOC to stdout for debugging purpose.
     */
    public function dumpTOC()
    {
        $this->_dumpBranch($this->_tree);
    }

    /**
     * Augment a DOMElement by prepending an anchor.
     *
     * An HTML fragment such as:
     *
     *     <h1>First</h1>
     *     <h2>Second</h2>
     *
     * will become:
     *
     *     <a id="TOC-1" class="anchor"></a><h1>First</h1>
     *     <a id="TOC-2" class="anchor"></a><h2>Second</h2>
     *
     * @param DOMDocument $dom      The DOM owning $element
     * @param DOMElement  $element  The element to augment
     * @param string      $id       The destination ID
     */
    public static function prependAnchor(DOMDocument $dom, DOMElement $element, $id)
    {
        $anchor = $dom->createElement('a');
        $anchor->setAttribute('id', $id);
        $anchor->setAttribute('class', 'anchor');
        $element->parentNode->insertBefore($anchor, $element);
    }

    /**
     * Augment a DOMElement by setting its ID.
     *
     * An HTML fragment such as:
     *
     *     <h1>First</h1>
     *     <h2 class="intro">Second</h2>
     *
     * will become:
     *
     *     <h1 id="TOC-1" class="anchor">First</h1>
     *     <h2 class="intro anchor" id="TOC-2">Second</h2>
     *
     * Classes already present on the element are preserved: "anchor"
     * is appended to them instead of replacing them.
     *
     * @param DOMDocument $dom      The DOM owning $element (unused,
     *                              required by the callback signature)
     * @param DOMElement  $element  The element to augment
     * @param string      $id       The destination ID
     */
    public static function setId(DOMDocument $dom, DOMElement $element, $id)
    {
        $element->setAttribute('id', $id);

        // getAttribute() returns '' when the attribute is missing
        $classes = preg_split('/\s+/', $element->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);
        if (! in_array('anchor', $classes, true)) {
            $classes[] = 'anchor';
        }
        $element->setAttribute('class', implode(' ', $classes));
    }
}
332