HeadingsProcess::loadDomDocument()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 6
ccs 5
cts 5
cp 1
rs 9.4285
cc 1
eloc 4
nc 1
nop 0
crap 1
1
<?php
2
namespace Bookdown\Bookdown\Process\Headings;
3
4
use Psr\Log\LoggerInterface;
5
use Bookdown\Bookdown\Content\Page;
6
use Bookdown\Bookdown\Content\HeadingFactory;
7
use Bookdown\Bookdown\Fsio;
8
use Bookdown\Bookdown\Process\ProcessInterface;
9
use DomDocument;
10
use DomNode;
11
use DomNodeList;
12
use DomText;
13
use DomXpath;
14
15
class HeadingsProcess implements ProcessInterface
16
{
17
    /**
     * Page currently being processed; assigned by reset().
     */
    protected $page;

    /**
     * Markup read from the page target; replaced by the rewritten markup
     * once the headings have been processed.
     */
    protected $html;

    /**
     * DomDocument built from $html, or null before one has been loaded.
     */
    protected $doc;

    /**
     * Running counter per heading level, keyed by tag name ('h2' .. 'h6').
     */
    protected $counts = array();

    /**
     * Heading objects collected for the current page.
     */
    protected $headings = array();

    /**
     * Factory whose newInstance() builds the heading objects.
     */
    protected $headingFactory;

    /**
     * Used to get() and put() the contents of the page target.
     */
    protected $fsio;

    /**
     * Logger that receives the per-page info message.
     */
    protected $logger;
32
33 14
    /**
     * Stores the collaborators this process relies on.
     *
     * @param LoggerInterface $logger         Receives the info message emitted per page.
     * @param Fsio            $fsio           Reads and writes the page target via get()/put().
     * @param HeadingFactory  $headingFactory Builds heading objects via newInstance().
     */
    public function __construct(
        LoggerInterface $logger,
        Fsio $fsio,
        HeadingFactory $headingFactory
    ) {
        $this->headingFactory = $headingFactory;
        $this->fsio = $fsio;
        $this->logger = $logger;
    }
42
43 13
    /**
     * Processes the headings of one page.
     *
     * Loads the markup of the page target, and when there is any, numbers
     * and anchors its headings and writes the rewritten markup back. The
     * collected headings are always handed to the page, even when no
     * markup was loaded.
     *
     * @param Page $page The page whose headings are processed.
     */
    public function __invoke(Page $page)
    {
        $target = $page->getTarget();
        $this->logger->info("    Processing headings for {$target}");

        $this->reset($page);
        $this->loadHtml();

        // an empty target leaves nothing to parse or rewrite
        if ($this->html) {
            $this->loadDomDocument();
            $this->processHeadingNodes();
            $this->saveHtml();
        }

        $page->setHeadings($this->headings);
    }
58
59 13
    /**
     * Clears all per-page state and binds the process to a new page.
     *
     * For an index page, the heading list is seeded with one heading built
     * from the page's own number, title and href.
     *
     * @param Page $page The page about to be processed.
     */
    protected function reset(Page $page)
    {
        $this->page = $page;
        $this->doc = null;
        $this->html = null;

        // one counter per numbered heading level, all starting at zero
        $this->counts = array_fill_keys(
            array('h2', 'h3', 'h4', 'h5', 'h6'),
            0
        );

        $this->headings = array();
        if (! $this->page->isIndex()) {
            return;
        }

        $this->headings[] = $this->headingFactory->newInstance(
            $this->page->getNumber(),
            $this->page->getTitle(),
            $this->page->getHref()
        );
    }
81
82 13
    /**
     * Reads the contents of the page target into $this->html.
     */
    protected function loadHtml()
    {
        $target = $this->page->getTarget();
        $this->html = $this->fsio->get($target);
    }
86
87 12
    /**
     * Writes $this->html back to the page target.
     */
    protected function saveHtml()
    {
        $target = $this->page->getTarget();
        $this->fsio->put($target, $this->html);
    }
91
92 12
    /**
     * Parses $this->html into a fresh DomDocument stored in $this->doc.
     *
     * The markup is treated as UTF-8. Every character above ASCII is turned
     * into a numeric character reference before parsing, so the parser sees
     * pure ASCII input. This replaces the former
     * mb_convert_encoding(..., 'HTML-ENTITIES', 'UTF-8') call: that
     * pseudo-encoding is deprecated as of PHP 8.2, and numeric references
     * parse to the same characters.
     */
    protected function loadDomDocument()
    {
        // map U+0080..U+10FFFF to &#NNN; with no offset and a full mask
        $encoded = mb_encode_numericentity(
            $this->html,
            array(0x80, 0x10FFFF, 0, 0x1FFFFF),
            'UTF-8'
        );

        $this->doc = new DomDocument();
        $this->doc->formatOutput = true;

        // LIBXML_HTML_NODEFDTD: do not add a default doctype to the fragment
        $this->doc->loadHTML($encoded, LIBXML_HTML_NODEFDTD);
    }
98
99 12
    /**
     * Runs the heading pipeline on the loaded document.
     *
     * The page title is taken from the heading nodes before addHeadings()
     * modifies them; afterwards the document is serialized back into
     * $this->html.
     */
    protected function processHeadingNodes()
    {
        $headingNodes = $this->getHeadingNodes();

        $this->setPageTitle($headingNodes);
        $this->addHeadings($headingNodes);

        $this->setHtmlFromDomDocument();
    }
106
107 12
    /**
     * Finds the h1..h6 elements that are direct children of <body>.
     *
     * @return DomNodeList|false Result of DomXpath::query() for the heading expression.
     */
    protected function getHeadingNodes()
    {
        // headings nested inside other elements are deliberately not matched
        $expression = '/html/body/*['
                    . 'self::h1 or self::h2 or self::h3 or self::h4 '
                    . ' or self::h5 or self::h6'
                    . ']';

        $finder = new DomXpath($this->doc);
        return $finder->query($expression);
    }
114
115 12
    /**
     * Uses the text of the first heading node as the page title.
     *
     * Does nothing when the list is empty.
     *
     * @param DomNodeList $nodes The heading nodes of the page.
     */
    protected function setPageTitle(DomNodeList $nodes)
    {
        $first = $nodes->item(0);
        if (! $first) {
            return;
        }

        $this->page->setTitle($first->nodeValue);
    }
122
123 12
    /**
     * Registers every given heading node, in iteration order.
     *
     * @param DomNodeList|DomNode[] $nodes The heading nodes to register.
     */
    protected function addHeadings($nodes)
    {
        foreach ($nodes as $headingNode) {
            $this->addHeading($headingNode);
        }
    }
129
130 12
    /**
     * Records one heading and decorates its node in the document.
     *
     * The heading object is built from the node first, so it reflects the
     * node before the number prefix and the id attribute are added.
     *
     * @param DomNode $node The heading element; setAttribute() is called on it.
     */
    protected function addHeading(DomNode $node)
    {
        $heading = $this->newHeading($node);
        $this->headings[] = $heading;

        // put "<number> " in front of the existing heading content
        $prefix = new DomText($heading->getNumber() . ' ');
        $node->insertBefore($prefix, $node->firstChild);

        // expose the heading's anchor as the element id
        $node->setAttribute('id', $heading->getAnchor());
    }
141
142 12
    /**
     * Builds a heading object for a heading node.
     *
     * The title is the canonical (C14N) markup of the node's content, that
     * is, the element markup without its opening and closing tag. The
     * opening tag is measured rather than assumed to be four bytes long, so
     * a heading that carries attributes (e.g. <h2 class="x">) no longer
     * leaks part of its tag into the title.
     *
     * @param DomNode $node The heading element.
     * @return mixed Whatever HeadingFactory::newInstance() returns.
     */
    protected function newHeading(DomNode $node)
    {
        // the full heading number
        $number = $this->getHeadingNumber($node);

        // canonical markup of the whole element, tags included
        $markup = $node->C14N();

        // Canonical XML writes every attribute as name="value" and escapes
        // double quotes inside values, so the opening tag can be matched
        // exactly; fall back to the bare "<hN>" length if it is not.
        $openLength = strlen($node->nodeName) + 2;
        $openTag = '/^<[^\s>]+(?:\s+[^\s=>]+="[^"]*")*\s*>/';
        if (preg_match($openTag, $markup, $match)) {
            $openLength = strlen($match[0]);
        }

        // the closing tag never has attributes: "</" + name + ">"
        $closeLength = strlen($node->nodeName) + 3;

        $title = substr($markup, $openLength, -1 * $closeLength);

        // lose the trailing dot for the ID
        $id = substr($number, 0, -1);

        return $this->headingFactory->newInstance(
            $number,
            $title,
            $this->page->getHref(),
            $id
        );
    }
161
162 12
    /**
     * Advances the level counters for a node and returns its full number.
     *
     * The number is the page number followed by each non-zero counter and
     * a dot, stopping at the first counter that is zero.
     *
     * @param DomNode $node The heading element being numbered.
     * @return string The page number with the counter segments appended.
     */
    protected function getHeadingNumber(DomNode $node)
    {
        $this->setCounts($node);

        $segments = array();
        foreach ($this->counts as $tally) {
            // a zero counter ends the number, even if deeper ones are set
            if (! $tally) {
                break;
            }
            $segments[] = "{$tally}.";
        }

        return $this->page->getNumber() . implode('', $segments);
    }
174
175 12
    /**
     * Updates the per-level counters for a newly encountered heading.
     *
     * The counter of the node's own level is incremented, and every
     * counter whose tag name compares greater than the node's tag name
     * (the deeper levels) is reset to zero.
     *
     * @param DomNode $node The heading element being counted.
     */
    protected function setCounts(DomNode $node)
    {
        $current = $node->nodeName;

        foreach (array_keys($this->counts) as $level) {
            if ($level == $current) {
                $this->counts[$level]++;
            } elseif ($level > $current) {
                // tag names compare as strings, so 'h3' > 'h2'
                $this->counts[$level] = 0;
            }
        }
    }
186
187 12
    /**
     * Serializes the modified document back into $this->html.
     *
     * DomDocument wraps the parsed fragment in <html><body>...</body></html>.
     * That wrapper is removed by matching the tags actually present instead
     * of cutting a fixed number of bytes, so whitespace between the wrapper
     * tags or attributes on them cannot corrupt the markup. When the
     * wrapper is exactly <html><body>...</body></html> the result is the
     * same as with fixed-length stripping.
     */
    protected function setHtmlFromDomDocument()
    {
        // retain the modified html
        $html = trim($this->doc->saveHtml($this->doc->documentElement));

        // strip the html and body tags added by DomDocument, but only when
        // they really are at the very start and end of the output
        $html = preg_replace(
            '/^<html(?:\s[^>]*)?>\s*<body(?:\s[^>]*)?>/i',
            '',
            $html,
            1
        );
        $html = preg_replace('/<\/body>\s*<\/html>\z/i', '', $html, 1);

        // still may be whitespace all about
        $this->html = trim($html) . PHP_EOL;
    }
202
}
203