Passed
Push — master ( 3afcce...e044cf )
by Guillaume
08:13
created

Tiki::entries()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 11
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 7
nc 1
nop 1
dl 0
loc 11
ccs 0
cts 8
cp 0
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace App\Docsets;
4
5
use Illuminate\Support\Str;
6
use Illuminate\Support\Collection;
7
use Wa72\HtmlPageDom\HtmlPageCrawler;
8
use Illuminate\Support\Facades\Storage;
9
use Godbout\DashDocsetBuilder\Docsets\BaseDocset;
10
11
/**
 * Docset definition for the Tiki documentation hosted on doc.tiki.org.
 *
 * The class does three things:
 *  - grab():    mirrors the online documentation with wget;
 *  - entries(): extracts index entries (Plugin, Module, Field, Style) from a
 *               downloaded page;
 *  - format():  strips site chrome from a page and inserts Dash anchors.
 *
 * NOTE(review): the constants below, as well as externalDomains(),
 * downloadedDirectory() and innerDirectory(), are presumably consumed or
 * provided by BaseDocset, which is not visible from this file.
 */
class Tiki extends BaseDocset
{
    public const CODE = 'tiki';
    public const NAME = 'Tiki';
    public const URL = 'doc.tiki.org';
    public const INDEX = 'All-the-Documentation.html';
    public const PLAYGROUND = 'https://tiki.org/Demo';
    public const ICON_16 = '../../icons/icon.png';
    public const ICON_32 = '../../icons/icon@2x.png';
    public const EXTERNAL_DOMAINS = [
        'themes.tiki.org',
    ];

    /**
     * Mirror the Tiki documentation into local storage using wget.
     *
     * Two regex alternations are built: URLs matching $toIgnore are rejected,
     * URLs matching $toGet are accepted. Both are passed to wget inside
     * single quotes, so none of the fragments may contain a single quote.
     *
     * @return bool true when wget exits with status 0
     */
    public function grab(): bool
    {
        // URL fragments that must never be downloaded.
        $toIgnore = implode('|', [
            '\?refresh',
            '\?session_filters',
            '\?sort_mode',
            '/Plugins-',
            'comzone=',
            'cookietab=',
            'fullscreen=',
            'offset=',
            'todate=',
            'viewmode=',
            'PDF\.js',
            'Plugins(\?structure=.*)?$',
            'tikiversion=',
            'wp_files_sort_mode[0-9]=',
        ]);

        // URL fragments that are allowed: static assets plus the
        // documentation pages that entries() knows how to index.
        $toGet = implode('|', [
            '\.css',
            '\.gif',
            '\.ico',
            '\.jpg',
            '\.js',
            '\.png',
            '\.svg',
            '\.webmanifest',
            '/display',
            '/LIST',
            '/Module-',
            '/Plugin[^-]',
            '-Field(s)?(\?structure=HomePage\+UserGuide)?$',
            'Tiki_org_family',
            '[^:=]Wiki-Syntax',
        ]);

        // The trailing backslashes are shell line continuations; system()
        // stores the command's exit status in $result.
        system(
            "echo; wget doc.tiki.org/All-the-Documentation \
                --mirror \
                --trust-server-names \
                --header 'Cookie: javascript_enabled_detect=true' \
                --reject-regex='{$toIgnore}' \
                --accept-regex='{$toGet}' \
                --ignore-case \
                --page-requisites \
                --adjust-extension \
                --convert-links \
                --span-hosts \
                --domains={$this->externalDomains()} \
                --directory-prefix=storage/{$this->downloadedDirectory()} \
                -e robots=off \
                --quiet \
                --show-progress",
            $result
        );

        return $result === 0;
    }

    /**
     * Collect every index entry found in one downloaded page.
     *
     * @param string $file path of the page, read through the Storage facade
     *
     * @return Collection plugin, module, field and style entries, in that order
     */
    public function entries(string $file): Collection
    {
        $crawler = HtmlPageCrawler::create(Storage::get($file));

        return collect()
            ->merge($this->pluginEntries($crawler, $file))
            ->merge($this->moduleEntries($crawler, $file))
            ->merge($this->fieldEntries($crawler, $file))
            ->merge($this->styleEntries($crawler, $file));
    }

    /**
     * Entries of type "Plugin", for files whose path contains "Plugin"
     * (case-insensitive).
     *
     * @return Collection
     */
    protected function pluginEntries(HtmlPageCrawler $crawler, string $file)
    {
        return $this->pageTitleEntries($crawler, $file, '/Plugin/i', 'Plugin');
    }

    /**
     * Entries of type "Module", for files whose path contains "Module"
     * (case-insensitive).
     *
     * @return Collection
     */
    protected function moduleEntries(HtmlPageCrawler $crawler, string $file)
    {
        return $this->pageTitleEntries($crawler, $file, '/Module/i', 'Module');
    }

    /**
     * Entries of type "Field", for files whose path contains "Tracker-Field"
     * (case-insensitive).
     *
     * @return Collection
     */
    protected function fieldEntries(HtmlPageCrawler $crawler, string $file)
    {
        return $this->pageTitleEntries($crawler, $file, '/Tracker-Field/i', 'Field');
    }

    /**
     * Entries of type "Style", for files whose path contains "Wiki-Syntax"
     * (case-insensitive).
     *
     * @return Collection
     */
    protected function styleEntries(HtmlPageCrawler $crawler, string $file)
    {
        return $this->pageTitleEntries($crawler, $file, '/Wiki-Syntax/i', 'Style');
    }

    /**
     * Shared implementation of the four *Entries() methods.
     *
     * When $file matches $filePattern, the first <h1> directly under
     * #page-data becomes one entry of the given type. The entry path is the
     * file path followed by "#" and the slug of the page's canonical URL,
     * cut after the inner directory.
     *
     * @param HtmlPageCrawler $crawler     parsed page
     * @param string          $file        path of the page being indexed
     * @param string          $filePattern regex deciding whether the page applies
     * @param string          $type        entry type stored in the index
     *
     * @return Collection zero or more ['name', 'type', 'path'] arrays
     */
    private function pageTitleEntries(
        HtmlPageCrawler $crawler,
        string $file,
        string $filePattern,
        string $type
    ): Collection {
        $entries = collect();

        if (! preg_match($filePattern, $file)) {
            return $entries;
        }

        // Reading an attribute from an empty node list throws, so a page
        // without a canonical link simply gets no fragment instead of
        // aborting the whole indexing run.
        $canonical = $crawler->filter('link[rel=canonical]');
        $target = $file;

        if ($canonical->count() > 0) {
            $target .= '#' . Str::slug($canonical->attr('href'));
        }

        $crawler->filter('#page-data > h1:first-of-type')->each(
            function (HtmlPageCrawler $node) use ($entries, $type, $target) {
                $entries->push([
                    'name' => $node->text(),
                    'type' => $type,
                    'path' => Str::after($target, $this->innerDirectory()),
                ]);
            }
        );

        return $entries;
    }

    /**
     * Clean a downloaded page for offline use: remove navigation and
     * tracking, adjust padding, then add the online-page comment and the
     * Dash table-of-contents anchors.
     *
     * @param string $html raw page markup
     *
     * @return string the transformed markup
     */
    public function format(string $html): string
    {
        $crawler = HtmlPageCrawler::create($html);

        $this->removeNavbar($crawler);
        $this->removeLeftSidebarButton($crawler);
        $this->removeFullscreenButton($crawler);
        $this->removePageTopModules($crawler);
        $this->removeWikiActionsWrapper($crawler);
        $this->removeBreadcrumb($crawler);
        $this->removeTopbar($crawler);
        $this->removeLeftSidebar($crawler);
        $this->removeRightSidebar($crawler);
        $this->removePagebar($crawler);
        $this->removeFooter($crawler);
        $this->removeUnwantedJavaScript($crawler);

        $this->updateCSS($crawler);

        $this->insertOnlineRedirection($crawler);
        $this->insertDashTableOfContents($crawler);

        return $crawler->saveHTML();
    }

    /**
     * Remove every <nav class="navbar"> element.
     *
     * @return void
     */
    protected function removeNavbar(HtmlPageCrawler $crawler)
    {
        $crawler->filter('nav.navbar')->remove();
    }

    /**
     * Remove the side-column toggle container directly under #row-middle.
     *
     * @return void
     */
    protected function removeLeftSidebarButton(HtmlPageCrawler $crawler)
    {
        $crawler->filter('#row-middle > div.side-col-toggle-container')->remove();
    }

    /**
     * Remove the element with id "fullscreenbutton".
     *
     * @return void
     */
    protected function removeFullscreenButton(HtmlPageCrawler $crawler)
    {
        $crawler->filter('#fullscreenbutton')->remove();
    }

    /**
     * Remove the element with id "pagetop_modules".
     *
     * @return void
     */
    protected function removePageTopModules(HtmlPageCrawler $crawler)
    {
        $crawler->filter('#pagetop_modules')->remove();
    }

    /**
     * Remove the wiki actions wrapper directly under #col1.
     *
     * @return void
     */
    protected function removeWikiActionsWrapper(HtmlPageCrawler $crawler)
    {
        $crawler->filter('#col1 > div.wikiactions_wrapper')->remove();
    }

    /**
     * Remove every <nav class="nav-breadcrumb"> element.
     *
     * @return void
     */
    protected function removeBreadcrumb(HtmlPageCrawler $crawler)
    {
        $crawler->filter('nav.nav-breadcrumb')->remove();
    }

    /**
     * Remove the element with id "topbar".
     *
     * @return void
     */
    protected function removeTopbar(HtmlPageCrawler $crawler)
    {
        $crawler->filter('#topbar')->remove();
    }

    /**
     * Remove the element with id "col2".
     *
     * @return void
     */
    protected function removeLeftSidebar(HtmlPageCrawler $crawler)
    {
        $crawler->filter('#col2')->remove();
    }

    /**
     * Remove script tags whose src contains "autoToc.js".
     *
     * NOTE(review): presumably that script is what renders the right-hand
     * table of contents, hence the method name — confirm against the site.
     *
     * @return void
     */
    protected function removeRightSidebar(HtmlPageCrawler $crawler)
    {
        $crawler->filter('script[src*="autoToc.js"]')->remove();
    }

    /**
     * Remove the element with id "page-bar".
     *
     * @return void
     */
    protected function removePagebar(HtmlPageCrawler $crawler)
    {
        $crawler->filter('#page-bar')->remove();
    }

    /**
     * Remove the element with id "footer".
     *
     * @return void
     */
    protected function removeFooter(HtmlPageCrawler $crawler)
    {
        $crawler->filter('#footer')->remove();
    }

    /**
     * Remove autosave and gtag scripts (external and inline), inline scripts
     * mentioning piwik.tiki.org, and every <noscript> element.
     *
     * @return void
     */
    protected function removeUnwantedJavaScript(HtmlPageCrawler $crawler)
    {
        // External scripts, matched on their src attribute.
        $crawler->filter('script[src*=autosave]')->remove();
        $crawler->filter('script[src*=gtag]')->remove();
        $crawler->filter('noscript')->remove();

        // Inline scripts, matched on their text content.
        $crawler->filterXPath("//script[text()[contains(.,'piwik.tiki.org')]]")->remove();
        $crawler->filterXPath("//script[text()[contains(.,'gtag')]]")->remove();
    }

    /**
     * Apply the padding adjustments needed once the navigation is gone.
     *
     * @return void
     */
    protected function updateCSS(HtmlPageCrawler $crawler)
    {
        $this->updateTopPadding($crawler);
        $this->updateArticlePadding($crawler);
    }

    /**
     * On <body>: drop the "navbar-padding" class, add "hide_zone_left" and
     * force padding-top to 0.
     *
     * @return void
     */
    protected function updateTopPadding(HtmlPageCrawler $crawler)
    {
        $crawler->filter('body')
            ->removeClass('navbar-padding')
            ->addClass('hide_zone_left')
            ->css('padding-top', '0')
        ;
    }

    /**
     * Set padding-top to 44px on <article id="top">.
     *
     * @return void
     */
    protected function updateArticlePadding(HtmlPageCrawler $crawler)
    {
        $crawler->filter('article#top')
            ->css('padding-top', '44px')
        ;
    }

    /**
     * Prepend an "Online page at <url>" HTML comment to <html>, using the
     * og:url meta tag when the page has one and an empty URL otherwise.
     *
     * @return void
     */
    protected function insertOnlineRedirection(HtmlPageCrawler $crawler)
    {
        $onlineUrl = '';
        $meta = $crawler->filter('meta[property="og:url"]');

        // NOTE(review): getDOMDocument() is used here as an "is the selection
        // non-empty" check — presumably it returns null for an empty
        // selection; confirm against HtmlPageCrawler.
        if ($meta->getDOMDocument()) {
            $onlineUrl = $meta->attr('content');
        }

        $crawler->filter('html')->prepend("<!-- Online page at $onlineUrl -->");
    }

    /**
     * Insert Dash anchors: a "Top" section anchor before the page's first
     * <h1> under #page-data, and one section anchor before every <h2>.
     *
     * Each <h2> anchor gets the slug of the heading as its id and the
     * URL-encoded heading text in its name.
     *
     * @return void
     */
    protected function insertDashTableOfContents(HtmlPageCrawler $crawler)
    {
        $crawler->filter('#page-data > h1:first-of-type')
            ->before('<a name="//apple_ref/cpp/Section/Top" class="dashAnchor"></a>');

        $crawler->filter('h2')->each(static function (HtmlPageCrawler $node) {
            $title = $node->text();

            $node->before(
                '<a id="' . Str::slug($title) . '" name="//apple_ref/cpp/Section/' . rawurlencode($title) . '" class="dashAnchor"></a>'
            );
        });
    }
}
309