Passed
Push — master ( de3cce...9ef401 )
by Guillaume
07:53
created

RickAstley::insertDashTableOfContents()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 8
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 8
c 0
b 0
f 0
ccs 0
cts 5
cp 0
rs 10
cc 1
nc 1
nop 1
crap 2
1
<?php
2
3
namespace Godbout\DashDocsetBuilder\Docsets;
4
5
use Illuminate\Support\Str;
6
use Illuminate\Support\Collection;
7
use Wa72\HtmlPageDom\HtmlPageCrawler;
8
use Illuminate\Support\Facades\Storage;
9
10
/**
 * Docset definition for the Rick Astley website (rickastley.co.uk).
 *
 * Produces the index entries for a downloaded page from its main menu and
 * cleans the page HTML (header, footer, ad pixel, tracking scripts) before
 * inserting Dash table-of-contents anchors.
 */
class RickAstley extends BaseDocset
{
    public const CODE = 'rick-astley';
    public const NAME = 'Rick Astley';
    public const URL = 'rickastley.co.uk';
    public const INDEX = 'index.html';
    public const PLAYGROUND = '';
    public const ICON_16 = 'icons/favicon-16x16.png';
    public const ICON_32 = 'icons/favicon-32x32.png';
    // NOTE(review): presumably the extra hosts the base builder is allowed
    // to fetch assets from — confirm against BaseDocset.
    public const EXTERNAL_DOMAINS = [
        'fonts.googleapis.com',
        'widget.songkick.com',
        'cdn-images.mailchimp.com',
        's3.amazonaws.com',
        'ajax.googleapis.com',
    ];

    /**
     * Build the index entries found in a stored HTML file.
     *
     * Every main-menu link (except the first list item) becomes a "Section"
     * entry. When the file is the site's index.html and the menu is present,
     * a single "Guide" entry for the website itself is added first.
     *
     * @param string $file Storage path of the HTML file to inspect.
     *
     * @return Collection Entries as arrays with 'name', 'type' and 'path' keys.
     */
    public function entries(string $file): Collection
    {
        $entries = collect();

        $crawler = HtmlPageCrawler::create(Storage::get($file));
        $menuLinks = $crawler->filter('#main_menu li:not(:first-child) a');

        // The Guide entry used to be pushed once per menu link, which yielded
        // N identical records. Push it exactly once, while still requiring
        // the menu to be present as before.
        if (Str::contains($file, "{$this->url()}/index.html") && $menuLinks->count() > 0) {
            $entries->push([
                'name' => 'Rick Astley - Official Website',
                'type' => 'Guide',
                'path' => $this->url() . '/index.html',
            ]);
        }

        $menuLinks->each(function (HtmlPageCrawler $node) use ($entries) {
            $entries->push([
                'name' => $node->text(),
                'type' => 'Section',
                'path' => $this->url() . '/' . $node->attr('href'),
            ]);
        });

        return $entries;
    }

    /**
     * Clean up a page's HTML and add the Dash table-of-contents anchors.
     *
     * @param string $html Raw HTML of the page.
     *
     * @return string The transformed HTML.
     */
    public function format(string $html): string
    {
        $crawler = HtmlPageCrawler::create($html);

        $this->removeHeader($crawler);
        $this->removeFooter($crawler);

        $this->removeUnwantedHTML($crawler);
        $this->removeUnwantedJavaScript($crawler);

        $this->insertDashTableOfContents($crawler);

        return $crawler->saveHTML();
    }

    /**
     * Remove the element with id "header" from the document.
     *
     * @param HtmlPageCrawler $crawler Document being cleaned (modified in place).
     */
    protected function removeHeader(HtmlPageCrawler $crawler)
    {
        $crawler->filter('#header')->remove();
    }

    /**
     * Remove the element with id "footer" from the document.
     *
     * @param HtmlPageCrawler $crawler Document being cleaned (modified in place).
     */
    protected function removeFooter(HtmlPageCrawler $crawler)
    {
        $crawler->filter('#footer')->remove();
    }

    /**
     * Remove images whose src points at secure.adnxs.com.
     *
     * @param HtmlPageCrawler $crawler Document being cleaned (modified in place).
     */
    protected function removeUnwantedHTML(HtmlPageCrawler $crawler)
    {
        $crawler->filterXPath("//img[@src[contains(.,'secure.adnxs.com')]]")->remove();
    }

    /**
     * Remove <noscript> blocks and third-party tracking/advertising scripts.
     *
     * Scripts are matched either by a substring of their src attribute or by
     * a substring of their inline text.
     *
     * @param HtmlPageCrawler $crawler Document being cleaned (modified in place).
     */
    protected function removeUnwantedJavaScript(HtmlPageCrawler $crawler)
    {
        $crawler->filter('noscript')->remove();

        // External scripts, matched on their src attribute.
        $unwantedSources = [
            'platform.twitter.com',
            'googletagmanager',
            'googleadservices',
        ];

        foreach ($unwantedSources as $needle) {
            $crawler->filterXPath("//script[@src[contains(.,'{$needle}')]]")->remove();
        }

        // Inline scripts, matched on their text content.
        $unwantedInlineSnippets = [
            'googletagmanager',
            'gtag',
            'connect.facebook.net',
            'google_conversion_id',
            'googleadservices',
            'platform.twitter.com',
            'twttr.conversion',
        ];

        foreach ($unwantedInlineSnippets as $needle) {
            $crawler->filterXPath("//script[text()[contains(.,'{$needle}')]]")->remove();
        }
    }

    /**
     * Insert Dash anchors: a "Top" section before <head>, and one section
     * anchor before every page or product title.
     *
     * @param HtmlPageCrawler $crawler Document being annotated (modified in place).
     */
    protected function insertDashTableOfContents(HtmlPageCrawler $crawler)
    {
        $crawler->filter('head')
            ->before('<a name="//apple_ref/cpp/Section/Top" class="dashAnchor"></a>');

        $crawler->filter('div.page_title, div.product_title')->each(static function (HtmlPageCrawler $node) {
            // Read the title once; it feeds both the slug id and the encoded anchor name.
            $title = $node->text();

            $node->before(
                '<a id="' . Str::slug($title) . '" name="//apple_ref/cpp/Section/' . rawurlencode($title) . '" class="dashAnchor"></a>'
            );
        });
    }
}
112