Crawler::title()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Jclyons52\PagePreview;
4
5
use Jclyons52\PHPQuery\Document;
6
7
class Crawler
{
    /**
     * PHPQuery document object that will be used to select elements
     * @var \Jclyons52\PHPQuery\Document
     */
    private $document;

    /**
     * @param Document $document document that every selector in this class is run against
     */
    public function __construct(Document $document)
    {
        $this->document = $document;
    }

    /**
     * Collect the preview information of the page: title, image urls,
     * description, original url and meta tags.
     *
     * @param Url $url object exposing an `original` property and a
     *                 `formatRelativeToAbsolute()` method; image sources are
     *                 passed through that method before being returned
     * @return array with the keys 'title', 'images', 'description', 'url' and 'meta'
     */
    public function getPreviewData($url)
    {
        $meta = $this->meta();

        // Keywords are only exposed when the page actually declares some.
        $keywords = $this->metaKeywords();
        if ($keywords !== []) {
            $meta['keywords'] = $keywords;
        }

        return [
            'title' => $this->title(),
            'images' => $this->formatImages($url),
            'description' => $this->meta('description'),
            'url' => $url->original,
            'meta' => $meta,
        ];
    }

    /**
     * get source attributes of all image tags on the page
     *
     * Sources are trimmed, empty sources are dropped and the result is
     * reindexed so that it is always a plain list (0..n-1).
     *
     * @return array<String>
     */
    public function images()
    {
        $images = $this->document->querySelectorAll('img');

        if ($images === []) {
            return [];
        }

        $urls = [];
        foreach ($images->attr('src') as $url) {
            $url = trim((string) $url);
            // Compare against '' explicitly: a truthiness test would also
            // discard the valid relative source "0".
            if ($url !== '') {
                $urls[] = $url;
            }
        }

        return $urls;
    }

    /**
     * Text of the page's <title> element.
     *
     * @return string|null null when the document has no <title> element
     */
    private function title()
    {
        $titleElement = $this->document->querySelector('title');

        // querySelector() yields null when nothing matches; calling text()
        // on it would be a fatal error.
        if (!$titleElement) {
            return null;
        }

        return $titleElement->text();
    }

    /**
     * Keywords declared in <meta name="keywords">, split on commas.
     *
     * @return array<String> trimmed, non-empty keywords; empty array when the
     *                       tag is missing or declares no keywords
     */
    private function metaKeywords()
    {
        $keywordsElement = $this->document->querySelector('meta[name="keywords"]');

        if (!$keywordsElement) {
            return [];
        }

        $keywords = array_map('trim', explode(',', (string) $keywordsElement->attr('content')));

        // explode() returns [''] for an empty string and '' for stray commas;
        // drop those and reindex so callers get a clean list.
        return array_values(array_filter($keywords, function ($word) {
            return $word !== '';
        }));
    }

    /**
     * Read meta tags of the page.
     *
     * Without an argument every meta tag is returned as a name => content
     * map. With an argument only the content of the meta tag carrying that
     * name is returned.
     *
     * @param string|null $element value of the name attribute to look up
     * @return array|string|null map of all meta tags when $element is null;
     *                           otherwise the content attribute of the
     *                           matching tag, or null when no tag matches
     */
    private function meta($element = null)
    {
        $selector = "meta";
        if ($element === null) {
            $metaTags = $this->document->querySelectorAll($selector);
            return $this->metaTagsToArray($metaTags);
        }

        $selector .= "[name='{$element}']";
        $metaTag = $this->document->querySelector($selector);
        if ($metaTag === null) {
            return null;
        }

        return $metaTag->attr('content');
    }

    /**
     * Convert a collection of meta elements into a name => content map.
     *
     * The key is the tag's name attribute, falling back to its property
     * attribute. Tags without a key or without content are skipped.
     *
     * @param \Jclyons52\PHPQuery\Support\NodeCollection $metaTags
     * @return array
     */
    private function metaTagsToArray($metaTags)
    {
        $values = [];
        foreach ($metaTags as $meta) {
            $name = $meta->attr('name');
            if ($name === null || $name === '') {
                $name = $meta->attr('property');
            }

            $content = $meta->attr('content');
            if ($name === null || $name === '' || $content === null || $content === '') {
                continue;
            }

            $values[$name] = $content;
        }

        return $values;
    }

    /**
     * Image sources of the page, each passed through
     * $url->formatRelativeToAbsolute().
     *
     * @param Url $url
     * @return array one formatted entry per image source
     */
    private function formatImages($url)
    {
        return array_map(function ($imageUrl) use ($url) {
            return $url->formatRelativeToAbsolute($imageUrl);
        }, $this->images());
    }
}
151