Completed
Push — master ( ae312b...55e5cb )
by Joseph
03:09
created

Crawler::formatImages()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 6
ccs 3
cts 3
cp 1
rs 9.4285
cc 1
eloc 4
nc 1
nop 1
crap 1
1
<?php
2
3
namespace Jclyons52\PagePreview;
4
5
use Jclyons52\PHPQuery\Document;
6
7
class Crawler
{
    /**
     * PHPQuery document object that will be used to select elements
     * @var \Jclyons52\PHPQuery\Document
     */
    private $document;

    /**
     * @param \Jclyons52\PHPQuery\Document $document parsed page to extract preview data from
     */
    public function __construct(Document $document)
    {
        $this->document = $document;
    }

    /**
     * Collect the title, images, description and meta tags of the page.
     *
     * @param object $url URL object for the page; its `original` property is
     *                    returned as-is and its formatRelativeToAbsolute()
     *                    method is applied to every image source
     *                    (presumably the package's Url class - confirm).
     * @return array map with the keys 'title', 'images', 'description',
     *               'url' and 'meta'
     */
    public function getPreviewData($url)
    {
        $title = $this->title();

        $images = $this->formatImages($url);

        $description = $this->meta('description');

        $meta = $this->meta();

        $keywords = $this->metaKeywords();

        // Replace the raw comma separated keywords string collected by meta()
        // with the parsed list, but only when at least one keyword exists.
        if ($keywords !== []) {
            $meta['keywords'] = $keywords;
        }

        return [
            'title' => $title,
            'images' => $images,
            'description' => $description,
            'url' => $url->original,
            'meta' => $meta,
        ];
    }

    /**
     * get source attributes of all image tags on the page
     *
     * Sources are trimmed, empty ones are dropped, and the result is a
     * sequentially indexed list (no key gaps left behind by the filtering).
     *
     * @return array<String>
     */
    public function images()
    {
        $images = $this->document->querySelectorAll('img');

        if ($images === []) {
            return [];
        }

        $urls = [];

        // The array cast guards against a non-array result from attr().
        foreach ((array) $images->attr('src') as $src) {
            $src = trim((string) $src);

            // Compare against '' rather than testing truthiness so that a
            // source of "0" is not discarded.
            if ($src !== '') {
                $urls[] = $src;
            }
        }

        return $urls;
    }

    /**
     * Text of the page's title element.
     *
     * @return string|null null when the document has no title element
     */
    private function title()
    {
        $titleElement = $this->document->querySelector('title');

        // querySelector() yields null when nothing matches (see meta()), so
        // calling text() unconditionally would be a fatal error.
        if ($titleElement === null) {
            return null;
        }

        return $titleElement->text();
    }

    /**
     * Parse the content of the keywords meta tag into a list of keywords.
     *
     * @return array<String> trimmed, non-empty keywords; empty array when the
     *                       tag is missing or holds no keywords
     */
    private function metaKeywords()
    {
        $keywordsElement = $this->document->querySelector('meta[name="keywords"]');

        if (!$keywordsElement) {
            return [];
        }

        $keywordString = (string) $keywordsElement->attr('content');

        $keywords = [];

        foreach (explode(',', $keywordString) as $word) {
            $word = trim($word);

            // explode() produces empty strings for empty content, doubled
            // commas and trailing commas; those are not keywords.
            if ($word !== '') {
                $keywords[] = $word;
            }
        }

        return $keywords;
    }

    /**
     * Read meta tags from the document.
     *
     * Without an argument every meta tag is collected into a name => content
     * map. With an argument only the content of the meta tag whose name
     * attribute equals $element is returned.
     *
     * @param string|null $element value of the name attribute to look up
     * @return array|string|null map of all meta tags when $element is null;
     *                           otherwise the content attribute of the
     *                           matching tag, or null when no tag matches
     */
    private function meta($element = null)
    {
        $selector = "meta";
        if ($element === null) {
            $metaTags = $this->document->querySelectorAll($selector);
            return $this->metaTagsToArray($metaTags);
        }

        $selector .= "[name='{$element}']";
        $metaTags = $this->document->querySelector($selector);
        if ($metaTags === null) {
            return null;
        }
        return $metaTags->attr('content');
    }

    /**
     * Convert a collection of meta elements into a name => content map.
     *
     * The key is the tag's name attribute, falling back to its property
     * attribute. Tags with no key or no content are skipped; when two tags
     * share a key the later one wins.
     *
     * @param \Jclyons52\PHPQuery\Support\NodeCollection $metaTags
     * @return array
     */
    private function metaTagsToArray($metaTags)
    {
        $values = [];
        foreach ($metaTags as $meta) {
            // Normalise to string so a null attribute is treated like an
            // empty one instead of slipping through the checks below.
            $name = (string) $meta->attr('name');
            if ($name === '') {
                $name = (string) $meta->attr('property');
            }
            $content = $meta->attr('content');
            if ($name === '' || (string) $content === '') {
                continue;
            }
            $values[$name] = $content;
        }
        return $values;
    }

    /**
     * Pass every image source on the page through the URL object's
     * formatRelativeToAbsolute() method.
     *
     * @param object $url object providing formatRelativeToAbsolute()
     *                    (presumably the package's Url class - confirm)
     * @return array one formatted entry per image source
     */
    private function formatImages($url)
    {
        return array_map(function ($imageUrl) use ($url) {
            return $url->formatRelativeToAbsolute($imageUrl);
        }, $this->images());
    }
}
151