1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Jclyons52\PagePreview; |
4
|
|
|
|
5
|
|
|
use Jclyons52\PHPQuery\Document; |
6
|
|
|
|
7
|
|
|
/**
 * Fetches a web page through the injected HTTP client, parses it into a
 * PHPQuery document and extracts the pieces needed for a link preview
 * (title, images, description and meta tags).
 */
class PreviewBuilder
{
    /**
     * PHPQuery document object that will be used to select elements
     * @var \Jclyons52\PHPQuery\Document
     */
    public $document;

    /**
     * reference to the url parameter the user passes into the fetch method
     * @var string
     */
    public $url;

    /**
     * destructured array of url components from parse_url
     * @var array
     */
    protected $urlComponents;

    /**
     * HTTP client used to download the page (and handed on to Media)
     * @var HttpInterface
     */
    protected $http;

    /**
     * @param HttpInterface $http client used by fetch() to load the page body
     */
    public function __construct(HttpInterface $http)
    {
        $this->http = $http;
    }

    /**
     * Instantiate class with dependencies
     *
     * Uses late static binding so that a subclass calling create() receives
     * an instance of itself, as the "@return static" contract promises.
     *
     * @return static
     */
    public static function create()
    {
        return new static(new Http());
    }

    /**
     * Validate the url, download the page and build a Preview from it.
     *
     * @param string $url
     * @return Preview
     * @throws \Exception when the url is invalid or the page cannot be loaded
     */
    public function fetch($url)
    {
        // Stored first because the error message below reports it.
        $this->url = $url;

        if (filter_var($url, FILTER_VALIDATE_URL) === false) {
            throw new \Exception("url {$this->url} is invalid");
        }

        // parse_url() can return false for seriously malformed input; never
        // let that leak into the array-typed $urlComponents property.
        $urlComponents = parse_url($url);
        if (!is_array($urlComponents)) {
            throw new \Exception("url {$this->url} is invalid");
        }
        $this->urlComponents = $urlComponents;

        $body = $this->http->get($url);
        if ($body === false) {
            throw new \Exception('failed to load page');
        }

        $this->document = new Document($body);

        return $this->getPreview();
    }

    /**
     * Text of the page's <title> element.
     *
     * @return string the title text, or an empty string when the document
     *                has no <title> element
     */
    public function title()
    {
        $titleElement = $this->document->querySelector('title');

        // querySelector() is guarded against a null/falsy result elsewhere in
        // this class; do the same here instead of calling text() on it.
        if (!$titleElement) {
            return '';
        }

        return $titleElement->text();
    }

    /**
     * Keywords listed in <meta name="keywords">, split on commas and trimmed.
     *
     * @return array list of non-empty keyword strings; empty when the tag is
     *               missing or its content holds no keywords
     */
    public function metaKeywords()
    {
        $keywordsElement = $this->document->querySelector('meta[name="keywords"]');

        if (!$keywordsElement) {
            return [];
        }

        $keywords = [];
        foreach (explode(',', (string) $keywordsElement->attr('content')) as $word) {
            $word = trim($word);
            // explode() yields '' for an empty content attribute or for
            // stray commas; those are not keywords.
            if ($word !== '') {
                $keywords[] = $word;
            }
        }

        return $keywords;
    }

    /**
     * Read meta tags from the document.
     *
     * With a name, returns the content attribute of the first
     * <meta name="..."> match. Without one, returns every meta tag as a
     * name => content map.
     *
     * @param string|null $element value of the meta tag's name attribute
     * @return array|string|null map of all meta tags when $element is null;
     *                           otherwise the content of the matching tag,
     *                           or null when no such tag exists
     */
    public function meta($element = null)
    {
        if ($element === null) {
            return $this->metaTagsToArray($this->document->querySelectorAll('meta'));
        }

        $metaTag = $this->document->querySelector("meta[name='{$element}']");
        if ($metaTag === null) {
            return null;
        }

        return $metaTag->attr('content');
    }

    /**
     * get source attributes of all image tags on the page
     *
     * Each src is passed through formatUrl() so relative references become
     * absolute. Images whose src is empty are skipped.
     *
     * @return array<String>
     */
    public function images()
    {
        $images = $this->document->querySelectorAll('img');

        if ($images === []) {
            return [];
        }

        $result = [];
        foreach ($images->attr('src') as $src) {
            $src = trim((string) $src);
            // An empty src would otherwise be "resolved" to the page url.
            if ($src === '') {
                continue;
            }
            $result[] = $this->formatUrl($src);
        }

        return $result;
    }

    /**
     * returns an instance of Preview
     *
     * Collects title, images, description and meta tags from the current
     * document. Keywords are added under meta['keywords'] only when present.
     *
     * @return Preview
     */
    public function getPreview()
    {
        $title = $this->title();
        $images = $this->images();
        $description = $this->meta('description');
        $meta = $this->meta();

        $keywords = $this->metaKeywords();
        if ($keywords !== []) {
            $meta['keywords'] = $keywords;
        }

        $media = new Media($this->http);

        return new Preview($media, [
            'title' => $title,
            'images' => $images,
            'description' => $description,
            'url' => $this->url,
            'meta' => $meta,
        ]);
    }

    /**
     * Turn an image reference found in the page into an absolute url.
     *
     * data: uris and already-absolute urls are returned untouched. Other
     * references are resolved against the fetched page url, keeping its
     * scheme and port:
     *  - "//host/x"  protocol-relative: the page's scheme is prepended
     *  - "/x"        root-relative: appended to the page's origin
     *  - "x"         path-relative: appended to the directory of the page's
     *                path (everything up to and including the last "/")
     *
     * @param string $url
     * @return string
     */
    private function formatUrl($url)
    {
        if (substr($url, 0, 5) === 'data:') {
            return $url;
        }

        if (filter_var($url, FILTER_VALIDATE_URL) !== false) {
            return $url;
        }

        $scheme = isset($this->urlComponents['scheme']) ? $this->urlComponents['scheme'] : 'http';

        // Must be tested before the single-slash case below.
        if (substr($url, 0, 2) === '//') {
            return $scheme . ':' . $url;
        }

        $host = isset($this->urlComponents['host']) ? $this->urlComponents['host'] : '';
        $origin = $scheme . '://' . $host;
        if (isset($this->urlComponents['port'])) {
            $origin .= ':' . $this->urlComponents['port'];
        }

        if (substr($url, 0, 1) === '/') {
            return $origin . $url;
        }

        // Resolve against the directory of the page, not the page itself:
        // "/blog/post.html" + "a.png" => "/blog/a.png". A missing path
        // resolves against the root.
        $path = isset($this->urlComponents['path']) ? $this->urlComponents['path'] : '';
        $lastSlash = strrpos($path, '/');
        $directory = $lastSlash === false ? '/' : substr($path, 0, $lastSlash + 1);

        return $origin . $directory . $url;
    }

    /**
     * Flatten a collection of meta elements into a name => content map.
     *
     * The key is the tag's name attribute, falling back to its property
     * attribute when name is empty. Tags without a key or without content
     * are skipped. Later tags overwrite earlier ones with the same key.
     *
     * @param \Jclyons52\PHPQuery\Support\NodeCollection $metaTags
     * @return array
     */
    private function metaTagsToArray($metaTags)
    {
        $values = [];

        foreach ($metaTags as $meta) {
            $name = $meta->attr('name');
            // The string cast treats a null/false "missing attribute" result
            // the same as an empty one.
            if ((string) $name === '') {
                $name = $meta->attr('property');
            }

            $content = $meta->attr('content');
            if ((string) $name === '' || (string) $content === '') {
                continue;
            }

            $values[$name] = $content;
        }

        return $values;
    }
}
221
|
|
|
|
Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.
For example, imagine you have a variable `$accountId` that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the `id` property of an instance of the `Account` class. This class holds a proper account, so the id value must no longer be false. Either this assignment is in error or a type check should be added for that assignment.