Completed
Push — master ( 94a5f2...0612f1 )
by Andrew
05:14
created

Article::getTopImage()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 0
1
<?php
2
3
namespace Goose;
4
5
use Goose\Images\Image;
6
use DOMWrap\Element;
7
use DOMWrap\Document;
8
9
/**
 * Article
 *
 * Plain data holder for the fields describing a parsed article. Every property
 * is protected and exposed through a matching setter/getter pair; the class
 * performs no validation or transformation of the values it is given.
 *
 * @package Goose
 * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License 2.0
 */
class Article {
    /**
     * Language of the article
     *
     * @var string
     */
    protected $language;

    /** @param string $language */
    public function setLanguage($language) {
        $this->language = $language;
    }

    /** @return string */
    public function getLanguage() {
        return $this->language;
    }

    /**
     * OpenGraph meta data
     *
     * @var string[]
     */
    protected $openGraph;

    /** @param string[] $openGraph */
    public function setOpenGraph($openGraph) {
        $this->openGraph = $openGraph;
    }

    /** @return string[] */
    public function getOpenGraph() {
        return $this->openGraph;
    }

    /**
     * Title of the article
     *
     * @var string
     */
    protected $title;

    /** @param string $title */
    public function setTitle($title) {
        $this->title = $title;
    }

    /** @return string */
    public function getTitle() {
        return $this->title;
    }

    /**
     * Stores the lovely, pure text from the article, stripped of html, formatting, etc...
     * just raw text with paragraphs separated by newlines. This is probably what you want to use.
     *
     * @var string
     */
    protected $cleanedArticleText;

    /** @param string $cleanedArticleText */
    public function setCleanedArticleText($cleanedArticleText) {
        $this->cleanedArticleText = $cleanedArticleText;
    }

    /** @return string */
    public function getCleanedArticleText() {
        return $this->cleanedArticleText;
    }

    /**
     * Article with the original HTML tags (<p>, <a>, ..)
     *
     * @var string
     */
    protected $htmlArticle;

    /** @param string $htmlArticle */
    public function setHtmlArticle($htmlArticle) {
        $this->htmlArticle = $htmlArticle;
    }

    /** @return string */
    public function getHtmlArticle() {
        return $this->htmlArticle;
    }

    /**
     * Meta description field in HTML source
     *
     * @var string
     */
    protected $metaDescription;

    /** @param string $metaDescription */
    public function setMetaDescription($metaDescription) {
        $this->metaDescription = $metaDescription;
    }

    /** @return string */
    public function getMetaDescription() {
        return $this->metaDescription;
    }

    /**
     * Meta keywords field in the HTML source
     *
     * @var string
     */
    protected $metaKeywords;

    /** @param string $metaKeywords */
    public function setMetaKeywords($metaKeywords) {
        $this->metaKeywords = $metaKeywords;
    }

    /** @return string */
    public function getMetaKeywords() {
        return $this->metaKeywords;
    }

    /**
     * The canonical link of this article if found in the meta data
     *
     * @var string
     */
    protected $canonicalLink;

    /** @param string $canonicalLink */
    public function setCanonicalLink($canonicalLink) {
        $this->canonicalLink = $canonicalLink;
    }

    /** @return string */
    public function getCanonicalLink() {
        return $this->canonicalLink;
    }

    /**
     * Domain of the article we're parsing
     *
     * @var string
     */
    protected $domain;

    /** @param string $domain */
    public function setDomain($domain) {
        $this->domain = $domain;
    }

    /** @return string */
    public function getDomain() {
        return $this->domain;
    }

    /**
     * Top Element we think is a candidate for the main body of the article
     *
     * @var Element|null
     */
    protected $topNode;

    /** @param Element|null $topNode */
    public function setTopNode(Element $topNode = null) {
        $this->topNode = $topNode;
    }

    /** @return Element|null */
    public function getTopNode() {
        return $this->topNode;
    }

    /**
     * Top Image object that we think represents this article
     *
     * @var Image|null
     */
    protected $topImage;

    /** @param Image|null $topImage */
    public function setTopImage(Image $topImage = null) {
        $this->topImage = $topImage;
    }

    /** @return Image|null */
    public function getTopImage() {
        return $this->topImage;
    }

    /**
     * All image candidates from article
     *
     * @var Image[]
     */
    protected $allImages = [];

    /** @param Image[] $allImages */
    public function setAllImages($allImages = []) {
        $this->allImages = $allImages;
    }

    /** @return Image[] */
    public function getAllImages() {
        return $this->allImages;
    }

    /**
     * Tags that may have been in the article, these are not meta keywords
     *
     * @var string[]
     */
    protected $tags = [];

    /** @param string[] $tags */
    public function setTags($tags) {
        $this->tags = $tags;
    }

    /** @return string[] */
    public function getTags() {
        return $this->tags;
    }

    /**
     * List of links in the article
     *
     * @var string[]
     */
    protected $links = [];

    /** @param string[] $links */
    public function setLinks($links) {
        $this->links = $links;
    }

    /** @return string[] */
    public function getLinks() {
        return $this->links;
    }

    /**
     * List of any videos we found on the page like youtube, vimeo
     *
     * @var string[]
     */
    protected $videos = [];

    /** @param string[] $videos */
    public function setVideos($videos) {
        $this->videos = $videos;
    }

    /** @return string[] */
    public function getVideos() {
        return $this->videos;
    }

    /**
     * Final URL that we're going to try and fetch content against, this would be expanded if any
     * escaped fragments were found in the starting url
     *
     * @var string
     */
    protected $finalUrl;

    /** @param string $finalUrl */
    public function setFinalUrl($finalUrl) {
        $this->finalUrl = $finalUrl;
    }

    /** @return string */
    public function getFinalUrl() {
        return $this->finalUrl;
    }

    /**
     * MD5 hash of the url to use for various identification tasks
     *
     * @var string
     */
    protected $linkhash;

    /** @param string $linkhash */
    public function setLinkhash($linkhash) {
        $this->linkhash = $linkhash;
    }

    /** @return string */
    public function getLinkhash() {
        return $this->linkhash;
    }

    /**
     * Raw HTML straight from the network connection
     *
     * @var string
     */
    protected $rawHtml;

    /** @param string $rawHtml */
    public function setRawHtml($rawHtml) {
        $this->rawHtml = $rawHtml;
    }

    /** @return string */
    public function getRawHtml() {
        return $this->rawHtml;
    }

    /**
     * DOM Document object
     *
     * @var Document
     */
    protected $doc;

    /** @param Document $doc */
    public function setDoc(Document $doc) {
        $this->doc = $doc;
    }

    /** @return Document */
    public function getDoc() {
        return $this->doc;
    }

    /**
     * Original DOM document that contains a pure object from the original HTML without any cleaning
     * options done on it
     *
     * @var Document
     */
    protected $rawDoc;

    /** @param Document $rawDoc */
    public function setRawDoc(Document $rawDoc) {
        $this->rawDoc = $rawDoc;
    }

    /** @return Document */
    public function getRawDoc() {
        return $this->rawDoc;
    }

    /**
     * Original psr7 response object
     *
     * @var \Psr\Http\Message\ResponseInterface
     */
    protected $rawResponse;

    /**
     * The type hint has no default, so null is rejected here.
     *
     * @param \Psr\Http\Message\ResponseInterface $rawResponse
     */
    public function setRawResponse(\Psr\Http\Message\ResponseInterface $rawResponse) {
        $this->rawResponse = $rawResponse;
    }

    /** @return \Psr\Http\Message\ResponseInterface */
    public function getRawResponse() {
        return $this->rawResponse;
    }

    /**
     * Sometimes useful to try and know when the publish date of an article was
     *
     * @var \DateTime|null
     */
    protected $publishDate;

    /** @param \DateTime|null $publishDate */
    public function setPublishDate(\DateTime $publishDate = null) {
        $this->publishDate = $publishDate;
    }

    /** @return \DateTime|null */
    public function getPublishDate() {
        return $this->publishDate;
    }

    /**
     * A property bucket for consumers of goose to store custom data extractions.
     *
     * @var string
     */
    protected $additionalData;

    /** @param string $additionalData */
    public function setAdditionalData($additionalData) {
        $this->additionalData = $additionalData;
    }

    /** @return string */
    public function getAdditionalData() {
        return $this->additionalData;
    }

    /**
     * Facebook Open Graph data that is found in Article Meta tags
     *
     * @var string
     */
    protected $openGraphData;

    /** @param string $openGraphData */
    public function setOpenGraphData($openGraphData) {
        $this->openGraphData = $openGraphData;
    }

    /** @return string */
    public function getOpenGraphData() {
        return $this->openGraphData;
    }

    /**
     * Most popular words used in the lovely article
     *
     * @var string[]
     */
    protected $popularWords = [];

    /** @param string[] $popularWords */
    public function setPopularWords($popularWords) {
        $this->popularWords = $popularWords;
    }

    /** @return string[] */
    public function getPopularWords() {
        return $this->popularWords;
    }
}