Article::setRawDoc()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 4
c 0
b 0
f 0
rs 10
cc 1
eloc 2
nc 1
nop 1
1
<?php declare(strict_types=1);
2
3
namespace Goose;
4
5
use Goose\Images\Image;
6
use DOMWrap\{Element, Document};
7
8
/**
9
 * Article
10
 *
11
 * @package Goose
12
 * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License 2.0
13
 */
14
class Article {
    /**
     * Language of the article
     *
     * @var string|null
     */
    protected $language;

    /**
     * Set the language of the article.
     *
     * @param string|null $language Language value, or null to clear it
     *
     * @return self
     */
    public function setLanguage(?string $language = null): self {
        $this->language = $language;

        return $this;
    }

    /** @return string|null */
    public function getLanguage(): ?string {
        return $this->language;
    }

    /**
     * OpenGraph meta data
     *
     * @var string[]
     */
    protected $openGraph = [];

    /**
     * Replace the stored OpenGraph meta data.
     *
     * @param string[] $openGraph
     *
     * @return self
     */
    public function setOpenGraph(array $openGraph): self {
        $this->openGraph = $openGraph;

        return $this;
    }

    /** @return string[] */
    public function getOpenGraph(): array {
        return $this->openGraph;
    }

    /**
     * Title of the article
     *
     * @var string|null
     */
    protected $title;

    /**
     * Set the title of the article.
     *
     * @param string|null $title Title, or null to clear it
     *
     * @return self
     */
    public function setTitle(?string $title = null): self {
        $this->title = $title;

        return $this;
    }

    /** @return string|null */
    public function getTitle(): ?string {
        return $this->title;
    }

    /**
     * Stores the lovely, pure text from the article, stripped of html, formatting, etc...
     * just raw text with paragraphs separated by newlines. This is probably what you want to use.
     *
     * @var string|null
     */
    protected $cleanedArticleText;

    /**
     * Set the cleaned, plain-text version of the article.
     *
     * @param string|null $cleanedArticleText Plain text, or null to clear it
     *
     * @return self
     */
    public function setCleanedArticleText(?string $cleanedArticleText = null): self {
        $this->cleanedArticleText = $cleanedArticleText;

        return $this;
    }

    /** @return string|null */
    public function getCleanedArticleText(): ?string {
        return $this->cleanedArticleText;
    }

    /**
     * Article with the original HTML tags (<p>, <a>, ..)
     *
     * @var string|null
     */
    protected $htmlArticle;

    /**
     * Set the HTML version of the article.
     *
     * @param string|null $htmlArticle HTML markup, or null to clear it
     *
     * @return self
     */
    public function setHtmlArticle(?string $htmlArticle = null): self {
        $this->htmlArticle = $htmlArticle;

        return $this;
    }

    /** @return string|null */
    public function getHtmlArticle(): ?string {
        return $this->htmlArticle;
    }

    /**
     * Meta description field in HTML source
     *
     * @var string|null
     */
    protected $metaDescription;

    /**
     * Set the meta description.
     *
     * @param string|null $metaDescription Description, or null to clear it
     *
     * @return self
     */
    public function setMetaDescription(?string $metaDescription = null): self {
        $this->metaDescription = $metaDescription;

        return $this;
    }

    /** @return string|null */
    public function getMetaDescription(): ?string {
        return $this->metaDescription;
    }

    /**
     * Meta keywords field in the HTML source
     *
     * @var string|null
     */
    protected $metaKeywords;

    /**
     * Set the meta keywords.
     *
     * @param string|null $metaKeywords Keywords, or null to clear them
     *
     * @return self
     */
    public function setMetaKeywords(?string $metaKeywords = null): self {
        $this->metaKeywords = $metaKeywords;

        return $this;
    }

    /** @return string|null */
    public function getMetaKeywords(): ?string {
        return $this->metaKeywords;
    }

    /**
     * The canonical link of this article if found in the meta data
     *
     * @var string|null
     */
    protected $canonicalLink;

    /**
     * Set the canonical link.
     *
     * @param string|null $canonicalLink Canonical link, or null to clear it
     *
     * @return self
     */
    public function setCanonicalLink(?string $canonicalLink = null): self {
        $this->canonicalLink = $canonicalLink;

        return $this;
    }

    /** @return string|null */
    public function getCanonicalLink(): ?string {
        return $this->canonicalLink;
    }

    /**
     * Domain of the article we're parsing
     *
     * @var string|null
     */
    protected $domain;

    /**
     * Set the domain of the article.
     *
     * @param string|null $domain Domain, or null to clear it
     *
     * @return self
     */
    public function setDomain(?string $domain = null): self {
        $this->domain = $domain;

        return $this;
    }

    /** @return string|null */
    public function getDomain(): ?string {
        return $this->domain;
    }

    /**
     * Top Element we think is a candidate for the main body of the article
     *
     * @var Element|null
     */
    protected $topNode;

    /**
     * Set the top element candidate.
     *
     * @param Element|null $topNode Element, or null to clear it
     *
     * @return self
     */
    public function setTopNode(?Element $topNode = null): self {
        $this->topNode = $topNode;

        return $this;
    }

    /** @return Element|null */
    public function getTopNode(): ?Element {
        return $this->topNode;
    }

    /**
     * Top Image object that we think represents this article
     *
     * @var Image|null
     */
    protected $topImage;

    /**
     * Set the top image.
     *
     * @param Image|null $topImage Image, or null to clear it
     *
     * @return self
     */
    public function setTopImage(?Image $topImage = null): self {
        $this->topImage = $topImage;

        return $this;
    }

    /** @return Image|null */
    public function getTopImage(): ?Image {
        return $this->topImage;
    }

    /**
     * All image candidates from article
     *
     * @var Image[]
     */
    protected $allImages = [];

    /**
     * Replace the list of image candidates.
     *
     * @param Image[] $allImages Defaults to an empty list when omitted
     *
     * @return self
     */
    public function setAllImages(array $allImages = []): self {
        $this->allImages = $allImages;

        return $this;
    }

    /** @return Image[] */
    public function getAllImages(): array {
        return $this->allImages;
    }

    /**
     * Tags that may have been in the article, these are not meta keywords
     *
     * @var string[]
     */
    protected $tags = [];

    /**
     * Replace the list of tags.
     *
     * @param string[] $tags
     *
     * @return self
     */
    public function setTags(array $tags): self {
        $this->tags = $tags;

        return $this;
    }

    /** @return string[] */
    public function getTags(): array {
        return $this->tags;
    }

    /**
     * List of links in the article
     *
     * @var string[]
     */
    protected $links = [];

    /**
     * Replace the list of links.
     *
     * @param string[] $links
     *
     * @return self
     */
    public function setLinks(array $links): self {
        $this->links = $links;

        return $this;
    }

    /** @return string[] */
    public function getLinks(): array {
        return $this->links;
    }

    /**
     * List of any videos we found on the page like youtube, vimeo
     *
     * @var string[]
     */
    protected $videos = [];

    /**
     * Replace the list of videos.
     *
     * @param string[] $videos
     *
     * @return self
     */
    public function setVideos(array $videos): self {
        $this->videos = $videos;

        return $this;
    }

    /** @return string[] */
    public function getVideos(): array {
        return $this->videos;
    }

    /**
     * Final URL that we're going to try and fetch content against, this would be expanded if any
     * escaped fragments were found in the starting url
     *
     * @var string|null
     */
    protected $finalUrl;

    /**
     * Set the final URL.
     *
     * @param string|null $finalUrl URL, or null to clear it
     *
     * @return self
     */
    public function setFinalUrl(?string $finalUrl = null): self {
        $this->finalUrl = $finalUrl;

        return $this;
    }

    /** @return string|null */
    public function getFinalUrl(): ?string {
        return $this->finalUrl;
    }

    /**
     * MD5 hash of the url to use for various identification tasks
     *
     * @var string|null
     */
    protected $linkhash;

    /**
     * Set the link hash.
     *
     * @param string|null $linkhash Hash, or null to clear it
     *
     * @return self
     */
    public function setLinkhash(?string $linkhash = null): self {
        $this->linkhash = $linkhash;

        return $this;
    }

    /** @return string|null */
    public function getLinkhash(): ?string {
        return $this->linkhash;
    }

    /**
     * Raw HTML straight from the network connection
     *
     * @var string|null
     */
    protected $rawHtml;

    /**
     * Set the raw HTML.
     *
     * @param string|null $rawHtml Raw HTML, or null to clear it
     *
     * @return self
     */
    public function setRawHtml(?string $rawHtml = null): self {
        $this->rawHtml = $rawHtml;

        return $this;
    }

    /** @return string|null */
    public function getRawHtml(): ?string {
        return $this->rawHtml;
    }

    /**
     * DOM Document object
     *
     * Holds null until setDoc() has been called.
     *
     * @var Document
     */
    protected $doc;

    /**
     * Set the DOM document.
     *
     * @param Document $doc
     *
     * @return self
     */
    public function setDoc(Document $doc): self {
        $this->doc = $doc;

        return $this;
    }

    /**
     * Get the DOM document.
     *
     * The return type is non-nullable, so calling this before setDoc()
     * raises a \TypeError.
     *
     * @return Document
     */
    public function getDoc(): Document {
        return $this->doc;
    }

    /**
     * Original DOM document that contains a pure object from the original HTML without any cleaning
     * options done on it
     *
     * Holds null until setRawDoc() has been called.
     *
     * @var Document
     */
    protected $rawDoc;

    /**
     * Set the original, uncleaned DOM document.
     *
     * @param Document $rawDoc
     *
     * @return self
     */
    public function setRawDoc(Document $rawDoc): self {
        $this->rawDoc = $rawDoc;

        return $this;
    }

    /**
     * Get the original, uncleaned DOM document.
     *
     * The return type is non-nullable, so calling this before setRawDoc()
     * raises a \TypeError.
     *
     * @return Document
     */
    public function getRawDoc(): Document {
        return $this->rawDoc;
    }

    /**
     * Original psr7 response object
     *
     * Holds null until setRawResponse() has been called.
     *
     * @var \Psr\Http\Message\ResponseInterface
     */
    protected $rawResponse;

    /**
     * Set the original psr7 response object.
     *
     * @param \Psr\Http\Message\ResponseInterface $rawResponse Must not be null
     *
     * @return self
     */
    public function setRawResponse(\Psr\Http\Message\ResponseInterface $rawResponse): self {
        $this->rawResponse = $rawResponse;

        return $this;
    }

    /**
     * Get the original psr7 response object.
     *
     * The return type is non-nullable, so calling this before
     * setRawResponse() raises a \TypeError.
     *
     * @return \Psr\Http\Message\ResponseInterface
     */
    public function getRawResponse(): \Psr\Http\Message\ResponseInterface {
        return $this->rawResponse;
    }

    /**
     * Sometimes useful to try and know when the publish date of an article was
     *
     * @var \DateTime|null
     */
    protected $publishDate;

    /**
     * Set the publish date.
     *
     * @param \DateTime|null $publishDate Date, or null to clear it
     *
     * @return self
     */
    public function setPublishDate(?\DateTime $publishDate = null): self {
        $this->publishDate = $publishDate;

        return $this;
    }

    /** @return \DateTime|null */
    public function getPublishDate(): ?\DateTime {
        return $this->publishDate;
    }

    /**
     * A property bucket for consumers of goose to store custom data extractions.
     *
     * @var string|null
     */
    protected $additionalData;

    /**
     * Set the additional data.
     *
     * @param string|null $additionalData Data, or null to clear it
     *
     * @return self
     */
    public function setAdditionalData(?string $additionalData = null): self {
        $this->additionalData = $additionalData;

        return $this;
    }

    /** @return string|null */
    public function getAdditionalData(): ?string {
        return $this->additionalData;
    }

    /**
     * Facebook Open Graph data that is found in Article Meta tags
     *
     * @var string|null
     */
    protected $openGraphData;

    /**
     * Set the Open Graph data.
     *
     * @param string|null $openGraphData Data, or null to clear it
     *
     * @return self
     */
    public function setOpenGraphData(?string $openGraphData = null): self {
        $this->openGraphData = $openGraphData;

        return $this;
    }

    /** @return string|null */
    public function getOpenGraphData(): ?string {
        return $this->openGraphData;
    }

    /**
     * Most popular words used in the lovely article
     *
     * @var string[]
     */
    protected $popularWords = [];

    /**
     * Replace the list of popular words.
     *
     * @param string[] $popularWords
     *
     * @return self
     */
    public function setPopularWords(array $popularWords): self {
        $this->popularWords = $popularWords;

        return $this;
    }

    /** @return string[] */
    public function getPopularWords(): array {
        return $this->popularWords;
    }
}