Scraper::scrape()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 17
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 1

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 1
eloc 8
c 2
b 0
f 0
nc 1
nop 1
dl 0
loc 17
ccs 9
cts 9
cp 1
crap 1
rs 10
1
<?php
2
3
namespace Pilipinews\Website\Gma;
4
5
use Pilipinews\Common\Article;
6
use Pilipinews\Common\Client;
7
use Pilipinews\Common\Converter;
8
use Pilipinews\Common\Crawler as DomCrawler;
9
use Pilipinews\Common\Interfaces\ScraperInterface;
10
use Pilipinews\Common\Scraper as AbstractScraper;
11
12
/**
13
 * GMA News Scraper
14
 *
15
 * @package Pilipinews
16
 * @author  Rougin Gutib <[email protected]>
17
 */
18
class Scraper extends AbstractScraper implements ScraperInterface
{
    /**
     * Decoded JSON-LD metadata of the page most recently passed to prepare().
     *
     * Stays an empty array when the page has no usable
     * "application/ld+json" script block.
     *
     * @var array
     */
    protected $json = array();

    /**
     * Returns the contents of an article.
     *
     * The link is lower-cased before it is requested, while the returned
     * article keeps the link exactly as it was given.
     *
     * @param  string $link
     * @return \Pilipinews\Common\Article
     */
    public function scrape($link)
    {
        $this->prepare(mb_strtolower($link));

        // A page without JSON-LD metadata yields an empty title instead of
        // an undefined index access.
        $title = isset($this->json['headline']) ? $this->json['headline'] : '';

        $title = str_replace(' | News |', '', $title);

        $converter = new Converter;

        $title = $converter->convert($title);

        $body = $this->tweet($this->crawler);

        $html = (string) $this->html($body);

        return new Article($title, $html, $link);
    }

    /**
     * Initializes the crawler instance.
     *
     * Requests the page, extracts its first "application/ld+json" script
     * block into $this->json, and builds the crawler from the "articleBody"
     * entry of that metadata. When the block is missing or is not a JSON
     * object/array, $this->json is reset to an empty array and the crawler
     * is built from an empty string.
     *
     * @param  string $link
     * @return void
     */
    protected function prepare($link)
    {
        $response = (string) Client::request((string) $link);

        // Collapse every whitespace run (newlines included) into one space;
        // the "." in the pattern below does not match newlines, so this lets
        // it span a script block that was originally spread over many lines.
        // preg_replace() returns null on a PCRE error, hence the cast.
        $html = trim((string) preg_replace('/\s+/', ' ', $response));

        $html = str_replace('<p> <strong>', '<p><strong>', $html);

        $html = str_replace('<br /> ', '<br />', $html);

        $pattern = '/<script type="application\/ld\+json"\>(.*?)<\/script\>/i';

        // Reset first so metadata from a previous call never leaks through.
        $this->json = array();

        if (preg_match($pattern, $html, $match) === 1) {
            $decoded = json_decode($match[1], true);

            // json_decode() gives null on malformed input and may give a
            // scalar for valid non-object JSON; only keep array results.
            if (is_array($decoded)) {
                $this->json = $decoded;
            }
        }

        $content = '';

        if (isset($this->json['articleBody'])) {
            $content = (string) $this->json['articleBody'];
        }

        $this->crawler = new DomCrawler($content);
    }
}
70