Passed
Push — master ( 2efc06...ec64c5 )
by Rougin
02:11
created

Scraper   A

Complexity

Total Complexity 10

Size/Duplication

Total Lines 146
Duplicated Lines 0 %

Test Coverage

Coverage 88.46%

Importance

Changes 0
Metric Value
eloc 39
dl 0
loc 146
ccs 46
cts 52
cp 0.8846
rs 10
c 0
b 0
f 0
wmc 10

7 Methods

Rating   Name   Duplication   Size   Complexity  
A replace() 0 21 2
A body() 0 7 1
A remove() 0 10 2
A tweet() 0 13 1
A title() 0 11 1
A html() 0 13 2
A prepare() 0 5 1
1
<?php
2
3
namespace Pilipinews\Common;
4
5
use Symfony\Component\DomCrawler\Crawler;
6
7
/**
 * Abstract Scraper
 *
 * Shared helpers for scrapers: loading a page into a crawler, extracting
 * and cleaning parts of its markup, and passing them through Converter.
 *
 * @package Pilipinews
 * @author  Rougin Royce Gutib <[email protected]>
 */
abstract class Scraper
{
    /**
     * Crawler for the document loaded by prepare().
     *
     * @var \Symfony\Component\DomCrawler\Crawler
     */
    protected $crawler;

    /**
     * Returns the article content based on a given element.
     *
     * Takes the inner HTML of the first node matching the selector,
     * collapses every run of whitespace into a single space, trims the
     * result and wraps it in a new crawler.
     *
     * @param  string $element
     * @return \Symfony\Component\DomCrawler\Crawler
     */
    protected function body($element)
    {
        $body = $this->crawler->filter($element)->first()->html();

        // Collapsing whitespace runs already guarantees there are no double
        // spaces left, so a single trim is all the clean-up that is needed.
        $body = trim(preg_replace('/\s+/', ' ', $body));

        return new Crawler($body);
    }

    /**
     * Returns the HTML format of the body from the crawler.
     *
     * The crawler's inner HTML is passed through Converter, each removable
     * keyword is stripped from the converted text, and whitespace runs are
     * normalized into blank-line separators.
     *
     * @param  \Symfony\Component\DomCrawler\Crawler $crawler
     * @param  string[]                              $removables
     * @return string
     */
    protected function html(Crawler $crawler, $removables = array())
    {
        $converter = new Converter;

        $html = $converter->convert($crawler->html());

        // The cast lets callers pass a single keyword instead of an array.
        foreach ((array) $removables as $keyword) {
            $html = str_replace($keyword, '', $html);

            // Drops the triple line breaks a removed keyword may leave behind.
            $html = str_replace("\n\n\n", '', $html);
        }

        // Any run of two or more whitespace characters becomes one blank line.
        return trim(preg_replace('/\s\s+/', "\n\n", $html));
    }

    /**
     * Initializes the crawler instance.
     *
     * Requests the link through Client and stores a crawler built from the
     * returned response in $this->crawler.
     *
     * @param  string $link
     * @return void
     */
    protected function prepare($link)
    {
        $response = Client::request((string) $link);

        $this->crawler = new Crawler($response);
    }

    /**
     * Removes specified HTML tags from body.
     *
     * Every node matching one of the given selectors is detached from the
     * document held by $this->crawler.
     *
     * @param  string[] $elements
     * @return void
     */
    protected function remove($elements)
    {
        $callback = function (Crawler $crawler) {
            $node = $crawler->getNode(0);

            // Nothing to detach when the node is missing or has no parent.
            if ($node === null || $node->parentNode === null) {
                return;
            }

            $node->parentNode->removeChild($node);
        };

        // The cast lets callers pass a single selector instead of an array.
        foreach ((array) $elements as $removable) {
            $this->crawler->filter($removable)->each($callback);
        }
    }

    /**
     * Replaces a specified HTML tag based from the given callback.
     *
     * For every node matching the selector, the callback receives the
     * node's crawler and its serialized HTML and returns the replacement
     * markup. The replacements are applied to the crawler's inner HTML and
     * the result is returned as a new crawler.
     *
     * @param  \Symfony\Component\DomCrawler\Crawler $crawler
     * @param  string                                $element
     * @param  callable                              $callback
     * @return \Symfony\Component\DomCrawler\Crawler
     */
    protected function replace(Crawler $crawler, $element, $callback)
    {
        // Pairs each matched node's serialized HTML with its replacement.
        $function = function (Crawler $crawler) use ($callback) {
            $node = $crawler->getNode(0);

            $html = $node->ownerDocument->saveHtml($node);

            $text = $callback($crawler, (string) $html);

            return array((string) $html, (string) $text);
        };

        $items = $crawler->filter($element)->each($function);

        $html = (string) $crawler->html();

        foreach ($items as $item) {
            $html = str_replace($item[0], $item[1], $html);
        }

        return new Crawler((string) $html);
    }

    /**
     * Returns the title text based from given HTML tag.
     *
     * Takes the inner HTML of the first node matching the selector, strips
     * the removable text from it and passes the result through Converter.
     *
     * @param  string $element
     * @param  string $removable
     * @return string
     */
    protected function title($element, $removable = '')
    {
        $converter = new Converter;

        $crawler = $this->crawler->filter($element);

        $html = $crawler->first()->html();

        $html = str_replace($removable, '', $html);

        return $converter->convert((string) $html);
    }

    /**
     * Parses embedded Twitter tweet in the HTML.
     *
     * Every ".twitter-tweet" element is replaced by a paragraph holding
     * its text content prefixed with "TWEET: ".
     *
     * @param  \Symfony\Component\DomCrawler\Crawler $crawler
     * @return \Symfony\Component\DomCrawler\Crawler
     */
    protected function tweet(Crawler $crawler)
    {
        $callback = function (Crawler $crawler) {
            $parsed = (string) $crawler->text();

            $text = str_replace('📸: ', '', $parsed);

            // The text is plain (entities already decoded) and is about to be
            // parsed as HTML again, so it must be escaped to keep characters
            // such as "<" and "&" from being read as markup.
            $text = htmlspecialchars($text, ENT_QUOTES, 'UTF-8');

            return '<p>TWEET: ' . $text . '</p>';
        };

        $class = '.twitter-tweet';

        return $this->replace($crawler, $class, $callback);
    }
}
161