Completed
Push — master ( 926087...cea5b5 )
by Rémi
02:11
created

AdCrawler   A

Complexity

Total Complexity 14

Size/Duplication

Total Lines 164
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 8

Test Coverage

Coverage 98.39%

Importance

Changes 4
Bugs 0 Features 0
Metric Value
wmc 14
c 4
b 0
f 0
lcom 1
cbo 8
dl 0
loc 164
ccs 61
cts 62
cp 0.9839
rs 10

6 Methods

Rating   Name   Duplication   Size   Complexity  
A setUrlParser() 0 4 1
A getDescription() 0 11 2
A getAll() 0 12 1
B getPictures() 0 39 5
B getProperties() 0 28 2
A sanitize() 0 21 3
1
<?php
2
3
namespace Lbc\Crawler;
4
5
use Lbc\Filter\CitySanitizer;
6
use Lbc\Filter\CpSanitizer;
7
use Lbc\Filter\DefaultSanitizer;
8
use Lbc\Filter\KeySanitizer;
9
use Lbc\Parser\AdUrlParser;
10
use League\Uri\Schemes\Http;
11
use Symfony\Component\DomCrawler\Crawler;
12
13
/**
14
 * Class AdCrawler
15
 * @package Lbc\Crawler
16
 */
17
class AdCrawler extends CrawlerAbstract
18
{
19
    /**
20
     * @var AdUrlParser
21
     */
22
    protected $url;
23
24
    /**
     * Wrap the given ad URL in an AdUrlParser and keep it on this crawler.
     *
     * The parser is stored in $this->url; the method itself returns nothing.
     *
     * @param mixed $url value handed as-is to the AdUrlParser constructor
     * @return void
     */
    protected function setUrlParser($url)
    {
        $parser = new AdUrlParser($url);

        $this->url = $parser;
    }
32
33
    /**
     * Build the complete ad record.
     *
     * The result merges, in this order: the id and category read from the
     * URL parser, then the arrays produced by getPictures(),
     * getProperties() and getDescription().
     *
     * @return array
     */
    public function getAll()
    {
        $identity = [
            'id'       => $this->getUrlParser()->getId(),
            'category' => $this->getUrlParser()->getCategory(),
        ];

        $pictures    = $this->getPictures();
        $properties  = $this->getProperties();
        $description = $this->getDescription();

        return array_merge($identity, $pictures, $properties, $description);
    }
50
51
    /**
     * Collect the picture URLs referenced by the ad's inline scripts.
     *
     * Every <script> found under ".adview_main" is scanned for
     * protocol-relative leboncoin image URLs ending in ".jpg". URLs whose
     * text contains "thumb" are stored under "images_thumbs", all others
     * under "images". Each URL is given the scheme held in $this->sheme
     * before being cast to a string.
     *
     * @param Crawler|null $node node to search; defaults to $this->node
     * @return array ['images_thumbs' => string[], 'images' => string[]]
     */
    public function getPictures(Crawler $node = null)
    {
        $node = $node ?: $this->node;

        $images = [
            'images_thumbs' => [],
            'images'        => [],
        ];

        // Literal dots are escaped, and the path is matched lazily while
        // excluding quotes and whitespace, so several URLs sitting on the
        // same line are captured one by one rather than as a single blob.
        $pattern = '#//img[^/"\'\s]+\.leboncoin\.fr/[^"\'\s]*?\.jpg#';

        $node
            ->filter('.adview_main script')
            ->each(function (Crawler $crawler) use (&$images, $pattern) {
                if (!preg_match_all($pattern, $crawler->html(), $matches)) {
                    return;
                }

                foreach ($matches[0] as $image) {
                    // Plain substring test: no regex is needed to spot thumbs.
                    $bucket = strpos($image, 'thumb') !== false
                        ? 'images_thumbs'
                        : 'images';

                    $images[$bucket][] = (string) Http::createFromString($image)
                        ->withScheme($this->sheme);
                }
            });

        return $images;
    }
96
97
    /**
     * Return the common information of the ad (title, creation date,
     * professional flag and every labelled property such as price or city).
     *
     * "is_pro" holds the number of ".ispro" elements found, not a boolean.
     * Each <h2> that contains both a ".property" and a ".value" element is
     * passed to sanitize() and merged into the result; other <h2> elements
     * are ignored.
     *
     * @param Crawler|null $node node to search; defaults to $this->node
     *
     * @return array single-entry array keyed by "properties"
     */
    public function getProperties(Crawler $node = null)
    {
        $node = $node ?: $this->node;

        $properties = [
            'titre'      => (new DefaultSanitizer)->clean(
                $node->filter('h1')->text()
            ),
            'created_at' => $node
                ->filter('*[itemprop=availabilityStarts]')
                ->first()
                ->attr('content'),
            'is_pro'     => $node->filter('.ispro')->count(),
        ];

        $node->filter('h2')
            ->each(function (Crawler $crawler) use (&$properties) {
                $label = $crawler->filter('.property');
                $value = $crawler->filter('.value');

                // text() throws on an empty node list, so a heading that is
                // not a property/value pair must be skipped rather than
                // abort the whole extraction.
                if ($label->count() === 0 || $value->count() === 0) {
                    return;
                }

                $properties = array_merge(
                    $properties,
                    $this->sanitize($label->text(), $value->text())
                );
            });

        return ['properties' => $properties];
    }
132
133
    /**
     * Return the ad description wrapped in an array.
     *
     * The value is whatever getFieldValue() yields for the
     * "p[itemprop=description]" selection, with null passed as its second
     * argument.
     *
     * @param Crawler|null $node node to search; defaults to $this->node
     * @return array single-entry array keyed by "description"
     */
    public function getDescription(Crawler $node = null)
    {
        if (!$node) {
            $node = $this->node;
        }

        $paragraph   = $node->filter("p[itemprop=description]");
        $description = $this->getFieldValue($paragraph, null);

        return ['description' => $description];
    }
150
151
    /**
     * Normalise a property label and clean the associated value.
     *
     * The label is first passed through KeySanitizer. A "ville" key yields
     * two entries, "ville" and "cp", both computed from the same raw value.
     * Any other key is cleaned by the class Lbc\Filter\<Key>Sanitizer when
     * it exists, and by DefaultSanitizer otherwise.
     *
     * @param string $key   raw property label
     * @param string $value raw property value
     * @return array cleaned value(s) indexed by the normalised key(s)
     */
    private function sanitize($key, $value)
    {
        $key = (new KeySanitizer)->clean($key);

        // Strict comparison: avoids PHP's loose type juggling on the key.
        if ($key === 'ville') {
            return [
                'ville' => (new CitySanitizer)->clean($value),
                'cp'    => (new CpSanitizer)->clean($value),
            ];
        }

        $filterClass = 'Lbc\\Filter\\' . ucfirst($key) . 'Sanitizer';

        if (!class_exists($filterClass)) {
            $filterClass = 'Lbc\\Filter\\DefaultSanitizer';
        }

        return [
            $key => (new $filterClass)->clean($value),
        ];
    }
180
}
181