Completed
Push — master ( 56f5c3...fdee41 )
by Rémi
15s
created

AdCrawler::getProperties()   B

Complexity

Conditions 2
Paths 2

Size

Total Lines 28
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 21
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 28
ccs 21
cts 21
cp 1
rs 8.8571
cc 2
eloc 18
nc 2
nop 1
crap 2
1
<?php
2
3
namespace Lbc\Crawler;
4
5
use Lbc\Filter\CitySanitizer;
6
use Lbc\Filter\CpSanitizer;
7
use Lbc\Filter\DefaultSanitizer;
8
use Lbc\Filter\KeySanitizer;
9
use Lbc\Parser\AdUrlParser;
10
use League\Uri\Schemes\Http;
11
use Symfony\Component\DomCrawler\Crawler;
12
13
/**
 * Class AdCrawler
 *
 * Extracts the data of a single ad page (id, category, pictures,
 * properties and description) and exposes it as plain arrays.
 *
 * @package Lbc\Crawler
 */
class AdCrawler extends CrawlerAbstract
{
    /**
     * Parser built from the URL handed to setUrlParser().
     *
     * @var AdUrlParser
     */
    protected $url;

    /**
     * Create the ad URL parser and store it on the instance.
     *
     * @param string $url passed as-is to the AdUrlParser constructor
     *                    (NOTE(review): string assumed, confirm against callers)
     * @return void
     */
    protected function setUrlParser($url)
    {
        $this->url = new AdUrlParser($url);
    }

    /**
     * Return the full ad information.
     *
     * The result merges the id and category read from the URL parser with
     * the arrays returned by getPictures(), getProperties() and
     * getDescription(), i.e. the keys `id`, `category`, `images_thumbs`,
     * `images`, `properties` and `description`.
     *
     * @return array
     */
    public function getAll()
    {
        return array_merge(
            [
                'id'       => $this->getUrlParser()->getId(),
                'category' => $this->getUrlParser()->getCategory(),
            ],
            $this->getPictures(),
            $this->getProperties(),
            $this->getDescription()
        );
    }

    /**
     * Return the picture urls found in the ad, split between thumbnails
     * and regular images.
     *
     * The urls are read from the `<script>` elements located under
     * `.adview_main`; every url is rewritten with the scheme stored on the
     * crawler.
     *
     * @param Crawler $node node to search in, defaults to the crawler's own node
     * @return array ['images_thumbs' => string[], 'images' => string[]]
     */
    public function getPictures(Crawler $node = null)
    {
        $node = $node ?: $this->node;

        $images = [
            'images_thumbs' => [],
            'images'        => [],
        ];

        // Dots are escaped and quantifiers are lazy and bounded by quotes /
        // whitespace so that several urls sitting on the same script line
        // are matched one by one instead of as a single merged string.
        $pattern = '#//img[^/\s"\']+\.leboncoin\.fr/[^\s"\']*?\.jpg#';

        $node
            ->filter('.adview_main script')
            ->each(function (Crawler $crawler) use (&$images, $pattern) {
                if (!preg_match_all($pattern, $crawler->html(), $matches)) {
                    return;
                }

                foreach ($matches[0] as $image) {
                    // Urls containing "thumb" are thumbnails, the rest are
                    // regular pictures.
                    $bucket = strpos($image, 'thumb') !== false
                        ? 'images_thumbs'
                        : 'images';

                    // `sheme` (sic) is not declared in this class; the name
                    // is kept as-is to match its declaration.
                    $images[$bucket][] = (string)Http::createFromString($image)
                        ->withScheme($this->sheme);
                }
            });

        return $images;
    }

    /**
     * Return the common information of the ad (title, creation date,
     * pro flag) together with every property/value pair listed in the
     * `<h2>` elements (price, cp, city, ...).
     *
     * `is_pro` holds the number of `.ispro` elements found (0 when none).
     *
     * @param Crawler $node node to search in, defaults to the crawler's own node
     *
     * @return array ['properties' => array]
     */
    public function getProperties(Crawler $node = null)
    {
        $node = $node ?: $this->node;

        $properties = [
            'titre'      => DefaultSanitizer::clean(
                $node->filter('h1')->text()
            ),
            'created_at' => $node
                ->filter('*[itemprop=availabilityStarts]')
                ->first()
                ->attr('content'),
            'is_pro' => ($node->filter('.ispro')->count()),
        ];

        $node->filter('h2')
            ->each(function (Crawler $crawler) use (&$properties) {
                $label = $crawler->filter('.property');
                $value = $crawler->filter('.value');

                // text() throws on an empty node list: skip any <h2> that is
                // not a property/value row instead of aborting the whole
                // extraction.
                if ($label->count() === 0 || $value->count() === 0) {
                    return;
                }

                $properties = array_merge(
                    $properties,
                    $this->sanitize($label->text(), $value->text())
                );
            });

        return ['properties' => $properties];
    }

    /**
     * Return the description of the ad.
     *
     * The value is read from `p[itemprop=description]` through
     * getFieldValue(), with null passed as its second argument.
     *
     * @param Crawler $node node to search in, defaults to the crawler's own node
     * @return array ['description' => mixed]
     */
    public function getDescription(Crawler $node = null)
    {
        $node = $node ?: $this->node;

        return [
            'description' => $this->getFieldValue(
                $node->filter("p[itemprop=description]"),
                null
            )
        ];
    }

    /**
     * Normalise a property name with KeySanitizer and clean its value with
     * the matching sanitizer.
     *
     * The `ville` key is special-cased: its value yields both a `ville`
     * and a `cp` entry. For any other key the class
     * `Lbc\Filter\<Ucfirst key>Sanitizer` is used when it exists,
     * DefaultSanitizer otherwise.
     *
     * @param string $key
     * @param string $value
     * @return array sanitized value(s) indexed by the normalised key(s)
     */
    private function sanitize($key, $value)
    {
        $key = KeySanitizer::clean($key);

        // Strict comparison: a loose `==` would also accept non-string keys
        // such as integer 0 on PHP < 8.
        if ($key === 'ville') {
            return [
                'ville' => CitySanitizer::clean($value),
                'cp'    => CpSanitizer::clean($value),
            ];
        }

        $filterName = 'Lbc\\Filter\\' . ucfirst($key) . 'Sanitizer';

        if (!class_exists($filterName)) {
            $filterName = 'Lbc\\Filter\\DefaultSanitizer';
        }

        return [$key => $filterName::clean($value)];
    }
}
179