Completed
Push — master ( 838849...00c88c )
by Rémi
10s
created

SearchResultAdCrawler::getThumb()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 16
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 2.0054

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 16
ccs 8
cts 9
cp 0.8889
rs 9.4285
cc 2
eloc 9
nc 2
nop 0
crap 2.0054
1
<?php
2
3
namespace Lbc\Crawler;
4
5
use League\Uri\Schemes\Http;
6
use Symfony\Component\DomCrawler\Crawler;
7
8
/**
9
 * At the moment I'm writing this piece of code, an ad follows this
10
 * structure:
11
 *
12
 *     <a href="http://www.leboncoin.fr/{{ $category }}/{{ $id }}.htm?ca=4_s" title="{{ $title }}">
13
 *         <div class="lbc">
14
 *             <div class="date">
15
 *                 <div>{{ $date }}</div>
16
 *                 <div>{{ $time }}</div>
17
 *             </div>
18
 *             <div class="image">
19
 *                 <div class="image-and-nb">
20
 *                     <img src="{{ $imageThumbUrl }}" alt="{{ $title }}">
21
 *                     <div class="nb">
22
 *                         <div class="top radius">&nbsp;</div>
23
 *                         <div class="value radius">{{ $nbImages}}</div>
24
 *                     </div>
25
 *                 </div>
26
 *             </div>
27
 *             <div class="detail">
28
 *                 <div class="title">{{ $title }}</div>
29
 *                 <div class="category">{{ $pro }}</div>
30
 *                 <div class="placement">{{ $placement }}</div>
31
 *                 <div class="price">{{ $price }}&nbsp;€</div>
32
 *             </div>
33
 *         </div>
34
 *     </a>
35
 */
36
class SearchResultAdCrawler
{
    /**
     * The DOM node wrapping a single ad of the search result page.
     *
     * @var Crawler
     */
    protected $node;

    /**
     * Raw value of the node's "href" attribute, as read in the constructor.
     *
     * @var string
     */
    protected $url;

    /**
     * Keep the ad node and read its "href" attribute once.
     *
     * @param Crawler $node node whose "href" attribute is the ad URL
     */
    public function __construct(Crawler $node)
    {
        $this->node = $node;
        $this->url = $node->attr('href');
    }

    /**
     * Return the Ad's ID
     *
     * The ID is the numeric file name of the URL path, i.e. the "{id}" part
     * of "/{category}/{id}.htm". When the path does not have that shape the
     * path is returned unchanged; when the URL has no path at all an empty
     * string is returned.
     *
     * @return string
     */
    public function getId()
    {
        // Ask parse_url() for the path component only: a URL without a path
        // then yields null instead of triggering an undefined index notice.
        $path = (string) parse_url($this->url, PHP_URL_PATH);

        return preg_replace('/\/\w+\/(\d+)\.htm/', '$1', $path);
    }

    /**
     * Return the title
     *
     * The title is the trimmed "title" attribute of the ad node, or 0 when
     * the node list is empty.
     *
     * @return mixed
     */
    public function getTitle()
    {
        $trim = function ($value) {
            return trim($value);
        };

        return $this->getFieldValue($this->node, 0, $trim, 'attr', 'title');
    }

    /**
     * Return the price
     *
     * Every non-digit character of the "itemprop=price" element's text is
     * dropped before the integer cast. 0 is returned when there is no such
     * element.
     *
     * @return int
     */
    public function getPrice()
    {
        $priceNode = $this->node->filter('*[itemprop=price]');

        return $this->getFieldValue($priceNode, 0, function ($value) {
            $digits = preg_replace('/\D/', '', trim($value));

            return (int) $digits;
        });
    }

    /**
     * Return the Ad's URL
     *
     * The scheme of the node's href is forced to "http".
     *
     * @return string
     */
    public function getUrl()
    {
        return $this->forceHttpScheme($this->url);
    }

    /**
     * Return the date and time the ad was created
     *
     * The date is the "content" attribute of the first
     * "itemprop=availabilityStarts" element; the time is taken from the text
     * of that same element. Both are joined with a single space.
     *
     * NOTE(review): the node is not checked for emptiness before attr() is
     * called, unlike getThumb(); confirm how Crawler::attr() behaves on an
     * empty node list.
     *
     * @return string
     */
    public function getCreatedAt()
    {
        $node = $this->node
            ->filter('*[itemprop=availabilityStarts]')
            ->first();

        $date = $node->attr('content');

        $time = $this->getFieldValue($node, 0, function ($value) {
            $value = trim($value);
            $commaPos = strpos($value, ',');

            // No comma means there is no prefix to strip. Without this guard
            // strpos() returns false and "false + 2" would silently cut the
            // first two characters of the label.
            if (false === $commaPos) {
                return $value;
            }

            // Skip the comma and the one character that follows it.
            return substr($value, $commaPos + 2);
        });

        return $date.' '.$time;
    }

    /**
     * Return the thumb picture url
     *
     * The URL is read from the "data-imgsrc" attribute of the first
     * ".item_imagePic .lazyload" element and its scheme is forced to "http".
     *
     * @return null|string null when the ad has no such element
     */
    public function getThumb()
    {
        $image = $this->node
            ->filter('.item_imagePic .lazyload[data-imgsrc]')
            ->first();

        if (0 === $image->count()) {
            return null;
        }

        return $this->forceHttpScheme($image->attr('data-imgsrc'));
    }

    /**
     * Return the number of pictures of the ad
     *
     * Read from the text of the ".item_imageNumber" element; 0 when there is
     * no such element.
     *
     * @return int
     */
    public function getNbImage()
    {
        $counterNode = $this->node->filter('.item_imageNumber');

        return $this->getFieldValue($counterNode, 0, function ($value) {
            return (int) trim($value);
        });
    }

    /**
     * Return the placement of the ad
     *
     * This is the trimmed text of the "itemprop=availableAtOrFrom" element
     * with every run of whitespace collapsed to a single space, or an empty
     * string when there is no such element.
     *
     * @return mixed
     */
    public function getPlacement()
    {
        $placementNode = $this->node->filter('*[itemprop=availableAtOrFrom]');

        return $this->getFieldValue($placementNode, '', function ($value) {
            return preg_replace('/\s+/', ' ', trim($value));
        });
    }

    /**
     * Return the type of the advertiser
     *
     * Whitespace and parentheses are removed from the text of the
     * "itemprop=category" element: when exactly "pro" is left the type is
     * 'pro', otherwise 'part'. false is returned when there is no such
     * element.
     *
     * @return mixed 'pro', 'part' or false
     */
    public function getType()
    {
        $categoryNode = $this->node->filter('*[itemprop=category]');

        return $this->getFieldValue($categoryNode, false, function ($value) {
            $label = preg_replace('/[\s()]+/', '', $value);

            return 'pro' === $label ? 'pro' : 'part';
        });
    }

    /**
     * Return every field of the ad at once
     *
     * @return \stdClass object with the properties id, title, price, url,
     *                   created_at, thumb, nb_image, placement and type
     */
    public function getAll()
    {
        return (object) [
            'id' => $this->getId(),
            'title' => $this->getTitle(),
            'price' => $this->getPrice(),
            'url' => $this->getUrl(),
            'created_at' => $this->getCreatedAt(),
            'thumb' => $this->getThumb(),
            'nb_image' => $this->getNbImage(),
            'placement' => $this->getPlacement(),
            'type' => $this->getType(),
        ];
    }

    /**
     * Return the field's value
     *
     * When the node list is not empty, the method named $funcName is called
     * on it with $funcParam and the result is passed through $callback.
     * Otherwise $defaultValue is returned untouched.
     *
     * @param Crawler  $node         node list the value is read from
     * @param mixed    $defaultValue value returned when $node is empty
     * @param callable $callback     formatter applied to the raw value
     * @param string   $funcName     name of the Crawler method to call
     * @param string   $funcParam    single argument given to that method
     *
     * @return mixed
     */
    private function getFieldValue(
        Crawler $node,
        $defaultValue,
        $callback,
        $funcName = 'text',
        $funcParam = ''
    ) {
        if (!$node->count()) {
            return $defaultValue;
        }

        return $callback($node->{$funcName}($funcParam));
    }

    /**
     * Return the given URL as a string with its scheme replaced by "http"
     *
     * @param string $url
     *
     * @return string
     */
    private function forceHttpScheme($url)
    {
        return (string) Http::createFromString($url)->withScheme('http');
    }
}
222