Completed
Pull Request — master (#8)
by
unknown
02:34
created

SearchResultAdCrawler::getThumb()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 19
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 2.0023

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 19
ccs 11
cts 12
cp 0.9167
rs 9.4285
cc 2
eloc 11
nc 2
nop 0
crap 2.0023
1
<?php namespace Lbc\Crawler;
2
3
use League\Url\Url;
4
use Symfony\Component\DomCrawler\Crawler;
5
6
/**
 * Extracts the fields of a single ad out of a search result page.
 *
 * The instance wraps the DomCrawler node of one ad (the `<a>` element, whose
 * `href` attribute is the ad's URL) and exposes one getter per field, plus
 * getAll() which bundles every field into a single object.
 *
 * At the time this class was first written, an ad followed this structure:
 *
 *     <a href="http://www.leboncoin.fr/{{ $category }}/{{ $id }}.htm?ca=4_s" title="{{ $title }}">
 *         <div class="lbc">
 *             <div class="date">
 *                 <div>{{ $date }}</div>
 *                 <div>{{ $time }}</div>
 *             </div>
 *             <div class="image">
 *                 <div class="image-and-nb">
 *                     <img src="{{ $imageThumbUrl }}" alt="{{ $title }}">
 *                     <div class="nb">
 *                         <div class="top radius">&nbsp;</div>
 *                         <div class="value radius">{{ $nbImages}}</div>
 *                     </div>
 *                 </div>
 *             </div>
 *             <div class="detail">
 *                 <div class="title">{{ $title }}</div>
 *                 <div class="category">{{ $pro }}</div>
 *                 <div class="placement">{{ $placement }}</div>
 *                 <div class="price">{{ $price }}&nbsp;€</div>
 *             </div>
 *         </div>
 *     </a>
 *
 * NOTE(review): the getters below select on `itemprop` attributes and on
 * `.item_imagePic` / `.item_imageNumber` classes, none of which appear in the
 * sample above, so the sample is presumably outdated. Confirm against the
 * live markup before relying on it.
 */
class SearchResultAdCrawler
{
    /**
     * DOM node of the ad.
     *
     * @var Crawler
     */
    protected $node;

    /**
     * Raw value of the node's `href` attribute.
     *
     * @var string
     */
    protected $url;

    /**
     * @param Crawler $node Node of the ad; its `href` attribute is read as the ad's URL
     */
    public function __construct(Crawler $node)
    {
        $this->node = $node;
        $this->url = $node->attr('href');
    }

    /**
     * Return the Ad's ID
     *
     * The ID is the numeric file name of the URL path
     * (`/{category}/{id}.htm`). When the path does not match that shape it is
     * returned unchanged.
     *
     * @return string
     */
    public function getId()
    {
        // PHP_URL_PATH yields null when the URL has no path (false when the
        // URL is malformed); the cast turns both into '' instead of raising
        // a notice on a missing 'path' index.
        $path = (string) parse_url($this->url, PHP_URL_PATH);

        return preg_replace('/\/\w+\/(\d+)\.htm/', '$1', $path);
    }

    /**
     * Return the title
     *
     * Read from the `title` attribute of the ad node and trimmed.
     *
     * @return string|int The trimmed title, or 0 when the node is empty
     */
    public function getTitle()
    {
        return $this->getFieldValue($this->node, 0, function ($value) {
            return trim($value);
        }, 'attr', 'title');
    }

    /**
     * Return the price
     *
     * Every non-digit character is stripped from the text of the price node
     * before it is cast to an integer.
     *
     * @return int The price, or 0 when no price node is found
     */
    public function getPrice()
    {
        $node = $this->node->filter('*[itemprop=price]');

        return $this->getFieldValue($node, 0, function ($value) {
            return (int) preg_replace('/[^\d]/', '', trim($value));
        });
    }

    /**
     * Return the Ad's URL
     *
     * @return string The ad's URL with its scheme forced to `http`
     */
    public function getUrl()
    {
        return $this->toHttpUrl($this->url);
    }

    /**
     * Return the date and time the ad was created
     *
     * The value is read from the `content` attribute of the first
     * `availabilityStarts` node.
     *
     * @return string Formatted as `Y-m-d H:i`
     */
    public function getCreatedAt()
    {
        $date = $this->node
            ->filter('*[itemprop=availabilityStarts]')
            ->first()
            ->attr('content')
        ;

        // 'i' is the minutes; 'm' would print the month a second time.
        return (new \DateTime($date))->format('Y-m-d H:i');
    }

    /**
     * Return the thumb picture url
     *
     * @return null|string The URL with its scheme forced to `http`, or null
     *                     when the ad has no lazy-loaded picture node
     */
    public function getThumb()
    {
        $image = $this->node
            ->filter('.item_imagePic .lazyload[data-imgsrc]')
            ->first();

        if (0 === $image->count()) {
            return null;
        }

        return $this->toHttpUrl($image->attr('data-imgsrc'));
    }

    /**
     * Return the number of pictures of the ad
     *
     * @return int The picture count, or 0 when no counter node is found
     */
    public function getNbImage()
    {
        $node = $this->node->filter('.item_imageNumber');

        return $this->getFieldValue($node, 0, function ($value) {
            return (int) trim($value);
        });
    }

    /**
     * Return the placement of the ad
     *
     * The text is trimmed and every run of whitespace is collapsed into a
     * single space.
     *
     * @return string The placement, or '' when no placement node is found
     */
    public function getPlacement()
    {
        $node = $this->node->filter('*[itemprop=availableAtOrFrom]');

        return $this->getFieldValue($node, '', function ($value) {
            return preg_replace('/\s+/', ' ', trim($value));
        });
    }

    /**
     * Return the type of the seller
     *
     * @return string|false 'pro' when the category text, stripped of
     *                      whitespace and parentheses, is exactly "pro";
     *                      'part' for any other text; false when no category
     *                      node is found
     */
    public function getType()
    {
        $node = $this->node->filter('*[itemprop=category]');

        return $this->getFieldValue($node, false, function ($value) {
            if ('pro' === preg_replace('/[\s()]+/', '', $value)) {
                return 'pro';
            }

            return 'part';
        });
    }

    /**
     * Return every field of the ad at once
     *
     * @return object Object with the properties id, title, price, url,
     *                created_at, thumb, nb_image, placement and type
     */
    public function getAll()
    {
        return (object) [
            'id' => $this->getId(),
            'title' => $this->getTitle(),
            'price' => $this->getPrice(),
            'url' => $this->getUrl(),
            'created_at' => $this->getCreatedAt(),
            'thumb' => $this->getThumb(),
            'nb_image' => $this->getNbImage(),
            'placement' => $this->getPlacement(),
            'type' => $this->getType(),
        ];
    }

    /**
     * Return the field's value
     *
     * Calls `$node->$funcName($funcParam)` and passes the result through
     * `$callback`; the node is only read when it matched at least one element.
     *
     * @param Crawler  $node         Node to read the value from
     * @param mixed    $defaultValue Returned as-is when the node is empty
     * @param callable $callback     Receives the raw value, returns the final one
     * @param string   $funcName     Crawler method used to read the raw value
     * @param string   $funcParam    Single argument handed to that method
     *
     * @return mixed
     */
    private function getFieldValue(
        Crawler $node,
        $defaultValue,
        $callback,
        $funcName = 'text',
        $funcParam = ''
    ) {
        if ($node->count()) {
            return $callback($node->$funcName($funcParam));
        }

        return $defaultValue;
    }

    /**
     * Rebuild a URL with its scheme forced to `http`
     *
     * @param string $url
     *
     * @return string
     */
    private function toHttpUrl($url)
    {
        return Url::createFromUrl($url)
            ->setScheme('http')
            ->__toString();
    }
}
219