Completed
Pull Request — master (#13)
by Pierre
11:11 queued 08:28
created

SearchResultAdCrawler   A

Complexity

Total Complexity 15

Size/Duplication

Total Lines 186
Duplicated Lines 10.75 %

Coupling/Cohesion

Components 2
Dependencies 2

Test Coverage

Coverage 96.77%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 15
c 1
b 0
f 0
lcom 2
cbo 2
dl 20
loc 186
ccs 60
cts 62
cp 0.9677
rs 10

12 Methods

Rating   Name   Duplication   Size   Complexity  
A getUrl() 0 4 1
A __construct() 0 5 1
A getId() 0 6 1
A getTitle() 0 6 1
A getPrice() 0 8 1
A getCreatedAt() 0 17 1
A getThumb() 0 16 2
A getNbImage() 0 8 1
A getPlacement() 8 8 1
A getType() 12 12 2
A getAll() 0 14 1
A getFieldValue() 0 13 2

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
3
namespace Lbc\Crawler;
4
5
use League\Uri\Components\Scheme;
6
use League\Uri\Schemes\Http;
7
use Symfony\Component\DomCrawler\Crawler;
8
9
/**
10
 * At the moment I'm writing this piece of code, an ad follows this
11
 * structure:
12
 *
13
 *     <a href="http://www.leboncoin.fr/{{ $category }}/{{ $id }}.htm?ca=4_s" title="{{ $title }}">
14
 *         <div class="lbc">
15
 *             <div class="date">
16
 *                 <div>{{ $date }}</div>
17
 *                 <div>{{ $time }}</div>
18
 *             </div>
19
 *             <div class="image">
20
 *                 <div class="image-and-nb">
21
 *                     <img src="{{ $imageThumbUrl }}" alt="{{ $title }}">
22
 *                     <div class="nb">
23
 *                         <div class="top radius">&nbsp;</div>
24
 *                         <div class="value radius">{{ $nbImages}}</div>
25
 *                     </div>
26
 *                 </div>
27
 *             </div>
28
 *             <div class="detail">
29
 *                 <div class="title">{{ $title }}</div>
30
 *                 <div class="category">{{ $pro }}</div>
31
 *                 <div class="placement">{{ $placement }}</div>
32
 *                 <div class="price">{{ $price }}&nbsp;€</div>
33
 *             </div>
34
 *         </div>
35
 *     </a>
36
 */
37
class SearchResultAdCrawler
38
{
39
    protected $node;
40
    protected $url;
41
42 10
    /**
     * Wrap a search-result node and remember the link it carries.
     *
     * @param Crawler $node crawler for the ad's node; its `href` attribute
     *                      is stored as the ad URL
     */
    public function __construct(Crawler $node)
    {
        $this->url = $node->attr('href');
        $this->node = $node;
    }
47
48
    /**
     * Return the Ad's ID
     *
     * The "/<category>/<id>.htm" portion of the URL path is collapsed to
     * "<id>"; anything in the path that does not match is left untouched.
     *
     * @return string the ID, or an empty string when the URL has no path
     */
    public function getId()
    {
        // Ask parse_url() for the path component only: it yields null when
        // the URL has no path (and false when the URL is seriously
        // malformed) instead of forcing an array access that would raise an
        // undefined-index notice. The cast turns both cases into ''.
        $path = (string) parse_url($this->url, PHP_URL_PATH);

        return preg_replace('/\/\w+\/(\d+)\.htm/', '$1', $path);
    }
59
60
    /**
61
     * Return the title
62
     *
63
     * @return mixed
64
     */
65 6
    public function getTitle()
66
    {
67
        return $this->getFieldValue($this->node, 0, function ($value) {
68 6
            return trim($value);
69 6
        }, 'attr', 'title');
70
    }
71
72
    /**
73
     * Return the price
74
     *
75
     * @return int
76
     */
77 6
    public function getPrice()
78
    {
79 6
        $node = $this->node->filter('*[itemprop=price]');
80
81
        return $this->getFieldValue($node, 0, function ($value) {
82 6
            return (int) preg_replace('/\D/', '', trim($value));
83 6
        });
84
    }
85
86
    /**
     * Return the Ad's URL
     *
     * The stored link is re-emitted with its scheme forced to "http".
     *
     * @return string
     */
    public function getUrl()
    {
        $uri = Http::createFromString($this->url);
        $httpUri = $uri->withScheme('http');

        return (string) $httpUri;
    }
95
96
    /**
     * Return the date and time the ad was created
     *
     * The date is the `content` attribute of the first
     * `itemprop=availabilityStarts` element; the time is the part of that
     * element's text that follows the first ", ".
     *
     * @return string the date and the time separated by a single space
     */
    public function getCreatedAt()
    {
        $node = $this->node
            ->filter('*[itemprop=availabilityStarts]')
            ->first()
        ;

        $date = $node->attr('content');

        $time = $this->getFieldValue($node, 0, function ($value) {
            $value = trim($value);
            $comma = strpos($value, ',');

            // strpos() returns false when the text holds no comma; adding
            // the offset to false would silently drop the first two
            // characters, so hand back the whole text in that case.
            if (false === $comma) {
                return $value;
            }

            // Skip the comma and the single space expected after it.
            return substr($value, $comma + 2);
        });

        return $date.' '.$time;
    }
118
119
    /**
     * Return the thumb picture url
     *
     * Looks up the first `.item_imagePic .lazyload[data-imgsrc]` element and
     * returns its `data-imgsrc` value with the scheme forced to "http".
     *
     * @return null|string null when the ad has no such element
     */
    public function getThumb()
    {
        $candidates = $this->node->filter('.item_imagePic .lazyload[data-imgsrc]');
        $image = $candidates->first();

        if ($image->count() > 0) {
            $thumbUri = Http::createFromString($image->attr('data-imgsrc'));

            return (string) $thumbUri->withScheme('http');
        }

        return null;
    }
140
141
    /**
     * Return the number of pictures of the ad
     *
     * The text of the `.item_imageNumber` element is trimmed and cast to an
     * integer.
     *
     * @return int the picture count, or 0 when the element is missing
     */
    public function getNbImage()
    {
        $toInt = function ($text) {
            return (int) trim($text);
        };

        $counter = $this->node->filter('.item_imageNumber');

        return $this->getFieldValue($counter, 0, $toInt);
    }
154
155
    /**
     * Return the placement of the ad
     *
     * The text of the `itemprop=availableAtOrFrom` element is trimmed and
     * every run of whitespace is collapsed to a single space.
     *
     * NOTE(review): the code-analysis report flagged this method as
     * duplicated elsewhere in the project; consider sharing one
     * implementation.
     *
     * @return mixed the normalised text, or '' when the element is missing
     */
    public function getPlacement()
    {
        $squeeze = function ($text) {
            return preg_replace('/\s+/', ' ', trim($text));
        };

        $place = $this->node->filter('*[itemprop=availableAtOrFrom]');

        return $this->getFieldValue($place, '', $squeeze);
    }
166
167
    /**
     * Return the type of the ad
     *
     * The text of the `itemprop=category` element is stripped of whitespace
     * and parentheses; the ad is 'pro' when the remainder is exactly "pro"
     * and 'part' otherwise.
     *
     * NOTE(review): the code-analysis report flagged this method as
     * duplicated elsewhere in the project; consider sharing one
     * implementation.
     *
     * @return mixed 'pro' or 'part', or false when the element is missing
     */
    public function getType()
    {
        $classify = function ($label) {
            $stripped = preg_replace('/[\s()]+/', '', $label);

            return 'pro' === $stripped ? 'pro' : 'part';
        };

        $category = $this->node->filter('*[itemprop=category]');

        return $this->getFieldValue($category, false, $classify);
    }
182
183 6
    /**
     * Return every field of the ad at once
     *
     * @return object plain object with the properties id, title, price, url,
     *                created_at, thumb, nb_image, placement and type, each
     *                filled by the matching getter
     */
    public function getAll()
    {
        $fields = [];

        $fields['id'] = $this->getId();
        $fields['title'] = $this->getTitle();
        $fields['price'] = $this->getPrice();
        $fields['url'] = $this->getUrl();
        $fields['created_at'] = $this->getCreatedAt();
        $fields['thumb'] = $this->getThumb();
        $fields['nb_image'] = $this->getNbImage();
        $fields['placement'] = $this->getPlacement();
        $fields['type'] = $this->getType();

        return (object) $fields;
    }
197
198
    /**
     * Return the field's value
     *
     * Calls `$funcName` on the node with `$funcParam` and passes the result
     * through `$callback`; when the node list is empty the default value is
     * returned untouched and the callback is never invoked.
     *
     * @param Crawler  $node         node list to read from
     * @param mixed    $defaultValue value returned when the list is empty
     * @param callable $callback     transformation applied to the raw value
     * @param string   $funcName     name of the Crawler method to call
     * @param string   $funcParam    single argument handed to that method
     *
     * @return mixed
     */
    private function getFieldValue(
        Crawler $node,
        $defaultValue,
        $callback,
        $funcName = 'text',
        $funcParam = ''
    ) {
        if (!$node->count()) {
            return $defaultValue;
        }

        $raw = $node->$funcName($funcParam);

        return $callback($raw);
    }
222
}
223