Version20190213::iAmCurious()   A
last analyzed

Complexity

Conditions 3
Paths 4

Size

Total Lines 37
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 23
dl 0
loc 37
rs 9.552
c 0
b 0
f 0
cc 3
nc 4
nop 2
1
<?php
2
3
/*
4
 * This file is part of the Veslo project <https://github.com/symfony-doge/veslo>.
5
 *
6
 * (C) 2019 Pavel Petrov <[email protected]>.
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 *
11
 * @license https://opensource.org/licenses/GPL-3.0 GPL-3.0
12
 */
13
14
declare(strict_types=1);
15
16
namespace Veslo\AnthillBundle\Vacancy\Roadmap\Strategy\HeadHunter\Api;
17
18
use DateTime;
19
use Exception;
20
use GuzzleHttp\ClientInterface;
21
use GuzzleHttp\Exception\GuzzleException;
22
use Psr\Log\LoggerInterface;
23
use Symfony\Component\HttpFoundation\Request;
24
use Symfony\Component\Serializer\Encoder\DecoderInterface;
25
use Veslo\AnthillBundle\Entity\Vacancy\Roadmap\Configuration\Parameters\HeadHunter as HeadHunterParameters;
26
use Veslo\AnthillBundle\Exception\Vacancy\Roadmap\Strategy\LookupFailedException;
27
use Veslo\AnthillBundle\Vacancy\Roadmap\ConfigurationInterface;
28
use Veslo\AnthillBundle\Vacancy\Roadmap\StrategyInterface;
29
30
/**
 * Represents vacancy search algorithm for HeadHunter site based on public API
 *
 * The problem is that we can't sort vacancies by publication_date in ascending order. Only in descending.
 * This algorithm performs vacancy fetching in ascending order by managing additional parameter - received count.
 * So it provides vacancy fetching in real time by their actual publication order.
 *
 * Assuming the provider returns one vacancy per page, newest first (as in the request example below),
 * the next unseen vacancy is located at page = found - received - 1, where pages are counted from 0.
 *
 * Request example:
 * https://api.hh.ru/vacancies?text=php&area=113&order_by=publication_time&date_from=2019-02-13T00:00:00&date_to=2019-02-14T00:00:00&per_page=1&page=0
 *
 * @see https://github.com/hhru/api/blob/master/docs/general.md
 */
class Version20190213 implements StrategyInterface
{
    /**
     * Maximum depth for internal solutions based on recursion
     *
     * @var int
     */
    private const MAX_RECURSION_CALLS = 3;

    /**
     * Logger
     *
     * @var LoggerInterface
     */
    private $logger;

    /**
     * Sends http requests
     *
     * @var ClientInterface
     */
    private $httpClient;

    /**
     * Decodes data from specified format into PHP data
     *
     * @var DecoderInterface
     */
    private $decoder;

    /**
     * Decoded last response from website
     *
     * Overwritten by every iAmCurious() call, so it always belongs to the most recently requested page.
     *
     * @var array
     */
    private $_lastResponse;

    /**
     * Last resolved vacancy URL indexed by configuration key using in search
     *
     * An entry is filled by resolveUrl() and set to null by iterate().
     *
     * @var array
     */
    private $_lastResolvedUrl;

    /**
     * Available recursion calls for internal solutions
     *
     * Decremented by copyOnWriteLookup() before each retry; restored by lookup() and iterate().
     *
     * @var int
     */
    private $_recursionCallsAvailable;

    /**
     * Version20190213 constructor.
     *
     * @param LoggerInterface  $logger     Logger
     * @param ClientInterface  $httpClient Sends http requests
     * @param DecoderInterface $decoder    Decodes data from specified format into PHP data
     */
    public function __construct(LoggerInterface $logger, ClientInterface $httpClient, DecoderInterface $decoder)
    {
        $this->logger     = $logger;
        $this->httpClient = $httpClient;
        $this->decoder    = $decoder;

        $this->_lastResponse            = [];
        $this->_lastResolvedUrl         = [];
        $this->_recursionCallsAvailable = self::MAX_RECURSION_CALLS;
    }

    /**
     * {@inheritdoc}
     *
     * Returns the URL cached for the configuration key if there is one. Otherwise queries the website for the
     * total vacancy count, determines the page of the next vacancy and resolves its URL.
     *
     * @throws LookupFailedException If a request fails, a response has unexpected structure
     *                               or the vacancy count keeps changing between retries
     * @throws Exception
     */
    public function lookup(ConfigurationInterface $configuration): ?string
    {
        /** @var HeadHunterParameters $parameters */
        $parameters       = $configuration->getParameters();
        $configurationKey = $parameters->getConfigurationKey();

        if (!empty($this->_lastResolvedUrl[$configurationKey])) {
            return $this->_lastResolvedUrl[$configurationKey];
        }

        // Every uncached lookup starts with a full retry budget. Apart from here the budget is restored
        // only by iterate(), so a lookup that ended with null or with an exception after some retries
        // would otherwise leave a reduced (possibly exhausted) budget for all following lookups.
        $this->_recursionCallsAvailable = self::MAX_RECURSION_CALLS;

        $this->adjustDatesToCurrentDay($configuration);

        $found = $this->howMany($parameters);
        $page  = $this->determinePage($configuration, $found);

        if (null === $page) {
            return null;
        }

        // howMany() has just requested page 0 (the default page of iAmCurious()), so the cached response
        // can be reused only when the target page is 0. The $found value comes from the same response,
        // therefore the count and the page content are consistent with each other.
        if (0 === $page) {
            return $this->resolveUrl($this->_lastResponse, $configurationKey);
        }

        return $this->copyOnWriteLookup($configuration, $page, $found);
    }

    /**
     * {@inheritdoc}
     *
     * Increments the received counter, saves the configuration, drops the URL cached for the configuration key
     * and restores the retry budget.
     */
    public function iterate(ConfigurationInterface $configuration): void
    {
        /** @var HeadHunterParameters $parameters */
        $parameters = $configuration->getParameters();
        $received   = $parameters->getReceived();

        $parameters->setReceived($received + 1);
        $configuration->save();

        $configurationKey = $parameters->getConfigurationKey();

        $this->_lastResolvedUrl[$configurationKey] = null;
        $this->_recursionCallsAvailable            = self::MAX_RECURSION_CALLS;
    }

    /**
     * Sets search publication date range to current day if it is not valid
     *
     * When the stored "date from" differs from today's midnight, the range becomes [today, tomorrow],
     * the received counter is reset to 0 and the configuration is saved.
     *
     * @param ConfigurationInterface $configuration Roadmap configuration with parameters for search algorithm
     *
     * @return void
     *
     * @throws Exception
     */
    private function adjustDatesToCurrentDay(ConfigurationInterface $configuration): void
    {
        /** @var HeadHunterParameters $parameters */
        $parameters = $configuration->getParameters();
        $today      = new DateTime('today');

        // Loose comparison is intentional: it compares the moments in time, not the object identity.
        if ($parameters->getDateFrom() == $today) {
            return;
        }

        $parameters->setDateFrom($today);
        $tomorrow = new DateTime('tomorrow');
        $parameters->setDateTo($tomorrow);
        $parameters->setReceived(0);

        $configuration->save();
    }

    /**
     * Returns count of vacancies which are available on website at current time and satisfies search criteria
     *
     * Performs a request for page 0; the decoded response stays available in $_lastResponse.
     *
     * @param HeadHunterParameters $parameters Parameters for vacancy search on HeadHunter website
     *
     * @return int
     *
     * @throws LookupFailedException
     */
    private function howMany(HeadHunterParameters $parameters): int
    {
        $response = $this->iAmCurious($parameters);

        return $this->resolveFound($response);
    }

    /**
     * Returns an actual page content with guarantees that it is not changed during algorithm execution
     *
     * To prevent invalid page read between search query and actual fetching query, $found should not be changed.
     * If $found changes it means some vacancy has been added or deleted during lookup
     * and we should try again to ensure consistency with real page content
     *
     * @param ConfigurationInterface $configuration Roadmap configuration with parameters for search algorithm
     * @param int                    $page          Page number
     * @param int                    $found         Last fetched value of total vacancies available
     *
     * @return string|null Vacancy URL or null if there are no new vacancies after the count has changed
     *
     * @throws LookupFailedException If the retry budget is exhausted, a request fails
     *                               or a response has unexpected structure
     */
    private function copyOnWriteLookup(ConfigurationInterface $configuration, int $page, int $found): ?string
    {
        /** @var HeadHunterParameters $parameters */
        $parameters = $configuration->getParameters();

        // The budget is decremented before each retry, so a negative value means that
        // MAX_RECURSION_CALLS retries have already been spent.
        if (0 > $this->_recursionCallsAvailable) {
            $providerUri = $parameters->getProviderUri();

            throw LookupFailedException::providerIsUnstable($providerUri);
        }

        $response = $this->iAmCurious($parameters, $page);
        $newFound = $this->resolveFound($response);

        // Vacancy count has not been changed while we actually tried to fetch it.
        if ($newFound === $found) {
            $configurationKey = $parameters->getConfigurationKey();

            return $this->resolveUrl($response, $configurationKey);
        }

        // Otherwise some vacancy could be added, deleted or hidden, we should sync our configuration parameters.
        // Retrying to determine correct page for next vacancy lookup using new maximum vacancy count.
        $newPage = $this->determinePage($configuration, $newFound);

        if (null === $newPage) {
            return null;
        }

        --$this->_recursionCallsAvailable;

        return $this->copyOnWriteLookup($configuration, $newPage, $newFound);
    }

    /**
     * Return vacancy data by specified search criteria
     * Response will be decoded from json to PHP array
     *
     * The decoded response is also stored in $_lastResponse.
     *
     * @param HeadHunterParameters $parameters Parameters for vacancy search on HeadHunter website
     * @param int                  $page       Page number
     *
     * @return array
     *
     * @throws LookupFailedException If the http client fails to perform the request
     */
    private function iAmCurious(HeadHunterParameters $parameters, int $page = 0): array
    {
        $uri   = $parameters->getUrl();
        $query = [
            'text'      => $parameters->getText(),
            'order_by'  => $parameters->getOrderBy(),
            'date_from' => $parameters->getDateFromFormatted(),
            'date_to'   => $parameters->getDateToFormatted(),
            'per_page'  => $parameters->getPerPage(),
            'page'      => $page,
        ];

        $area = $parameters->getArea();

        // Area filter is optional and is sent only if configured.
        if (!empty($area)) {
            $query['area'] = $area;
        }

        try {
            $response = $this->httpClient->request(Request::METHOD_GET, $uri, ['query' => $query]);
        } catch (GuzzleException $e) {
            $this->logger->error(
                'Request to website failed.',
                [
                    'message' => $e->getMessage(),
                    'uri'     => $uri,
                    'query'   => $query,
                ]
            );

            throw LookupFailedException::withPrevious($e);
        }

        $json  = $response->getBody()->getContents();
        $array = $this->decoder->decode($json, 'json');

        return $this->_lastResponse = $array;
    }

    /**
     * Returns count of vacancies found retrieved from response
     *
     * @param array $response Response from website
     *
     * @return int
     *
     * @throws LookupFailedException If the response has no numeric "found" field
     */
    private function resolveFound(array $response): int
    {
        if (array_key_exists('found', $response) && is_numeric($response['found'])) {
            return (int) $response['found'];
        }

        throw LookupFailedException::unexpectedResponse('found');
    }

    /**
     * Returns vacancy URL for parsing retrieved from response
     *
     * @param array       $response Response from website
     * @param string|null $cacheKey Key for caching the result until next iterate() call;
     *                              the result is not cached if the key is empty
     *
     * @return string
     *
     * @throws LookupFailedException If the first item of the response has no url
     */
    private function resolveUrl(array $response, ?string $cacheKey = null): string
    {
        // Symfony PropertyAccessor is not used due to unnecessary dependency overload.
        // This strategy requires only two fields to analyse: found count and url.
        if (!empty($response['items'])) {
            // Only the first item is of interest; $response is a local copy, so shifting has no side effects.
            $item = array_shift($response['items']);

            if (!empty($item['url'])) {
                if (!empty($cacheKey)) {
                    $this->_lastResolvedUrl[$cacheKey] = $item['url'];
                }

                return $item['url'];
            }
        }

        throw LookupFailedException::unexpectedResponse('items.0.url');
    }

    /**
     * Returns page for next lookup
     * Encapsulates ascending order managing logic
     *
     * @param ConfigurationInterface $configuration Roadmap configuration with parameters for search algorithm
     * @param int                    $found         Freshly total count of vacancies for specified search criteria
     *
     * @return int|null Page number in 0..N range or null if no new vacancies
     */
    private function determinePage(ConfigurationInterface $configuration, int $found): ?int
    {
        // Provider can potentially remove or hide some vacancies.
        $received = $this->normalizeReceived($configuration, $found);

        // No new vacancies.
        if ($found === $received) {
            return null;
        }

        // $received never exceeds $found after normalization, so the result is not negative here.
        return $found - $received - 1;
    }

    /**
     * Returns received vacancies count synchronized with actual total count on website by search criteria
     * Also guarantees that page number cannot fall to less than 0 during page determination
     *
     * If the stored received count exceeds $found, it is lowered to $found and the configuration is saved.
     *
     * @param ConfigurationInterface $configuration Roadmap configuration with parameters for search algorithm
     * @param int                    $found         Vacancies total count for specified search criteria
     *
     * @return int
     */
    private function normalizeReceived(ConfigurationInterface $configuration, int $found): int
    {
        /** @var HeadHunterParameters $parameters */
        $parameters = $configuration->getParameters();
        $received   = $parameters->getReceived();

        if ($received <= $found) {
            return $received;
        }

        $parameters->setReceived($found);
        $configuration->save();

        return $found;
    }
}
385