1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace SomeWork\Minjust\Parser; |
6
|
|
|
|
7
|
|
|
use Exception; |
8
|
|
|
use PHPHtmlParser\Dom; |
9
|
|
|
use PHPHtmlParser\Dom\Collection; |
10
|
|
|
use PHPHtmlParser\Dom\HtmlNode; |
11
|
|
|
use PHPHtmlParser\Exceptions\ChildNotFoundException; |
12
|
|
|
use PHPHtmlParser\Exceptions\NotLoadedException; |
13
|
|
|
use SomeWork\Minjust\Entity\DetailLawyer; |
14
|
|
|
use SomeWork\Minjust\Entity\LawFormation; |
15
|
|
|
use SomeWork\Minjust\Entity\Lawyer; |
16
|
|
|
use SomeWork\Minjust\Entity\Location; |
17
|
|
|
use SomeWork\Minjust\Exception\BlockNotFoundException; |
18
|
|
|
use SomeWork\Minjust\Exception\RuntimeException; |
19
|
|
|
use SomeWork\Minjust\FindRequest; |
20
|
|
|
use SomeWork\Minjust\FindResponse; |
21
|
|
|
|
22
|
|
|
/**
 * Extracts lawyer list pages and lawyer detail pages from raw HTML bodies
 * using PHPHtmlParser.
 *
 * All CSS selectors are exposed as protected constants and resolved through
 * late static binding, so a subclass can override them.
 *
 * @see \SomeWork\Minjust\Tests\Unit\DomParserTest
 */
class DomParser implements ParserInterface
{
    /**
     * @var string
     */
    protected const PAGINATION_BLOCK_SELECTOR = 'ul.pagination';

    /**
     * @var string
     */
    protected const CURRENT_PAGE_SELECTOR = 'li.active';

    /**
     * @var string
     */
    protected const PAGINATION_STEP_SELECTOR = 'li';

    /**
     * @var string
     */
    protected const LAWYERS_LIST_BLOCK_SELECTOR = 'table.persons > tbody > tr';

    /**
     * @var string
     */
    protected const LAWYER_DETAIL_SELECTOR = '.floating > p.row';

    /**
     * @var string
     */
    protected const LAWYER_DETAIL_NAME_FIELD = 'label';

    /**
     * @var string
     */
    protected const LOCATIONS_BLOCK_SELECTOR = 'select#' . FindRequest::TERRITORIAL_SUBJECT . ' > option';

    /**
     * Parses a list page into a response carrying the current page number,
     * the total page count and the lawyers found on the page.
     *
     * @param string $body raw HTML of the list page
     *
     * @return FindResponse
     */
    public function list(string $body): FindResponse
    {
        $dom = (new Dom())->loadStr($body);

        $response = new FindResponse();

        return $response
            ->setPage($this->getCurrentPage($dom))
            ->setTotalPage($this->getTotalPage($dom))
            ->setLawyers($this->getListLawyers($dom));
    }

    /**
     * Reads the number of the active page from the pagination block.
     *
     * @param Dom $dom
     *
     * @return int text of the first child of the active pagination item, cast to int
     *
     * @throws BlockNotFoundException when the pagination block or its active item is missing
     * @throws RuntimeException       when PHPHtmlParser reports a missing child node
     */
    protected function getCurrentPage(Dom $dom): int
    {
        try {
            /**
             * @var HtmlNode|null $activeItem
             */
            $activeItem = $this
                ->getPagination($dom)
                ->find(static::CURRENT_PAGE_SELECTOR, 0);
        } catch (ChildNotFoundException $exception) {
            throw new RuntimeException($exception->getMessage(), $exception->getCode(), $exception);
        }

        if (null === $activeItem) {
            throw new BlockNotFoundException(static::CURRENT_PAGE_SELECTOR);
        }

        try {
            return (int) $activeItem->firstChild()->text();
        } catch (ChildNotFoundException $exception) {
            throw new RuntimeException($exception->getMessage(), $exception->getCode(), $exception);
        }
    }

    /**
     * Determines the total number of pages from the last pagination item.
     *
     * When the last item contains a link whose href carries `page=<digits>`,
     * that number is returned. When the last item has no link, or the link
     * has no such parameter, the current page is returned instead.
     *
     * @param Dom $dom
     *
     * @return int 1 when the pagination block contains no items
     *
     * @throws BlockNotFoundException when the pagination block is missing
     * @throws RuntimeException       when PHPHtmlParser reports a missing child node
     */
    protected function getTotalPage(Dom $dom): int
    {
        try {
            $steps = $this
                ->getPagination($dom)
                ->find(static::PAGINATION_STEP_SELECTOR)
                ->toArray();
        } catch (ChildNotFoundException $exception) {
            throw new RuntimeException($exception->getMessage(), $exception->getCode(), $exception);
        }

        if (0 === count($steps)) {
            return 1;
        }

        /**
         * @var HtmlNode $lastStep
         */
        $lastStep = end($steps);

        try {
            /**
             * @var HtmlNode|null $link
             */
            $link = $lastStep->find('a', 0);
        } catch (ChildNotFoundException $exception) {
            throw new RuntimeException($exception->getMessage(), $exception->getCode(), $exception);
        }

        if (null !== $link) {
            $href = (string) $link->getAttribute('href');
            $matches = [];

            // Only trust the capture group when the pattern actually matched;
            // otherwise $matches[1] would be undefined and the result would be 0.
            if (1 === preg_match('/page=(\d+)/', $href, $matches)) {
                return (int) $matches[1];
            }
        }

        return $this->getCurrentPage($dom);
    }

    /**
     * Finds the pagination block of the document.
     *
     * The last successfully resolved block is memoised in static locals and
     * reused while the same Dom instance is passed in. The cache is written
     * only after the lookup succeeded, so a failed lookup can never leave a
     * null block behind for the next call.
     *
     * @param Dom $dom
     *
     * @return HtmlNode
     *
     * @throws BlockNotFoundException when no element matches the pagination selector
     * @throws RuntimeException       when the underlying lookup throws
     */
    protected function getPagination(Dom $dom): HtmlNode
    {
        static $parsedDom = null;
        static $pagination = null;

        if ($dom === $parsedDom && null !== $pagination) {
            return $pagination;
        }

        try {
            $block = $dom->find(static::PAGINATION_BLOCK_SELECTOR, 0);
        } catch (Exception $exception) {
            throw new RuntimeException($exception->getMessage(), $exception->getCode(), $exception);
        }

        if (null === $block) {
            throw new BlockNotFoundException(static::PAGINATION_BLOCK_SELECTOR);
        }

        $parsedDom = $dom;
        $pagination = $block;

        return $pagination;
    }

    /**
     * Builds Lawyer entities from the rows of the lawyers table.
     *
     * Rows with fewer than five cells are skipped, as are rows whose register
     * number prefix does not correspond to any location option of the page.
     *
     * @param Dom $dom
     *
     * @return Lawyer[]
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    protected function getListLawyers(Dom $dom): array
    {
        $lawyers = [];
        /**
         * @var Dom\HtmlNode[]|Collection $rows
         */
        $rows = $dom->find(static::LAWYERS_LIST_BLOCK_SELECTOR);
        $locations = $this->getLocations($dom);

        foreach ($rows as $row) {
            /**
             * @var Dom\HtmlNode[]|Collection $cells
             */
            $cells = $row->find('td');

            // Cells 0, 1, 3 and 4 are read below; a shorter row cannot be a lawyer row.
            if (count($cells) < 5) {
                continue;
            }

            $registerNumber = trim($cells[0]->text());
            $certificateNumber = trim($cells[3]->text());
            $fullName = trim($cells[1]->text(true));
            // getAttribute() yields null for a missing attribute; cast so trim()
            // does not raise a TypeError under strict_types.
            $url = trim((string) $cells[1]->firstChild()->getAttribute('href'));
            $status = trim($cells[4]->text());
            $location = $this->getLocationByRegisterNumber($registerNumber, $locations);

            if (null === $location) {
                continue;
            }

            $lawyers[] = (new Lawyer())
                ->setRegisterNumber($registerNumber)
                ->setCertificateNumber($certificateNumber)
                ->setFullName($fullName)
                ->setUrl($url)
                ->setLocation($location)
                ->setStatus($status);
        }

        return $lawyers;
    }

    /**
     * Parses a lawyer detail page.
     *
     * Only rows whose class attribute does not contain the label marker are
     * considered; they are addressed by position: index 2 is the chamber of
     * law, index 3 the organizational form and indexes 4-7 the name, address,
     * phone and email of the law formation. The law formation is attached only
     * when the organizational form is not empty.
     *
     * @param string $body raw HTML of the detail page
     *
     * @return DetailLawyer
     *
     * @throws BlockNotFoundException when fewer than four value rows are present
     */
    public function detail(string $body): DetailLawyer
    {
        $dom = (new Dom())->loadStr($body);

        /**
         * @var Dom\HtmlNode[] $rows
         */
        $rows = $dom->find(static::LAWYER_DETAIL_SELECTOR)->toArray();

        $values = array_values(
            array_filter($rows, static function (HtmlNode $row): bool {
                return false === strpos((string) $row->getAttribute('class'), static::LAWYER_DETAIL_NAME_FIELD);
            })
        );

        // Indexes 2 and 3 are mandatory; without them the page is not a detail page.
        if (count($values) < 4) {
            throw new BlockNotFoundException(static::LAWYER_DETAIL_SELECTOR);
        }

        $lawyer = (new DetailLawyer())
            ->setChamberOfLaw(trim($values[2]->text()));

        $organizationalForm = trim($values[3]->text());

        if ('' !== $organizationalForm) {
            $lawyer->setLawFormation(
                (new LawFormation())
                    ->setOrganizationalForm($organizationalForm)
                    ->setName($this->textAt($values, 4))
                    ->setAddress($this->textAt($values, 5))
                    ->setPhone($this->textAt($values, 6))
                    ->setEmail($this->textAt($values, 7))
            );
        }

        return $lawyer;
    }

    /**
     * Returns the trimmed text of the node at the given position, or an empty
     * string when the list has no node at that position.
     *
     * @param HtmlNode[] $nodes
     * @param int        $index
     *
     * @return string
     */
    private function textAt(array $nodes, int $index): string
    {
        return isset($nodes[$index]) ? trim($nodes[$index]->text()) : '';
    }

    /**
     * Picks the location whose id equals the part of the register number
     * that precedes the first slash.
     *
     * @param string     $registerNumber
     * @param Location[] $locations
     *
     * @return Location|null null when no location has a matching id
     */
    protected function getLocationByRegisterNumber(string $registerNumber, array $locations): ?Location
    {
        $locationId = explode('/', $registerNumber, 2)[0];

        foreach ($locations as $location) {
            if ($location->getId() === $locationId) {
                return $location;
            }
        }

        return null;
    }

    /**
     * Builds Location entities from the options of the territorial subject select.
     *
     * Options whose value attribute is missing or falsy are ignored. The
     * location name is the option text with the "[<id>]" marker removed.
     *
     * @param Dom $dom
     *
     * @return Location[] keyed by the position of the option in the original list
     */
    protected function getLocations(Dom $dom)
    {
        /**
         * @var Dom\HtmlNode[] $options
         */
        $options = $dom->find(static::LOCATIONS_BLOCK_SELECTOR)->toArray();

        $options = array_filter(
            $options,
            static function (HtmlNode $option): bool {
                return (bool) $option->getAttribute('value');
            }
        );

        return array_map(
            static function (HtmlNode $option) {
                $id = trim((string) $option->getAttribute('value'));
                $name = trim(str_replace('[' . $id . ']', '', $option->text()));

                return (new Location())
                    ->setId($id)
                    ->setName($name);
            },
            $options
        );
    }
}
285
|
|
|
|
This check looks for function or method calls that always return null and whose return value is used.
The method
getObject()
can return nothing but null, so it makes no sense to use the return value. The reason is most likely that a function or method is incomplete or has been reduced for debug purposes.