Issues (4)

src/Parser/DomParser.php (4 issues)

1
<?php
2
3
declare(strict_types=1);
4
5
namespace SomeWork\Minjust\Parser;
6
7
use Exception;
8
use PHPHtmlParser\Dom;
9
use PHPHtmlParser\Dom\Collection;
10
use PHPHtmlParser\Dom\HtmlNode;
11
use PHPHtmlParser\Exceptions\ChildNotFoundException;
12
use PHPHtmlParser\Exceptions\NotLoadedException;
13
use SomeWork\Minjust\Entity\DetailLawyer;
14
use SomeWork\Minjust\Entity\LawFormation;
15
use SomeWork\Minjust\Entity\Lawyer;
16
use SomeWork\Minjust\Entity\Location;
17
use SomeWork\Minjust\Exception\BlockNotFoundException;
18
use SomeWork\Minjust\Exception\RuntimeException;
19
use SomeWork\Minjust\FindRequest;
20
use SomeWork\Minjust\FindResponse;
21
22
/**
23
 * @see \SomeWork\Minjust\Tests\Unit\DomParserTest
24
 */
25
class DomParser implements ParserInterface
26
{
27
    /**
28
     * @var string
29
     */
30
    protected const PAGINATION_BLOCK_SELECTOR = 'ul.pagination';
31
32
    /**
33
     * @var string
34
     */
35
    protected const CURRENT_PAGE_SELECTOR = 'li.active';
36
37
    /**
38
     * @var string
39
     */
40
    protected const PAGINATION_STEP_SELECTOR = 'li';
41
42
    /**
43
     * @var string
44
     */
45
    protected const LAWYERS_LIST_BLOCK_SELECTOR = 'table.persons > tbody > tr';
46
47
    /**
48
     * @var string
49
     */
50
    protected const LAWYER_DETAIL_SELECTOR = '.floating > p.row';
51
52
    /**
53
     * @var string
54
     */
55
    protected const LAWYER_DETAIL_NAME_FIELD = 'label';
56
57
    /**
58
     * @var string
59
     */
60
    protected const LOCATIONS_BLOCK_SELECTOR = 'select#' . FindRequest::TERRITORIAL_SUBJECT . ' > option';
61
62 5
    /**
     * Parses the HTML of a search-result page into a FindResponse.
     *
     * The response is filled, in this order, with the current page number,
     * the total page count and the lawyers extracted from the document.
     *
     * @param string $body Raw HTML of the result page.
     *
     * @return FindResponse
     */
    public function list(string $body): FindResponse
    {
        $parser = new Dom();
        $dom = $parser->loadStr($body);

        $response = new FindResponse();

        // Each value is computed right before its setter runs, so the order of
        // lookups against the document stays page -> total -> lawyers.
        $currentPage = $this->getCurrentPage($dom);
        $response = $response->setPage($currentPage);

        $totalPage = $this->getTotalPage($dom);
        $response = $response->setTotalPage($totalPage);

        $lawyers = $this->getListLawyers($dom);

        return $response->setLawyers($lawyers);
    }
71
72 5
    /**
     * Reads the current page number from the pagination block.
     *
     * The number is the text of the first child of the first element matching
     * CURRENT_PAGE_SELECTOR inside the pagination block, cast to int.
     *
     * @param Dom $dom Parsed document to inspect.
     *
     * @return int
     *
     * @throws RuntimeException       When the parser reports a missing child node.
     * @throws BlockNotFoundException When no element matches CURRENT_PAGE_SELECTOR.
     */
    protected function getCurrentPage(Dom $dom): int
    {
        try {
            $pagination = $this->getPagination($dom);

            /** @var HtmlNode|null $activeItem */
            $activeItem = $pagination->find(static::CURRENT_PAGE_SELECTOR, 0);
        } catch (ChildNotFoundException $notFound) {
            throw new RuntimeException($notFound->getMessage(), $notFound->getCode(), $notFound);
        }

        if ($activeItem === null) {
            throw new BlockNotFoundException(static::CURRENT_PAGE_SELECTOR);
        }

        try {
            $pageLabel = $activeItem->firstChild()->text();
        } catch (ChildNotFoundException $notFound) {
            throw new RuntimeException($notFound->getMessage(), $notFound->getCode(), $notFound);
        }

        return (int) $pageLabel;
    }
95
96 4
    protected function getTotalPage(Dom $dom): int
97
    {
98
        try {
99
            /* @noinspection NullPointerExceptionInspection */
100
            $collection = $this
101 4
                ->getPagination($dom)
102 4
                ->find(static::PAGINATION_STEP_SELECTOR)
103 4
                ->toArray();
104
        } catch (ChildNotFoundException $exception) {
105
            throw new RuntimeException($exception->getMessage(), $exception->getCode(), $exception);
106
        }
107
108 4
        if (0 === count($collection)) {
109
            return 1;
110
        }
111
112
        /**
113
         * @var HtmlNode $last
114
         */
115 4
        $last = end($collection);
116
117
        try {
118
            /**
119
             * @var HtmlNode|null $link
120
             */
121 4
            $link = $last->find('a', 0);
122
        } catch (ChildNotFoundException $exception) {
123
            throw new RuntimeException($exception->getMessage(), $exception->getCode(), $exception);
124
        }
125
126 4
        if ($link) {
127 2
            $href = (string) $link->getAttribute('href');
0 ignored issues
show
Are you sure about the usage of $link->getAttribute('href')? The targeted method PHPHtmlParser\Dom\AbstractNode::getAttribute() seems to always return null.

This check looks for function or method calls that always return null and whose return value is used.

class A
{
    function getObject()
    {
        return null;
    }

}

$a = new A();
if ($a->getObject()) {

The method getObject() can return nothing but null, so it makes no sense to use the return value.

The reason is most likely that a function or method is incomplete or has been reduced for debug purposes.

Loading history...
128 2
            $matches = [];
129 2
            preg_match('/page=([\d]+)/', $href, $matches);
130
131 2
            return (int) $matches[1];
132
        }
133
134 2
        return $this->getCurrentPage($dom);
135
    }
136
137
    /**
     * Returns the pagination block of the document, memoising the last lookup.
     *
     * The most recently resolved document and its pagination node are kept in
     * function-level statics, so consecutive calls with the same Dom instance
     * run the selector lookup only once.
     *
     * @param Dom $dom Parsed document to search.
     *
     * @return HtmlNode First node matching PAGINATION_BLOCK_SELECTOR.
     *
     * @throws RuntimeException       When the underlying lookup throws.
     * @throws BlockNotFoundException When the document has no pagination block.
     */
    protected function getPagination(Dom $dom): HtmlNode
    {
        static $parsedDom = null;
        static $pagination = null;

        if ($dom === $parsedDom && null !== $pagination) {
            return $pagination;
        }

        try {
            $found = $dom->find(static::PAGINATION_BLOCK_SELECTOR, 0);
        } catch (Exception $exception) {
            throw new RuntimeException($exception->getMessage(), $exception->getCode(), $exception);
        }

        if (null === $found) {
            throw new BlockNotFoundException(static::PAGINATION_BLOCK_SELECTOR);
        }

        // Commit the memo only after a successful lookup: a failed lookup must
        // not leave the cache pointing at this document with a stale or null
        // node, which a later call with the same Dom would otherwise return.
        $parsedDom = $dom;
        $pagination = $found;

        return $pagination;
    }
157
158
    /**
     * Builds Lawyer entities from the rows matched by LAWYERS_LIST_BLOCK_SELECTOR.
     *
     * A row is skipped when it has fewer than five cells, or when its register
     * number does not resolve to one of the locations parsed from the same
     * document.
     *
     * @param Dom $dom
     *
     * @return Lawyer[]
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    protected function getListLawyers(Dom $dom): array
    {
        $data = [];

        /** @var Dom\HtmlNode[]|Collection $nodes */
        $nodes = $dom->find(static::LAWYERS_LIST_BLOCK_SELECTOR);
        $locations = $this->getLocations($dom);

        foreach ($nodes as $node) {
            /** @var Dom\HtmlNode[]|Collection $tds */
            $tds = $node->find('td');

            // Cells 0, 1, 3 and 4 are read below; rows without them (for
            // example header or spacer rows) cannot describe a lawyer.
            if (count($tds) < 5) {
                continue;
            }

            $registerNumber = trim($tds[0]->text());
            $certificateNumber = trim($tds[3]->text());
            $fullName = trim($tds[1]->text(true));
            // A missing href must not reach trim() as a non-string: with
            // strict_types enabled that raises a TypeError.
            $url = trim((string) $tds[1]->firstChild()->getAttribute('href'));
            $status = trim($tds[4]->text());
            $location = $this->getLocationByRegisterNumber($registerNumber, $locations);

            if (null === $location) {
                continue;
            }

            $data[] = (new Lawyer())
                ->setRegisterNumber($registerNumber)
                ->setCertificateNumber($certificateNumber)
                ->setFullName($fullName)
                ->setUrl($url)
                ->setLocation($location)
                ->setStatus($status);
        }

        return $data;
    }
202
203 2
    public function detail(string $body): DetailLawyer
204
    {
205 2
        $dom = (new Dom())->loadStr($body);
206
207
        /**
208
         * @var Dom\HtmlNode[] $nodes
209
         */
210 2
        $nodes = $dom->find(static::LAWYER_DETAIL_SELECTOR)->toArray();
211
212
        $nodes = array_filter($nodes, static function (HtmlNode $htmlNode) {
213 2
            return strpos($htmlNode->getAttribute('class'), static::LAWYER_DETAIL_NAME_FIELD) === false;
0 ignored issues
show
Are you sure about the usage of $htmlNode->getAttribute('class')? The targeted method PHPHtmlParser\Dom\AbstractNode::getAttribute() seems to always return null.

This check looks for function or method calls that always return null and whose return value is used.

class A
{
    function getObject()
    {
        return null;
    }

}

$a = new A();
if ($a->getObject()) {

The method getObject() can return nothing but null, so it makes no sense to use the return value.

The reason is most likely that a function or method is incomplete or has been reduced for debug purposes.

Loading history...
214 2
        });
215
216 2
        $nodes = array_values($nodes);
217
218 2
        $lawyer = (new DetailLawyer())
219 2
            ->setChamberOfLaw(trim($nodes[2]->text()));
220
221 2
        if (($organizationForm = trim($nodes[3]->text())) !== '') {
222 1
            $lawyer->setLawFormation(
223 1
                (new LawFormation())
224 1
                    ->setOrganizationalForm($organizationForm)
225 1
                    ->setName(trim($nodes[4]->text()))
226 1
                    ->setAddress(trim($nodes[5]->text()))
227 1
                    ->setPhone(trim($nodes[6]->text()))
228 1
                    ->setEmail(trim($nodes[7]->text()))
229
            );
230
        }
231
232 2
        return $lawyer;
233
    }
234
235
    /**
     * Finds the location whose id equals the part of the register number
     * that precedes the first "/".
     *
     * @param string     $registerNumber
     * @param Location[] $locations
     *
     * @return Location|null The first matching location, or null when none matches.
     */
    protected function getLocationByRegisterNumber(string $registerNumber, array $locations): ?Location
    {
        $locationId = explode('/', $registerNumber)[0];

        foreach ($locations as $candidate) {
            if ($locationId !== $candidate->getId()) {
                continue;
            }

            return $candidate;
        }

        return null;
    }
254
255 2
    protected function getLocations(Dom $dom)
256
    {
257
        /**
258
         * @var Dom\HtmlNode[] $nodes
259
         */
260 2
        $nodes = $dom->find(static::LOCATIONS_BLOCK_SELECTOR)->toArray();
261
262 2
        $nodes = array_filter(
263 2
            $nodes,
264
            static function (HtmlNode $htmlNode) {
265 2
                return $htmlNode->getAttribute('value');
0 ignored issues
show
Are you sure about the usage of $htmlNode->getAttribute('value')? The targeted method PHPHtmlParser\Dom\AbstractNode::getAttribute() seems to always return null.

This check looks for function or method calls that always return null and whose return value is used.

class A
{
    function getObject()
    {
        return null;
    }

}

$a = new A();
if ($a->getObject()) {

The method getObject() can return nothing but null, so it makes no sense to use the return value.

The reason is most likely that a function or method is incomplete or has been reduced for debug purposes.

Loading history...
266 2
            }
267
        );
268
269 2
        return array_map(
270
            static function (HtmlNode $htmlNode) {
271 2
                $id = trim($htmlNode->getAttribute('value'));
0 ignored issues
show
Are you sure about the usage of $htmlNode->getAttribute('value')? The targeted method PHPHtmlParser\Dom\AbstractNode::getAttribute() seems to always return null.

This check looks for function or method calls that always return null and whose return value is used.

class A
{
    function getObject()
    {
        return null;
    }

}

$a = new A();
if ($a->getObject()) {

The method getObject() can return nothing but null, so it makes no sense to use the return value.

The reason is most likely that a function or method is incomplete or has been reduced for debug purposes.

Loading history...
272
273 2
                $name = $htmlNode->text();
274 2
                $name = str_replace('[' . $id . ']', '', $name);
275 2
                $name = trim($name);
276
277 2
                return (new Location())
278 2
                    ->setId($id)
279 2
                    ->setName($name);
280 2
            },
281
            $nodes
282
        );
283
    }
284
}
285