1 | <?php |
||||
2 | |||||
3 | |||||
4 | namespace GDCInfo\Flows; |
||||
5 | |||||
6 | use Carbon\Carbon; |
||||
7 | use DOMDocument; |
||||
8 | use DOMXPath; |
||||
9 | use GDCInfo\Exceptions\FindGDCInfoException; |
||||
10 | use GDCInfo\Exceptions\NotFoundGDCInfoException; |
||||
11 | use GDCInfo\GDCInfo; |
||||
12 | use Illuminate\Support\Facades\Http; |
||||
13 | use Illuminate\Support\Str; |
||||
14 | |||||
/**
 * Looks up a registration number on the olr.gdc-uk.org register search page
 * and turns the returned HTML into a GDCInfo object.
 *
 * The flow is: HTTP GET -> parse HTML -> inspect <title> -> scrape the name
 * heading and the label/value rows -> GDCInfo::fromArray().
 */
class GDCInfoFromHtmlFlow
{
    /** Search endpoint queried by get(). */
    private const SEARCH_URL = 'https://olr.gdc-uk.org/SearchRegister/SearchResult';

    /** Page title that get() treats as "no such registration number". */
    private const NO_RESULTS_TITLE = 'No Results';

    /**
     * Day, short month name, four-digit year.
     *
     * The leading "!" resets every field that the format does not mention, so
     * parsed dates are at 00:00:00 instead of inheriting the current time of day.
     */
    private const DATE_FORMAT = '!d M Y';

    /** XPath of the heading that holds the registrant's name. */
    private const NAME_XPATH = '/html/body/div/main/div/div/div/div/div/div/div/h2';

    /** XPath of the <div> elements scanned for label/value pairs. */
    private const INFO_XPATH = '/html/body/div/main/div/div/div/div/div/div/div/div';

    /**
     * Named constructor; forwards any arguments to the constructor of the
     * class it is called on (late static binding).
     *
     * @param mixed ...$arguments
     *
     * @return static
     */
    public static function make(...$arguments)
    {
        return new static(...$arguments);
    }

    /**
     * Fetches the register page for the given number and parses it.
     *
     * @param string $gdcNumber Sent as the "RegistrationNumber" query parameter.
     *
     * @throws FindGDCInfoException     On a non-successful HTTP status, an empty or
     *                                  unparsable body, or a missing page title/name.
     * @throws NotFoundGDCInfoException When the page title is "No Results".
     */
    public function get(string $gdcNumber): GDCInfo
    {
        $response = Http::timeout(20)->get(self::SEARCH_URL, [
            'RegistrationNumber' => $gdcNumber,
        ]);

        if (!$response->successful()) {
            throw new FindGDCInfoException("Response error with status [{$response->status()}]. We can't parse html");
        }

        $xpath = new DOMXPath($this->loadDocument((string) $response->body()));

        $titles = $xpath->query('/html/head/title');
        $title = $titles ? $titles->item(0)?->nodeValue : null;
        if (!$title) {
            throw new FindGDCInfoException("'Head' elements not exists. Error on parsing data");
        }

        if ($title === self::NO_RESULTS_TITLE) {
            throw new NotFoundGDCInfoException("Invalid gdc number [{$gdcNumber}].");
        }

        return GDCInfo::fromArray([
            ...$this->parseName($xpath),
            ...$this->parseInfo($xpath),
        ]);
    }

    /**
     * Extracts the first and last name from the name heading.
     *
     * @return array<string, string> May contain "first_name" and/or "last_name".
     *
     * @throws FindGDCInfoException When the heading is missing or empty.
     */
    protected function parseName(DOMXPath $xpath): array
    {
        $headings = $xpath->query(self::NAME_XPATH);
        $heading = $headings ? $headings->item(0) : null;
        if (!$heading || !$heading->nodeValue) {
            throw new FindGDCInfoException("'Name item not found' elements not exists. Error on parsing data");
        }

        // Child 1 is read as the first name and child 2 as the last name;
        // child 0 is ignored (presumably a title or leading text node — TODO confirm
        // against the live markup).
        $data = [];
        foreach (['first_name' => 1, 'last_name' => 2] as $field => $childIndex) {
            $text = $heading->childNodes->item($childIndex)?->nodeValue;
            if ($text) {
                $data[$field] = trim($text);
            }
        }

        return $data;
    }

    /**
     * Collects the label/value rows into a snake_cased associative array and
     * converts the known date fields into Carbon instances.
     *
     * - "first_registered_on" is replaced by a Carbon date, or dropped when it
     *   cannot be parsed.
     * - "current_period_of_registration_from" (a "<from> until: <until>" string)
     *   is replaced by "current_period_from" / "current_period_until"; each is
     *   only set when its part parses.
     *
     * @return array<string, mixed>
     *
     * @throws FindGDCInfoException When the XPath query itself fails.
     */
    protected function parseInfo(DOMXPath $xpath): array
    {
        $rows = $xpath->query(self::INFO_XPATH);
        if (!$rows) {
            throw new FindGDCInfoException("'Info items not found' elements not exists. Error on parsing data");
        }

        $data = [];
        foreach ($rows as $index => $row) {
            // The first matched <div> is never read (presumably it is not a
            // label/value row — TODO confirm against the live markup).
            if ($index === 0) {
                continue;
            }

            $key = $this->cellText($row, 1);
            $value = $this->cellText($row, 3);
            // Compare against '' explicitly so that a literal "0" is kept.
            if ($key === '' || $value === '') {
                continue;
            }

            $data[Str::snake($key)] = $value;
        }

        if (isset($data['first_registered_on'])) {
            $registeredOn = $this->parseDate($data['first_registered_on']);
            unset($data['first_registered_on']);

            if ($registeredOn !== null) {
                $data['first_registered_on'] = $registeredOn;
            }
        }

        if (isset($data['current_period_of_registration_from'])) {
            // Without the "until:" separator explode() yields a single part and
            // the period is skipped.
            $parts = array_map(trim(...), explode('until:', $data['current_period_of_registration_from']));
            unset($data['current_period_of_registration_from']);

            if (count($parts) === 2) {
                $period = [
                    'current_period_from' => $parts[0],
                    'current_period_until' => $parts[1],
                ];
                foreach ($period as $field => $raw) {
                    $date = $this->parseDate($raw);
                    if ($date !== null) {
                        $data[$field] = $date;
                    }
                }
            }
        }

        return $data;
    }

    /**
     * Parses an HTML string into a DOMDocument without emitting PHP warnings.
     *
     * libxml errors are collected internally for the duration of the call and
     * the previous libxml error mode is restored afterwards.
     *
     * @throws FindGDCInfoException When the body is blank or libxml rejects it.
     */
    private function loadDocument(string $html): DOMDocument
    {
        // DOMDocument::loadHTML() throws a ValueError on an empty string.
        if (trim($html) === '') {
            throw new FindGDCInfoException('Response body is empty. Error on parsing data');
        }

        $document = new DOMDocument();
        $previousMode = libxml_use_internal_errors(true);

        try {
            $loaded = $document->loadHTML($html);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousMode);
        }

        if ($loaded === false) {
            throw new FindGDCInfoException('Response body is not parsable html. Error on parsing data');
        }

        return $document;
    }

    /**
     * Returns the trimmed text of the child node at $index, without a trailing
     * colon; an absent child yields an empty string.
     */
    private function cellText(\DOMNode $row, int $index): string
    {
        return rtrim(trim((string) $row->childNodes->item($index)?->nodeValue), ':');
    }

    /**
     * Best-effort date parsing: returns null instead of failing when the text
     * does not match DATE_FORMAT.
     */
    private function parseDate(string $value): ?Carbon
    {
        try {
            $date = Carbon::createFromFormat(self::DATE_FORMAT, $value);
        } catch (\Exception) {
            return null;
        }

        return $date ?: null;
    }
}
||||
146 |
This check compares calls to functions or methods with their respective definitions. If the call has more arguments than are defined, it raises an issue.
If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is WordPress. Please note the @ignore annotation hint above.