1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace SP\Crawler\Element; |
4
|
|
|
|
5
|
|
|
use GuzzleHttp\Psr7\Request; |
6
|
|
|
use GuzzleHttp\Psr7\Uri; |
7
|
|
|
use GuzzleHttp\Psr7\MultipartStream; |
8
|
|
|
use GuzzleHttp\Psr7\ServerRequest; |
9
|
|
|
|
10
|
|
|
/** |
11
|
|
|
* @author Ivan Kerin <[email protected]> |
12
|
|
|
* @copyright 2015, Clippings Ltd. |
13
|
|
|
* @license http://spdx.org/licenses/BSD-3-Clause |
14
|
|
|
*/ |
15
|
|
|
class Form extends AbstractElement |
16
|
|
|
{ |
17
|
|
|
/**
 * XPath used by getData() to pick the fields whose values are submitted:
 * any element without a "disabled" attribute that is a checked radio or
 * checkbox input, an input whose type is not radio, file, checkbox or submit,
 * an input with no type attribute at all, a select, or a textarea.
 *
 * @var string
 */
private static $fieldsXPath = <<<FIELDS
//*[not(@disabled) and (
    (self::input and @type = 'radio' and @checked)
    or (self::input and @type = 'checkbox' and @checked)
    or (self::input and @type != 'radio' and @type != 'file' and @type != 'checkbox' and @type != 'submit')
    or (self::input and not(@type))
    or self::select
    or self::textarea
)]
FIELDS;

/**
 * XPath matching file inputs that are not disabled and carry a "value"
 * attribute; used by getMultipartData() to find the files to attach.
 *
 * @var string
 */
private static $filesXPath = "//input[not(@disabled) and @type = 'file' and @value]";

/**
 * XPath matching every file input that is not disabled, whether or not it
 * has a value; used by getFiles().
 *
 * @var string
 */
private static $allFilesXPath = "//input[not(@disabled) and @type = 'file']";
33
|
|
|
|
34
|
3 |
|
/**
 * Convert a flat map of form-style names into a nested array.
 *
 * Names using PHP's bracket syntax ("user[name]", "tags[]") are expanded by
 * parse_str() into nested arrays, the same way PHP builds $_POST.
 *
 * @param  array $params flat map of field name => scalar value
 * @return array nested structure produced by parse_str()
 */
public static function toNestedParams(array $params)
{
    $pairs = [];

    foreach ($params as $key => $value) {
        // Percent-encode both sides: parse_str() url-decodes names and values,
        // so raw "&", "=", "+" or "%" characters would otherwise split or
        // corrupt the pair. Encoded brackets in names are still expanded.
        $pairs []= urlencode((string) $key).'='.urlencode((string) $value);
    }

    parse_str(implode('&', $pairs), $nested);

    return $nested;
}
46
|
|
|
|
47
|
|
|
/**
 * Expose the XPath expression stored in the private static $fieldsXPath.
 *
 * @return string
 */
public static function getFieldsXPath()
{
    $expression = self::$fieldsXPath;

    return $expression;
}
54
|
|
|
|
55
|
|
|
/**
 * Expose the XPath expression stored in the private static $filesXPath.
 *
 * @return string
 */
public static function getFilesXPath()
{
    $expression = self::$filesXPath;

    return $expression;
}
62
|
|
|
|
63
|
|
|
/**
 * Boundary string for multipart bodies. Stays null until it is set through
 * setMultipartBoundary() or generated on first use by getMultipartBoundary().
 *
 * @var string|null
 */
private $multipartBoundary = null;
64
|
|
|
|
65
|
|
|
/**
 * Store the boundary string to use for multipart request bodies.
 *
 * @param string $multipartBoundary boundary to remember on this form
 */
public function setMultipartBoundary($multipartBoundary)
{
    $boundary = $multipartBoundary;

    $this->multipartBoundary = $boundary;
}
72
|
|
|
|
73
|
|
|
/**
 * Return the multipart boundary, generating one on first use.
 *
 * A generated boundary is "----SpiderlingCrawler" followed by uniqid(); it is
 * stored so later calls return the same string.
 *
 * @return string
 */
public function getMultipartBoundary()
{
    if (null !== $this->multipartBoundary) {
        return $this->multipartBoundary;
    }

    $this->multipartBoundary = '----SpiderlingCrawler'.uniqid();

    return $this->multipartBoundary;
}
84
|
|
|
|
85
|
|
|
/**
 * Query the reader with an XPath and wrap every match through the reader's
 * getInput() method.
 *
 * @param  string $xpath expression handed to the reader's query()
 * @return AbstractInput[] zero-indexed list, in query result order
 */
public function getInputs($xpath)
{
    // Resolve the converter first, then run the query, matching the order in
    // which the reader is accessed.
    $toInput = [$this->getReader(), 'getInput'];
    $matches = iterator_to_array($this->getReader()->query($xpath), false);

    $inputs = [];

    foreach ($matches as $match) {
        $inputs []= call_user_func($toInput, $match);
    }

    return $inputs;
}
96
|
|
|
|
97
|
|
|
/**
 * Read the form's "method" attribute, falling back to "GET" when the
 * attribute value is empty or otherwise falsy.
 *
 * @return string the attribute value as written (case is not normalised)
 */
public function getMethod()
{
    $declared = $this->getAttribute('method');

    if ($declared) {
        return $declared;
    }

    return 'GET';
}
104
|
|
|
|
105
|
|
|
/**
 * Tell whether the form submits with GET, comparing the upper-cased method.
 *
 * @return boolean
 */
public function isGet()
{
    $verb = strtoupper($this->getMethod());

    return 'GET' === $verb;
}
112
|
|
|
|
113
|
|
|
/**
 * Read the form's "action" attribute.
 *
 * @return string whatever getAttribute('action') yields
 */
public function getAction()
{
    $action = $this->getAttribute('action');

    return $action;
}
120
|
|
|
|
121
|
|
|
/**
 * Tell whether the form declares the multipart/form-data encoding.
 *
 * The "enctype" attribute is compared case-insensitively, because HTML
 * treats enumerated attribute values as ASCII case-insensitive
 * (e.g. enctype="Multipart/Form-Data" is still multipart).
 *
 * @return boolean
 */
public function isMultipart()
{
    // Cast first so a missing attribute is compared as an empty string
    $enctype = strtolower((string) $this->getAttribute('enctype'));

    return 'multipart/form-data' === $enctype;
}
128
|
|
|
|
129
|
|
|
/**
 * Collect the form's field values as a flat name => value map.
 *
 * Fields are those matched by the private $fieldsXPath expression. When two
 * fields share a name, the later one overwrites the earlier one. Entries in
 * $additional are applied last and override form fields of the same name.
 *
 * @param  array $additional extra or overriding name => value pairs
 * @return array
 */
public function getData(array $additional = [])
{
    $data = [];

    foreach ($this->getInputs(self::$fieldsXPath) as $input) {
        $data[$input->getName()] = $input->getValue();
    }

    // array_replace() instead of array_merge(): PHP turns integer-like names
    // such as "5" into integer keys, and array_merge() would renumber them
    // and append the override instead of replacing the field.
    return array_replace($data, $additional);
}
143
|
|
|
|
144
|
|
|
/**
 * Build the list of multipart elements for this form.
 *
 * Every field from getData() becomes a ['name', 'contents'] element. Every
 * file input matched by the private $filesXPath expression becomes a
 * ['name', 'contents', 'filename'] element, where 'contents' is a read handle
 * opened on the input's value. The handles are returned open and are not
 * closed here.
 *
 * @param  array $additional extra or overriding name => value pairs, passed to getData()
 * @return array list of multipart element arrays
 * @throws \InvalidArgumentException when a file input's value cannot be opened for reading
 */
public function getMultipartData(array $additional = [])
{
    $data = [];

    foreach ($this->getData($additional) as $name => $value) {
        $data []= [
            'name' => $name,
            'contents' => $value,
        ];
    }

    foreach ($this->getInputs(self::$filesXPath) as $input) {
        $name = $input->getName();
        $path = $input->getValue();

        // Binary-safe read mode; fail loudly instead of carrying `false` as contents
        $handle = fopen($path, 'rb');

        if (false === $handle) {
            throw new \InvalidArgumentException(
                sprintf('Cannot open file "%s" for form input "%s"', $path, $name)
            );
        }

        $data []= [
            'name' => $name,
            'contents' => $handle,
            'filename' => $path,
        ];
    }

    return $data;
}
169
|
|
|
|
170
|
|
|
/**
 * Work out the request headers implied by the form's method and encoding.
 *
 * GET forms need none. Other forms get a Content-Type header: multipart
 * (including the boundary) or application/x-www-form-urlencoded.
 *
 * @return array header name => value
 */
public function getHeaders()
{
    if ($this->isGet()) {
        return [];
    }

    $contentType = $this->isMultipart()
        ? 'multipart/form-data; boundary='.$this->getMultipartBoundary()
        : 'application/x-www-form-urlencoded';

    return ['Content-Type' => $contentType];
}
183
|
|
|
|
184
|
|
|
/**
 * Build a nested files array from every enabled file input.
 *
 * Each entry of an input's getPhpFileArray() is stored under the flat key
 * "<input name>[<entry key>]", and the flat map is then expanded with
 * toNestedParams().
 *
 * @return array
 */
public function getFiles()
{
    $flat = [];

    foreach ($this->getInputs(self::$allFilesXPath) as $fileInput) {
        $entries = $fileInput->getPhpFileArray();

        foreach ($entries as $entryKey => $entryValue) {
            $flat[$fileInput->getName().'['.$entryKey.']'] = $entryValue;
        }
    }

    return self::toNestedParams($flat);
}
199
|
|
|
|
200
|
|
|
/**
 * Build the server request that submitting this form would produce.
 *
 * - GET: each field is added to the action URI as a query value, no body.
 * - multipart/form-data: the body is a MultipartStream of getMultipartData().
 * - anything else: the body is the url-encoded field data.
 *
 * The returned request also carries the nested field data as parsed body,
 * the nested files array as the "FILES" attribute, and the normalized
 * uploaded files.
 *
 * @param  array $data extra or overriding name => value pairs
 * @return ServerRequest
 */
public function getRequest(array $data = [])
{
    $method = $this->getMethod();
    $uri = new Uri($this->getAction());
    $body = null;

    // Collected once and reused for the query string / body and the parsed body
    $formData = $this->getData($data);

    if ($this->isGet()) {
        foreach ($formData as $key => $value) {
            $uri = Uri::withQueryValue($uri, $key, $value);
        }
    } elseif ($this->isMultipart()) {
        $body = new MultipartStream($this->getMultipartData($data), $this->getMultipartBoundary());
    } else {
        // '' rather than null: passing null as the numeric prefix is deprecated since PHP 8.1
        $body = http_build_query($formData, '', '&');
    }

    $request = new ServerRequest($method, $uri, $this->getHeaders(), $body);

    $files = $this->getFiles();

    return $request
        ->withParsedBody(self::toNestedParams($formData))
        ->withAttribute('FILES', $files)
        ->withUploadedFiles(ServerRequest::normalizeFiles($files));
}
229
|
|
|
} |
230
|
|
|
|
If a method or function can return multiple different values, and unless you are sure that you can only receive a single value in this context, we recommend adding an additional type check:
If this is a common case that PHP Analyzer should handle natively, please let us know by opening an issue.