Conditions | 5 |
Paths | 4 |
Total Lines | 262 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include: Extract Method.
If many parameters/temporary variables are present: Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.
1 | <?php |
||
59 | public function others() |
||
60 | { |
||
61 | |||
62 | $ql = QueryList::get('https://www.google.co.jp/search?q=QueryList'); |
||
63 | |||
64 | $ql->find('title')->text(); //The page title |
||
65 | $ql->find('meta[name=keywords]')->content; //The page keywords |
||
66 | |||
67 | $ql->find('h3>a')->texts(); //Get a list of search results titles |
||
68 | $ql->find('h3>a')->attrs('href'); //Get a list of search results links |
||
69 | |||
70 | $ql->find('img')->src; //Gets the link address of the first image |
||
71 | $ql->find('img:eq(1)')->src; //Gets the link address of the second image |
||
72 | $ql->find('img')->eq(2)->src; //Gets the link address of the third image |
||
73 | // Loop all the images |
||
74 | $ql->find('img')->map( |
||
75 | function ($img) { |
||
76 | echo $img->alt; //Print the alt attribute of the image |
||
77 | } |
||
78 | ); |
||
79 | |||
80 | // More usage |
||
81 | |||
82 | $ql->find('#head')->append('<div>Append content</div>')->find('div')->htmls(); |
||
83 | $ql->find('.two')->children('img')->attrs('alt'); // Get the class is the "two" element under all img child nodes |
||
84 | // Loop over all child nodes of the element with class "two" |
||
85 | $data = $ql->find('.two')->children()->map( |
||
86 | function ($item) { |
||
87 | // Use "is" to determine the node type |
||
88 | if($item->is('a')) { |
||
89 | return $item->text(); |
||
90 | }elseif($item->is('img')) { |
||
91 | return $item->alt; |
||
92 | } |
||
93 | } |
||
94 | ); |
||
95 | |||
96 | // $ql->find('a')->attr('href', 'newVal')->removeClass('className')->html('newHtml')->... |
||
97 | // $ql->find('div > p')->add('div > ul')->filter(':has(a)')->find('p:first')->nextAll()->andSelf()->... |
||
98 | // $ql->find('div.old')->replaceWith( $ql->find('div.new')->clone())->appendTo('.trash')->prepend('Deleted')->... |
||
99 | |||
100 | // List crawl |
||
101 | |||
102 | // Crawl the title and link of the Google search results list: |
||
103 | |||
104 | $data = QueryList::get('https://www.google.co.jp/search?q=QueryList') |
||
105 | // Set the crawl rules |
||
106 | ->rules( |
||
107 | [ |
||
108 | 'title'=>array('h3','text'), |
||
109 | 'link'=>array('h3>a','href') |
||
110 | ] |
||
111 | ) |
||
112 | ->query()->getData(); |
||
113 | |||
114 | print_r($data->all()); |
||
115 | |||
116 | // Results: |
||
117 | |||
118 | // Array |
||
119 | // ( |
||
120 | // [0] => Array |
||
121 | // ( |
||
122 | // [title] => Angular - QueryList |
||
123 | // [link] => https://angular.io/api/core/QueryList |
||
124 | // ) |
||
125 | // [1] => Array |
||
126 | // ( |
||
127 | // [title] => QueryList | @angular/core - Angularリファレンス - Web Creative Park |
||
128 | // [link] => http://www.webcreativepark.net/angular/querylist/ |
||
129 | // ) |
||
130 | // [2] => Array |
||
131 | // ( |
||
132 | // [title] => QueryListにQueryを追加したり、追加されたことを感知する | TIPS ... |
||
133 | // [link] => http://www.webcreativepark.net/angular/querylist_query_add_subscribe/ |
||
134 | // ) |
||
135 | // //... |
||
136 | // ) |
||
137 | |||
138 | // Encoding conversion |
||
139 | |||
140 | // Out charset :UTF-8 |
||
141 | // In charset :GB2312 |
||
142 | QueryList::get('https://top.etao.com')->encoding('UTF-8', 'GB2312')->find('a')->texts(); |
||
143 | |||
144 | // Out charset:UTF-8 |
||
145 | // In charset:Automatic Identification |
||
146 | QueryList::get('https://top.etao.com')->encoding('UTF-8')->find('a')->texts(); |
||
147 | |||
148 | // HTTP Client (GuzzleHttp) |
||
149 | |||
150 | // Log in to GitHub by sending a browser cookie |
||
151 | |||
152 | //Crawl GitHub content |
||
153 | $ql = QueryList::get( |
||
154 | 'https://github.com', 'param1=testvalue & params2=somevalue', [ |
||
155 | 'headers' => [ |
||
156 | // Fill in the cookie from the browser |
||
157 | 'Cookie' => 'SINAGLOBAL=546064; wb_cmtLike_2112031=1; wvr=6;....' |
||
158 | ] |
||
159 | ] |
||
160 | ); |
||
161 | //echo $ql->getHtml(); |
||
162 | $userName = $ql->find('.header-nav-current-user>.css-truncate-target')->text(); |
||
163 | echo $userName; |
||
164 | |||
165 | // Use the Http proxy |
||
166 | |||
167 | $urlParams = ['param1' => 'testvalue','params2' => 'somevalue']; |
||
168 | $opts = [ |
||
169 | // Set the http proxy |
||
170 | 'proxy' => 'http://222.141.11.17:8118', |
||
171 | //Set the timeout time in seconds |
||
172 | 'timeout' => 30, |
||
173 | // Fake HTTP headers |
||
174 | 'headers' => [ |
||
175 | 'Referer' => 'https://querylist.cc/', |
||
176 | 'User-Agent' => 'testing/1.0', |
||
177 | 'Accept' => 'application/json', |
||
178 | 'X-Foo' => ['Bar', 'Baz'], |
||
179 | 'Cookie' => 'abc=111;xxx=222' |
||
180 | ] |
||
181 | ]; |
||
182 | $ql->get('http://httpbin.org/get', $urlParams, $opts); |
||
183 | // echo $ql->getHtml(); |
||
184 | |||
185 | // Simulated login |
||
186 | |||
187 | // Post login |
||
188 | $ql = QueryList::post( |
||
189 | 'http://xxxx.com/login', [ |
||
190 | 'username' => 'admin', |
||
191 | 'password' => '123456' |
||
192 | ] |
||
193 | )->get('http://xxx.com/admin'); |
||
194 | // Crawl pages that need to be logged in to access |
||
195 | $ql->get('http://xxx.com/admin/page'); |
||
196 | //echo $ql->getHtml(); |
||
197 | |||
198 | // Submit forms |
||
199 | |||
200 | // Login GitHub |
||
201 | |||
202 | // Get the QueryList instance |
||
203 | $ql = QueryList::getInstance(); |
||
204 | // Get the login form |
||
205 | $form = $ql->get('https://github.com/login')->find('form'); |
||
206 | |||
207 | // Fill in the GitHub username and password |
||
208 | $form->find('input[name=login]')->val('your github username or email'); |
||
209 | $form->find('input[name=password]')->val('your github password'); |
||
210 | |||
211 | // Serialize the form data |
||
212 | $fromData = $form->serializeArray(); |
||
213 | $postData = []; |
||
214 | foreach ($fromData as $item) { |
||
215 | $postData[$item['name']] = $item['value']; |
||
216 | } |
||
217 | |||
218 | // Submit the login form |
||
219 | $actionUrl = 'https://github.com'.$form->attr('action'); |
||
220 | $ql->post($actionUrl, $postData); |
||
221 | // Check whether the login succeeded |
||
222 | // echo $ql->getHtml(); |
||
223 | $userName = $ql->find('.header-nav-current-user>.css-truncate-target')->text(); |
||
224 | if($userName) { |
||
225 | echo 'Login successful ! Welcome:'.$userName; |
||
226 | }else{ |
||
227 | echo 'Login failed !'; |
||
228 | } |
||
229 | |||
230 | // Bind function extension |
||
231 | |||
232 | // Customize the extension of a myHttp method: |
||
233 | |||
234 | $ql = QueryList::getInstance(); |
||
235 | |||
236 | //Bind a `myHttp` method to the QueryList object |
||
237 | $ql->bind( |
||
238 | 'myHttp', function ($url) { |
||
239 | // $this is the current QueryList object |
||
240 | $html = file_get_contents($url); |
||
241 | $this->setHtml($html); |
||
242 | return $this; |
||
243 | } |
||
244 | ); |
||
245 | |||
246 | // The bound method can then be called by the name it was bound under |
||
247 | $data = $ql->myHttp('https://toutiao.io')->find('h3 a')->texts(); |
||
248 | print_r($data->all()); |
||
249 | |||
250 | // Or package the logic into a class and bind that instead: |
||
251 | |||
252 | $ql->bind( |
||
253 | 'myHttp', function ($url) { |
||
254 | return new MyHttp($this, $url); |
||
255 | } |
||
256 | ); |
||
257 | |||
258 | // Using plugins |
||
259 | |||
260 | // Use the PhantomJS plugin to crawl JavaScript dynamically rendered pages: |
||
261 | |||
262 | // Set the PhantomJS binary file path during installation |
||
263 | $ql = QueryList::use(PhantomJs::class, '/usr/local/bin/phantomjs'); |
||
264 | |||
265 | // Crawl「500px」all picture links |
||
266 | $data = $ql->browser('https://500px.com/editors')->find('img')->attrs('src'); |
||
267 | print_r($data->all()); |
||
268 | |||
269 | // Use the HTTP proxy |
||
270 | $ql->browser( |
||
271 | 'https://500px.com/editors', false, [ |
||
272 | '--proxy' => '192.168.1.42:8080', |
||
273 | '--proxy-type' => 'http' |
||
274 | ] |
||
275 | ); |
||
276 | |||
277 | // Use the CurlMulti plugin for multi-threaded crawling of the GitHub trending pages: |
||
278 | |||
279 | $ql = QueryList::use(CurlMulti::class); |
||
280 | $ql->curlMulti( |
||
281 | [ |
||
282 | 'https://github.com/trending/php', |
||
283 | 'https://github.com/trending/go', |
||
284 | //.....more urls |
||
285 | ] |
||
286 | ) |
||
287 | // Called when a task succeeds |
||
288 | ->success( |
||
289 | function (QueryList $ql,CurlMulti $curl,$r) { |
||
290 | echo "Current url:{$r['info']['url']} \r\n"; |
||
291 | $data = $ql->find('h3 a')->texts(); |
||
292 | print_r($data->all()); |
||
293 | } |
||
294 | ) |
||
295 | // Task fail callback |
||
296 | ->error( |
||
297 | function ($errorInfo,CurlMulti $curl) { |
||
298 | echo "Current url:{$errorInfo['info']['url']} \r\n"; |
||
299 | print_r($errorInfo['error']); |
||
300 | } |
||
301 | ) |
||
302 | ->start( |
||
303 | [ |
||
304 | // Maximum number of threads |
||
305 | 'maxThread' => 10, |
||
306 | // Number of error retries |
||
307 | 'maxTry' => 3, |
||
308 | ] |
||
309 | ); |
||
310 | |||
311 | // Plugins |
||
312 | |||
313 | // jae-jae/QueryList-PhantomJS:Use PhantomJS to crawl Javascript dynamically rendered page. |
||
314 | // jae-jae/QueryList-CurlMulti : Curl multi threading. |
||
315 | // jae-jae/QueryList-AbsoluteUrl : Converting relative urls to absolute. |
||
316 | // jae-jae/QueryList-Rule-Google : Google searcher. |
||
317 | // jae-jae/QueryList-Rule-Baidu : Baidu searcher. |
||
318 | |||
319 | // View more QueryList plugins and QueryList-based products: QueryList Community |
||
320 | } |
||
321 | } |
||
322 |
This check looks for unreachable code. It uses sophisticated control flow analysis techniques to find statements which will never be executed.
Unreachable code is most often the result of `return`, `die` or `exit` statements that have been added for debug purposes.
In the above example, the last `return false` will never be executed, because a return statement has already been met in every possible execution path.