| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | namespace App\Http\Controllers\VirtualCrawler\HDU; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | use App\Http\Controllers\VirtualCrawler\CrawlerBase; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | use App\Models\ProblemModel; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | use KubAT\PhpSimple\HtmlDomParser; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | use Auth; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | use Requests; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | use Exception; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | class POJ extends CrawlerBase  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |     public $oid=6; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |     private $con; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |     private $imgi; | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Run the requested crawler action as soon as the object is constructed.
                 *
                 * @param string $action 'judge_level' runs judge_level(); any other value crawls a problem.
                 * @param string $con    Problem identifier passed to crawler(); defaults to 'all'.
                 * @param bool   $cached Accepted for signature compatibility; this constructor does not read it.
                 */
                public function __construct($action='crawl_problem', $con='all', $cached=false)
                {
                    set_time_limit(0); // Pandora's box, engage! (crawling may run longer than the default limit)

                    if ($action=='judge_level') {
                        $this->judge_level();
                        return;
                    }

                    // crawler() is the crawl routine this class declares; dispatch to it.
                    $this->crawler($con);
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Placeholder for the 'judge_level' action; currently does nothing.
                 *
                 * @return void
                 */
                public function judge_level()
                {
                    // TODO: not implemented yet.
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Return the first capture group of a regular expression match.
                 *
                 * @param string $pattern PCRE pattern, expected to contain at least one capture group.
                 * @param string $subject Text to search.
                 *
                 * @return string|null Contents of capture group 1, or null when the pattern does not
                 *                     match or group 1 did not take part in the match.
                 */
                private static function find($pattern, $subject)
                {
                    if (!preg_match($pattern, $subject, $matches)) {
                        return null;
                    }

                    // Group 1 is absent when the pattern has no group or an optional trailing group
                    // was skipped; guard the read so it yields null instead of an undefined offset.
                    return isset($matches[1]) ? $matches[1] : null;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Download every <img> found in $dom and rewrite its src to the local copy.
                 *
                 * Each image is saved under public/external/hdu/img as
                 * "{$this->con}_{$this->imgi}{extension}", and $this->imgi is incremented per image.
                 *
                 * @param object $dom Node exposing find('img') whose elements have a writable `src`
                 *                    (presumably a simple_html_dom object from HtmlDomParser; confirm at the caller).
                 *
                 * @return object The same $dom, with image sources pointing at /external/hdu/img/.
                 */
                private function cacheImage($dom)
                {
                    // Loop-invariant setup: extension map and target directory.
                    $ext=['image/jpeg'=>'.jpg', 'image/png'=>'.png', 'image/gif'=>'.gif', 'image/bmp'=>'.bmp'];
                    $dir=base_path("public/external/hdu/img");
                    if (!file_exists($dir)) {
                        mkdir($dir, 0755, true);
                    }

                    foreach ($dom->find('img') as $ele) {
                        $src=str_replace('../../..', '', (string) $ele->src);
                        if ($src==='') {
                            // No usable source: nothing to download, and $src[0] would be invalid.
                            continue;
                        }

                        // Resolve the source against the HDU host unless it is already absolute.
                        if (strpos($src, '://')!==false) {
                            $url=$src;
                        } elseif ($src[0]=='/') {
                            $url='http://acm.hdu.edu.cn'.$src;
                        } else {
                            $url='http://acm.hdu.edu.cn/'.$src;
                        }

                        // Fetch the resolved absolute URL, not the possibly relative $src.
                        $res=Requests::get($url, ['Referer' => 'http://acm.hdu.edu.cn']);

                        // Prefer the extension implied by Content-Type, ignoring parameters such as "; charset=...".
                        $cext='';
                        if (isset($res->headers['content-type'])) {
                            $type=strtolower(trim(explode(';', $res->headers['content-type'])[0]));
                            if (isset($ext[$type])) {
                                $cext=$ext[$type];
                            }
                        }

                        // Otherwise fall back to the extension of the URL path (text after the last dot).
                        if ($cext==='') {
                            $suffix=pathinfo((string) parse_url($url, PHP_URL_PATH), PATHINFO_EXTENSION);
                            if ($suffix!=='') {
                                $cext='.'.$suffix;
                            }
                        }

                        $fn=$this->con.'_'.($this->imgi++).$cext;
                        file_put_contents($dir.'/'.$fn, $res->body);
                        $ele->src='/external/hdu/img/'.$fn;
                    }

                    return $dom;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |     public function crawler($con)  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |         if($con == "all") { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |             return ; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |         $this->con = $con; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |         $this->imgi = 1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         $problemModel = new ProblemModel(); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |         $res = Request::get("http://acm.hdu.edu.cn/showproblem.php?pid={$con}"); | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |         if (strpos("No such problem",$res->body) !== false) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |             header('HTTP/1.1 404 Not Found'); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |             die(); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |         }  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |         else if(strpos("Invalid Parameter.",$res->body) !== false) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |             header('HTTP/1.1 404 Not Found'); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |             die(); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |         else { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |             $this->pro['pcode'] = "HDU".$con; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |             $this->pro['OJ'] = 1; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |             $this->pro['contest_id'] = null; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |             $this->pro['index_id'] = $con; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |             $this->pro['origin'] = "http://acm.hdu.edu.cn/showproblem.php?pid={$con}"; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |             $this->pro['title'] = find("/<h1 style='color:#1A5CC8'>([\s\S]*?)<\/h1>/",$res->body); | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |             $this->pro['time_limit'] = find('/Time Limit:.*\/(.*) MS/',$res->body); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |             $this->pro['memory_limit'] = find('/Memory Limit:.*\/(.*) K/',$res->body); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |             $this->pro['solved_count'] = find("/Accepted Submission(s): ([\d+]*?)/",$res->body); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |             $this->pro['input_type']='standard input'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |             $this->pro['output_type']='standard output'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |             $this->pro['description'] = cacheImage(find("/this->problem Description.*<div class=panel_content>(.*)<\/div><div class=panel_bottom>/sU",$res->body)); | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |             $this->pro['input'] = find("/<div class=panel_title align=left>Input.*<div class=panel_content>(.*)<\/div><div class=panel_bottom>/sU",$res->body); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |             $this->pro['output'] = find("/<div class=panel_title align=left>Output.*<div class=panel_content>(.*)<\/div><div class=panel_bottom>/sU",$res->body); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |             $this->pro['sample'] = []; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |             $this->pro['sample']['sample_input'] = find("/<pre><div.*>(.*)<\/div><\/pre>/sU",$res->body); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |             $this->pro['sample']['sample_output'] = find("/<div.*>Sample Output<\/div><div.*><pre><div.*>(.*)<\/div><\/pre><\/div>/sU",$res->body); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |             $this->pro['note'] = find("/<i>Hint<\/i><\/div>(.*)<\/div><i style='font-size:1px'>/sU",$res->body); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |             $this->pro['source'] = find("/<div class=panel_title align=left>Source<\/div> (.*)<div class=panel_bottom>/sU",$res->body); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |             $this->pro['force_raw'] = 0; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |             $problem=$problemModel->pid($this->pro['pcode']); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |             if ($problem) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |                 $problemModel->clearTags($problem); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |                 $new_pid=$this->update_problem($this->oid); | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |             } else { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |                 $new_pid=$this->insert_problem($this->oid); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |             // $problemModel->addTags($new_pid, $tag); // not present | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |         } | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 129 |  |  |     } | 
            
                                                        
            
                                    
            
            
                | 130 |  |  | } |