@@ -10,89 +10,89 @@ |
||
class Crawler extends BaseCrawler implements CrawlerInterface
{
    /**
     * Selectors used to extract data, keyed by output field name.
     *
     * Each value is either a selector string (the text of the match becomes
     * the field value) or an array of `outer selector => inner selector`
     * pairs (every inner match adds one entry to a list for that field).
     *
     * @var array
     */
    private $selectors = [];

    /**
     * Stores the selectors and hands the HTML over to the parent crawler.
     *
     * @param mixed $html      Passed unchanged to the parent constructor.
     * @param array $selectors Selectors keyed by field name. hasError() reads
     *                         the 'razao_social' and 'error' entries.
     */
    public function __construct($html, $selectors)
    {
        $this->selectors = $selectors;

        parent::__construct($html);
    }

    /**
     * Checks, before scraping, whether the response is an error page.
     *
     * The response is treated as an error when the 'razao_social' selector
     * matches nothing. The exception message is the cleaned text of the
     * 'error' selector, or an empty string when that selector matches
     * nothing either.
     *
     * @return void
     * @throws ErrorFoundData When the 'razao_social' selector has no match.
     */
    public function hasError()
    {
        $node = $this->scrap($this->selectors['razao_social']);

        if ($node->count()) {
            return;
        }

        $error = $this->scrap($this->selectors['error']);

        // NOTE(review): text() on an empty node list presumably throws in the
        // parent crawler (Symfony DomCrawler does) - confirm. Guarding keeps
        // the failure reported as ErrorFoundData rather than an unrelated
        // exception.
        $message = $error->count() ? $this->clearString($error->text()) : '';

        throw new ErrorFoundData($message, 1);
    }

    /**
     * Extracts the configured fields from the HTML through the DOM.
     *
     * Runs hasError() first, so an error page raises instead of returning a
     * partial result. Fields whose selector matches nothing are omitted from
     * the result. Selector entries that are neither strings nor arrays are
     * ignored.
     *
     * @return array Cleaned text keyed by field name; list-type fields hold
     *               an array of cleaned strings.
     * @throws ErrorFoundData Propagated from hasError().
     */
    public function scraping()
    {
        $scrapped = [];

        $this->hasError();

        foreach ($this->selectors as $name => $selector) {
            if (is_string($selector)) {
                $node = $this->scrap($selector);

                if ($node->count()) {
                    $scrapped[$name] = $this->clearString($node->text());
                }
            } elseif (is_array($selector)) {
                // Distinct names for the inner pair: the original reused
                // $selector as the key, shadowing the outer loop variable.
                foreach ($selector as $outer => $inner) {
                    $node = $this->scrap($outer);

                    if ($node->count()) {
                        foreach ($node->filter($inner) as $element) {
                            $scrapped[$name][] = $this->clearString($element->nodeValue);
                        }
                    }
                }
            }
        }

        return $scrapped;
    }

    /**
     * Cleans the given value: collapses every run of whitespace into a
     * single space and trims both ends.
     *
     * @param string $string
     * @return string
     */
    public function clearString($string)
    {
        // preg_replace() returns null on failure (e.g. invalid UTF-8 with the
        // "u" modifier); the cast keeps the historical '' result without
        // passing null to trim(), which is deprecated since PHP 8.1.
        return trim((string) preg_replace(['/[\s]+/mu'], ' ', $string));
    }

    /**
     * Filters the crawler by the given selector.
     *
     * @param string $selector
     * @return mixed Whatever the inherited filter() returns for the selector.
     */
    public function scrap($selector)
    {
        return $this->filter($selector);
    }
}
| 99 | 99 | \ No newline at end of file |
@@ -7,105 +7,105 @@ |
||
class Curl
{
    /**
     * URL given to the last init() call.
     *
     * @var string|null
     */
    private $url;

    /**
     * Options given to the last options() call.
     *
     * @var array|null
     */
    private $options;

    /**
     * cURL handle created by init().
     *
     * @var mixed
     */
    private $instance;

    /**
     * Result of the last exec() call, exactly as returned by curl_exec().
     *
     * @var mixed
     */
    private $response;

    /**
     * Creates the cURL handle for the given URL. Must be called before any
     * other method that touches the handle.
     *
     * @param string $url
     * @return $this
     */
    public function init($url)
    {
        $this->instance = curl_init($url);

        $this->url = $url;

        return $this;
    }

    /**
     * Applies a set of cURL options to the handle in one call.
     *
     * @param array $options CURLOPT_* constant => value.
     * @return $this
     */
    public function options(array $options)
    {
        $this->options = $options;

        curl_setopt_array($this->instance, $this->options);

        return $this;
    }

    /**
     * Configures the request as a POST with the given fields sent as a
     * URL-encoded body.
     *
     * @param array $fields Field name => value, encoded with http_build_query().
     * @return $this
     */
    public function post(array $fields)
    {
        // CURLOPT_POST is a boolean switch. The original passed
        // count($fields), which only worked by accident of truthiness.
        $this->option(CURLOPT_POST, true);
        $this->option(CURLOPT_POSTFIELDS, http_build_query($fields));

        return $this;
    }

    /**
     * Sets a single option on the cURL handle.
     *
     * @param integer $option One of the CURLOPT_* constants.
     * @param mixed   $value
     * @return $this
     */
    public function option($option, $value)
    {
        curl_setopt($this->instance, $option, $value);

        return $this;
    }

    /**
     * Executes the request and stores the result for response().
     *
     * The stored value is whatever curl_exec() returns: per the PHP manual,
     * false on failure, and the response body only when
     * CURLOPT_RETURNTRANSFER is enabled (true otherwise).
     *
     * @return $this
     */
    public function exec()
    {
        $this->response = curl_exec($this->instance);

        return $this;
    }

    /**
     * Closes the cURL handle. The stored response stays available.
     *
     * @return $this
     */
    public function close()
    {
        curl_close($this->instance);

        return $this;
    }

    /**
     * Returns the result stored by the last exec() call.
     *
     * @return mixed Null when exec() has not been called yet.
     */
    public function response()
    {
        return $this->response;
    }
}
| 112 | 112 | \ No newline at end of file |