| @@ -60,7 +60,6 @@ discard block | ||
| 60 | 60 | |
| 61 | 61 | /** | 
| 62 | 62 | * [post description] | 
| 63 | - * @param [type] $params [description] | |
| 64 | 63 | * @return [type] [description] | 
| 65 | 64 | */ | 
| 66 | 65 | public function post(array $fields) | 
| @@ -73,7 +72,7 @@ discard block | ||
| 73 | 72 | |
| 74 | 73 | /** | 
| 75 | 74 | * Set option in cURL | 
| 76 | - * @param mix $option | |
| 75 | + * @param integer $option | |
| 77 | 76 | * @param mix $value | 
| 78 | 77 | */ | 
| 79 | 78 | public function option($option, $value) | 
| @@ -7,106 +7,106 @@ | ||
| 7 | 7 | class Curl | 
| 8 | 8 |  { | 
| 9 | 9 | |
| 10 | - /** | |
| 11 | - * [$url description] | |
| 12 | - * @var [type] | |
| 13 | - */ | |
| 14 | - private $url; | |
| 15 | - | |
| 16 | - /** | |
| 17 | - * [$options description] | |
| 18 | - * @var [type] | |
| 19 | - */ | |
| 20 | - private $options; | |
| 21 | - | |
| 22 | - /** | |
| 23 | - * [$instance description] | |
| 24 | - * @var [type] | |
| 25 | - */ | |
| 26 | - private $instance; | |
| 27 | - | |
| 28 | - /** | |
| 29 | - * [$response description] | |
| 30 | - * @var [type] | |
| 31 | - */ | |
| 32 | - private $response; | |
| 33 | - | |
| 34 | - /** | |
| 35 | - * [init description] | |
| 36 | - * @return [type] [description] | |
| 37 | - */ | |
| 38 | - public function init($url) | |
| 39 | -    { | |
| 40 | - $this->instance = curl_init($url); | |
| 41 | - | |
| 42 | - $this->url = $url; | |
| 43 | - | |
| 44 | - return $this; | |
| 45 | - } | |
| 46 | - | |
| 47 | - /** | |
| 48 | - * [options description] | |
| 49 | - * @param array $options [description] | |
| 50 | - * @return [type] [description] | |
| 51 | - */ | |
| 52 | - public function options(array $options) | |
| 53 | -    { | |
| 54 | - $this->options = $options; | |
| 55 | - | |
| 56 | - curl_setopt_array($this->instance, $this->options); | |
| 57 | - | |
| 58 | - return $this; | |
| 59 | - } | |
| 60 | - | |
| 61 | - /** | |
| 62 | - * [post description] | |
| 63 | - * @param [type] $params [description] | |
| 64 | - * @return [type] [description] | |
| 65 | - */ | |
| 66 | - public function post(array $fields) | |
| 67 | -    { | |
| 68 | - $this->option(CURLOPT_POST, count($fields)); | |
| 69 | - $this->option(CURLOPT_POSTFIELDS, http_build_query($fields)); | |
| 70 | - | |
| 71 | - return $this; | |
| 72 | - } | |
| 73 | - | |
| 74 | - /** | |
| 75 | - * Set option in cURL | |
| 76 | - * @param mix $option | |
| 77 | - * @param mix $value | |
| 78 | - */ | |
| 79 | - public function option($option, $value) | |
| 80 | -    { | |
| 81 | - curl_setopt($this->instance, $option, $value); | |
| 82 | - } | |
| 83 | - | |
| 84 | - /** | |
| 85 | - * [exec description] | |
| 86 | - * @return [type] [description] | |
| 87 | - */ | |
| 88 | - public function exec() | |
| 89 | -    { | |
| 90 | - $this->response = curl_exec($this->instance); | |
| 91 | - } | |
| 92 | - | |
| 93 | - /** | |
| 94 | - * [close description] | |
| 95 | - * @return [type] [description] | |
| 96 | - */ | |
| 97 | - public function close() | |
| 98 | -    { | |
| 99 | - curl_close($this->instance); | |
| 100 | - | |
| 101 | - return $this; | |
| 102 | - } | |
| 103 | - | |
| 104 | - /** | |
| 105 | - * [response description] | |
| 106 | - * @return [type] [description] | |
| 107 | - */ | |
| 108 | - public function response() | |
| 109 | -    { | |
| 110 | - return $this->response; | |
| 111 | - } | |
| 10 | + /** | |
| 11 | + * [$url description] | |
| 12 | + * @var [type] | |
| 13 | + */ | |
| 14 | + private $url; | |
| 15 | + | |
| 16 | + /** | |
| 17 | + * [$options description] | |
| 18 | + * @var [type] | |
| 19 | + */ | |
| 20 | + private $options; | |
| 21 | + | |
| 22 | + /** | |
| 23 | + * [$instance description] | |
| 24 | + * @var [type] | |
| 25 | + */ | |
| 26 | + private $instance; | |
| 27 | + | |
| 28 | + /** | |
| 29 | + * [$response description] | |
| 30 | + * @var [type] | |
| 31 | + */ | |
| 32 | + private $response; | |
| 33 | + | |
| 34 | + /** | |
| 35 | + * [init description] | |
| 36 | + * @return [type] [description] | |
| 37 | + */ | |
| 38 | + public function init($url) | |
| 39 | +	{ | |
| 40 | + $this->instance = curl_init($url); | |
| 41 | + | |
| 42 | + $this->url = $url; | |
| 43 | + | |
| 44 | + return $this; | |
| 45 | + } | |
| 46 | + | |
| 47 | + /** | |
| 48 | + * [options description] | |
| 49 | + * @param array $options [description] | |
| 50 | + * @return [type] [description] | |
| 51 | + */ | |
| 52 | + public function options(array $options) | |
| 53 | +	{ | |
| 54 | + $this->options = $options; | |
| 55 | + | |
| 56 | + curl_setopt_array($this->instance, $this->options); | |
| 57 | + | |
| 58 | + return $this; | |
| 59 | + } | |
| 60 | + | |
| 61 | + /** | |
| 62 | + * [post description] | |
| 63 | + * @param [type] $params [description] | |
| 64 | + * @return [type] [description] | |
| 65 | + */ | |
| 66 | + public function post(array $fields) | |
| 67 | +	{ | |
| 68 | + $this->option(CURLOPT_POST, count($fields)); | |
| 69 | + $this->option(CURLOPT_POSTFIELDS, http_build_query($fields)); | |
| 70 | + | |
| 71 | + return $this; | |
| 72 | + } | |
| 73 | + | |
| 74 | + /** | |
| 75 | + * Set option in cURL | |
| 76 | + * @param mix $option | |
| 77 | + * @param mix $value | |
| 78 | + */ | |
| 79 | + public function option($option, $value) | |
| 80 | +	{ | |
| 81 | + curl_setopt($this->instance, $option, $value); | |
| 82 | + } | |
| 83 | + | |
| 84 | + /** | |
| 85 | + * [exec description] | |
| 86 | + * @return [type] [description] | |
| 87 | + */ | |
| 88 | + public function exec() | |
| 89 | +	{ | |
| 90 | + $this->response = curl_exec($this->instance); | |
| 91 | + } | |
| 92 | + | |
| 93 | + /** | |
| 94 | + * [close description] | |
| 95 | + * @return [type] [description] | |
| 96 | + */ | |
| 97 | + public function close() | |
| 98 | +	{ | |
| 99 | + curl_close($this->instance); | |
| 100 | + | |
| 101 | + return $this; | |
| 102 | + } | |
| 103 | + | |
| 104 | + /** | |
| 105 | + * [response description] | |
| 106 | + * @return [type] [description] | |
| 107 | + */ | |
| 108 | + public function response() | |
| 109 | +	{ | |
| 110 | + return $this->response; | |
| 111 | + } | |
| 112 | 112 | } | 
| 113 | 113 | \ No newline at end of file | 
| @@ -11,13 +11,13 @@ discard block | ||
| 11 | 11 | |
| 12 | 12 | /** | 
| 13 | 13 | * [search description] | 
| 14 | - * @return [type] [description] | |
| 14 | + * @return string [description] | |
| 15 | 15 | */ | 
| 16 | 16 | public function getCaptcha(); | 
| 17 | 17 | |
| 18 | 18 | /** | 
| 19 | 19 | * [cookie description] | 
| 20 | - * @return [type] [description] | |
| 20 | + * @return string [description] | |
| 21 | 21 | */ | 
| 22 | 22 | public function getCookie(); | 
| 23 | 23 | |
| @@ -30,6 +30,8 @@ discard block | ||
| 30 | 30 | /** | 
| 31 | 31 | * [getData description] | 
| 32 | 32 | * @param [type] $configurations [description] | 
| 33 | + * @param string $cookie | |
| 34 | + * @param string $captcha | |
| 33 | 35 | * @return [type] [description] | 
| 34 | 36 | */ | 
| 35 | 37 | public function getData($document, $cookie, $captcha, $params, $configurations); | 
| @@ -10,13 +10,13 @@ | ||
| 10 | 10 | |
| 11 | 11 | /** | 
| 12 | 12 | * [search description] | 
| 13 | - * @return [type] [description] | |
| 13 | + * @return string [description] | |
| 14 | 14 | */ | 
| 15 | 15 | public function captcha(); | 
| 16 | 16 | |
| 17 | 17 | /** | 
| 18 | 18 | * [cookie description] | 
| 19 | - * @return [type] [description] | |
| 19 | + * @return string [description] | |
| 20 | 20 | */ | 
| 21 | 21 | public function cookie(); | 
| 22 | 22 | |
| @@ -1,7 +1,5 @@ | ||
| 1 | 1 | <?php namespace zServices\ReceitaFederal; | 
| 2 | 2 | |
| 3 | -use zServices\Miscellany\Exceptions\InvalidService; | |
| 4 | -use zServices\Miscellany\Interfaces\ServiceInterface; | |
| 5 | 3 | use zServices\ReceitaFederal\Services\Portais\AN\Service; | 
| 6 | 4 | |
| 7 | 5 | /** | 
| @@ -33,7 +33,7 @@ | ||
| 33 | 33 | /** | 
| 34 | 34 | * Verifica antes de fazer o crawler se possui erros | 
| 35 | 35 | * na requisição | 
| 36 | - * @return boolean | |
| 36 | + * @return boolean|null | |
| 37 | 37 | */ | 
| 38 | 38 | public function hasError() | 
| 39 | 39 |      { | 
| @@ -12,99 +12,99 @@ | ||
| 12 | 12 | class Crawler extends BaseCrawler implements CrawlerInterface | 
| 13 | 13 |  { | 
| 14 | 14 | |
| 15 | - /** | |
| 16 | - * [$selectors description] | |
| 17 | - * @var [type] | |
| 18 | - */ | |
| 19 | - private $selectors = []; | |
| 15 | + /** | |
| 16 | + * [$selectors description] | |
| 17 | + * @var [type] | |
| 18 | + */ | |
| 19 | + private $selectors = []; | |
| 20 | 20 | |
| 21 | - /** | |
| 22 | - * [__construct description] | |
| 23 | - * @param [type] $html [description] | |
| 24 | - * @param array $selectors [description] | |
| 25 | - */ | |
| 26 | - public function __construct($html, $selectors) | |
| 27 | -    { | |
| 28 | - $this->selectors = $selectors; | |
| 21 | + /** | |
| 22 | + * [__construct description] | |
| 23 | + * @param [type] $html [description] | |
| 24 | + * @param array $selectors [description] | |
| 25 | + */ | |
| 26 | + public function __construct($html, $selectors) | |
| 27 | +	{ | |
| 28 | + $this->selectors = $selectors; | |
| 29 | 29 | |
| 30 | - parent::__construct($html); | |
| 31 | - } | |
| 30 | + parent::__construct($html); | |
| 31 | + } | |
| 32 | 32 | |
| 33 | - /** | |
| 34 | - * Verifica antes de fazer o crawler se possui erros | |
| 35 | - * na requisição | |
| 36 | - * @return boolean | |
| 37 | - */ | |
| 38 | - public function hasError() | |
| 39 | -    { | |
| 40 | -        if(count( $this->selectors) == 0) { | |
| 41 | -            throw new NoSelectorsConfigured("NoSelectorsConfigured", 1); | |
| 42 | - } | |
| 33 | + /** | |
| 34 | + * Verifica antes de fazer o crawler se possui erros | |
| 35 | + * na requisição | |
| 36 | + * @return boolean | |
| 37 | + */ | |
| 38 | + public function hasError() | |
| 39 | +	{ | |
| 40 | +		if(count( $this->selectors) == 0) { | |
| 41 | +			throw new NoSelectorsConfigured("NoSelectorsConfigured", 1); | |
| 42 | + } | |
| 43 | 43 | |
| 44 | - // verifica se a página seguida na requisição | |
| 45 | - // é página de erro da receita federal | |
| 46 | - $node = $this->filter($this->selectors['error']); | |
| 44 | + // verifica se a página seguida na requisição | |
| 45 | + // é página de erro da receita federal | |
| 46 | + $node = $this->filter($this->selectors['error']); | |
| 47 | 47 | |
| 48 | -        if($node->count()){ | |
| 49 | - throw new ErrorFoundData( $this->clearString($node->text()), 1); | |
| 50 | - } | |
| 48 | +		if($node->count()){ | |
| 49 | + throw new ErrorFoundData( $this->clearString($node->text()), 1); | |
| 50 | + } | |
| 51 | 51 | |
| 52 | - // CNPJ informado é válido? | |
| 53 | -        if($this->filter('#imgCaptcha')->count()){ | |
| 54 | -            throw new InvalidCaptcha('Captcha inválido', 99); | |
| 55 | - } | |
| 56 | - } | |
| 52 | + // CNPJ informado é válido? | |
| 53 | +		if($this->filter('#imgCaptcha')->count()){ | |
| 54 | +			throw new InvalidCaptcha('Captcha inválido', 99); | |
| 55 | + } | |
| 56 | + } | |
| 57 | 57 | |
| 58 | - /** | |
| 59 | - * Extrai informações do HTML através do DOM | |
| 60 | - * | |
| 61 | - * @return array | |
| 62 | - */ | |
| 63 | - public function scraping() | |
| 64 | -    { | |
| 65 | - $scrapped = []; | |
| 58 | + /** | |
| 59 | + * Extrai informações do HTML através do DOM | |
| 60 | + * | |
| 61 | + * @return array | |
| 62 | + */ | |
| 63 | + public function scraping() | |
| 64 | +	{ | |
| 65 | + $scrapped = []; | |
| 66 | 66 | |
| 67 | - $this->hasError(); | |
| 67 | + $this->hasError(); | |
| 68 | 68 | |
| 69 | -        foreach ($this->selectors as $name => $selector) { | |
| 70 | -            if(is_string($selector)){ | |
| 71 | - $node = $this->scrap($selector); | |
| 69 | +		foreach ($this->selectors as $name => $selector) { | |
| 70 | +			if(is_string($selector)){ | |
| 71 | + $node = $this->scrap($selector); | |
| 72 | 72 | |
| 73 | -                if($node->count()){ | |
| 74 | - $scrapped[$name] = $this->clearString($node->text()); | |
| 75 | - } | |
| 76 | -            }elseif(is_array($selector)){ | |
| 77 | -                foreach ($selector as $selector => $repeat) { | |
| 78 | - $node = $this->scrap($selector); | |
| 79 | -                    if($node->count()){ | |
| 80 | - foreach ($node->filter($repeat) as $loop) | |
| 81 | -                        { | |
| 82 | - $scrapped[$name][] = $this->clearString($loop->nodeValue); | |
| 83 | - } | |
| 84 | - } | |
| 85 | - } | |
| 86 | - } | |
| 87 | - } | |
| 73 | +				if($node->count()){ | |
| 74 | + $scrapped[$name] = $this->clearString($node->text()); | |
| 75 | + } | |
| 76 | +			}elseif(is_array($selector)){ | |
| 77 | +				foreach ($selector as $selector => $repeat) { | |
| 78 | + $node = $this->scrap($selector); | |
| 79 | +					if($node->count()){ | |
| 80 | + foreach ($node->filter($repeat) as $loop) | |
| 81 | +						{ | |
| 82 | + $scrapped[$name][] = $this->clearString($loop->nodeValue); | |
| 83 | + } | |
| 84 | + } | |
| 85 | + } | |
| 86 | + } | |
| 87 | + } | |
| 88 | 88 | |
| 89 | - return $scrapped; | |
| 90 | - } | |
| 89 | + return $scrapped; | |
| 90 | + } | |
| 91 | 91 | |
| 92 | - /** | |
| 93 | - * Limpa o valor repassado | |
| 94 | - * @param string $string | |
| 95 | - * @return string | |
| 96 | - */ | |
| 97 | - public function clearString($string) | |
| 98 | -    { | |
| 99 | - return trim(preg_replace(['/[\s]+/mu'], ' ', $string)); | |
| 100 | - } | |
| 92 | + /** | |
| 93 | + * Limpa o valor repassado | |
| 94 | + * @param string $string | |
| 95 | + * @return string | |
| 96 | + */ | |
| 97 | + public function clearString($string) | |
| 98 | +	{ | |
| 99 | + return trim(preg_replace(['/[\s]+/mu'], ' ', $string)); | |
| 100 | + } | |
| 101 | 101 | |
| 102 | - /** | |
| 103 | - * Filtra selector no crawler | |
| 104 | - */ | |
| 105 | - public function scrap($selector) | |
| 106 | -    { | |
| 107 | - $node = $this->filter($selector); | |
| 108 | - return $node; | |
| 109 | - } | |
| 102 | + /** | |
| 103 | + * Filtra selector no crawler | |
| 104 | + */ | |
| 105 | + public function scrap($selector) | |
| 106 | +	{ | |
| 107 | + $node = $this->filter($selector); | |
| 108 | + return $node; | |
| 109 | + } | |
| 110 | 110 | } | 
| 111 | 111 | \ No newline at end of file | 
| @@ -37,7 +37,7 @@ discard block | ||
| 37 | 37 | */ | 
| 38 | 38 | public function hasError() | 
| 39 | 39 |      { | 
| 40 | -        if(count( $this->selectors) == 0) { | |
| 40 | +        if (count($this->selectors) == 0) { | |
| 41 | 41 |              throw new NoSelectorsConfigured("NoSelectorsConfigured", 1); | 
| 42 | 42 | } | 
| 43 | 43 | |
| @@ -45,12 +45,12 @@ discard block | ||
| 45 | 45 | // é página de erro da receita federal | 
| 46 | 46 | $node = $this->filter($this->selectors['error']); | 
| 47 | 47 | |
| 48 | -        if($node->count()){ | |
| 49 | - throw new ErrorFoundData( $this->clearString($node->text()), 1); | |
| 48 | +        if ($node->count()) { | |
| 49 | + throw new ErrorFoundData($this->clearString($node->text()), 1); | |
| 50 | 50 | } | 
| 51 | 51 | |
| 52 | 52 | // CNPJ informado é válido? | 
| 53 | -        if($this->filter('#imgCaptcha')->count()){ | |
| 53 | +        if ($this->filter('#imgCaptcha')->count()) { | |
| 54 | 54 |              throw new InvalidCaptcha('Captcha inválido', 99); | 
| 55 | 55 | } | 
| 56 | 56 | } | 
| @@ -67,16 +67,16 @@ discard block | ||
| 67 | 67 | $this->hasError(); | 
| 68 | 68 | |
| 69 | 69 |          foreach ($this->selectors as $name => $selector) { | 
| 70 | -            if(is_string($selector)){ | |
| 70 | +            if (is_string($selector)) { | |
| 71 | 71 | $node = $this->scrap($selector); | 
| 72 | 72 | |
| 73 | -                if($node->count()){ | |
| 73 | +                if ($node->count()) { | |
| 74 | 74 | $scrapped[$name] = $this->clearString($node->text()); | 
| 75 | 75 | } | 
| 76 | -            }elseif(is_array($selector)){ | |
| 76 | +            }elseif (is_array($selector)) { | |
| 77 | 77 |                  foreach ($selector as $selector => $repeat) { | 
| 78 | 78 | $node = $this->scrap($selector); | 
| 79 | -                    if($node->count()){ | |
| 79 | +                    if ($node->count()) { | |
| 80 | 80 | foreach ($node->filter($repeat) as $loop) | 
| 81 | 81 |                          { | 
| 82 | 82 | $scrapped[$name][] = $this->clearString($loop->nodeValue); | 
| @@ -73,7 +73,7 @@ | ||
| 73 | 73 |                  if($node->count()){ | 
| 74 | 74 | $scrapped[$name] = $this->clearString($node->text()); | 
| 75 | 75 | } | 
| 76 | -            }elseif(is_array($selector)){ | |
| 76 | +            } elseif(is_array($selector)){ | |
| 77 | 77 |                  foreach ($selector as $selector => $repeat) { | 
| 78 | 78 | $node = $this->scrap($selector); | 
| 79 | 79 |                      if($node->count()){ | 
| @@ -32,7 +32,7 @@ | ||
| 32 | 32 | /** | 
| 33 | 33 | * Verifica antes de fazer o crawler se possui erros | 
| 34 | 34 | * na requisição | 
| 35 | - * @return boolean | |
| 35 | + * @return boolean|null | |
| 36 | 36 | */ | 
| 37 | 37 | public function hasError() | 
| 38 | 38 |      { | 
| @@ -2,7 +2,6 @@ | ||
| 2 | 2 | |
| 3 | 3 | use zServices\Miscellany\Interfaces\CrawlerInterface; | 
| 4 | 4 | use Symfony\Component\DomCrawler\Crawler as BaseCrawler; | 
| 5 | -use zServices\Miscellany\Exceptions\NoSelectorsConfigured; | |
| 6 | 5 | use zServices\Miscellany\Exceptions\ErrorFoundData; | 
| 7 | 6 | |
| 8 | 7 | /** | 
| @@ -11,89 +11,89 @@ | ||
| 11 | 11 | class Crawler extends BaseCrawler implements CrawlerInterface | 
| 12 | 12 |  { | 
| 13 | 13 | |
| 14 | - /** | |
| 15 | - * [$selectors description] | |
| 16 | - * @var [type] | |
| 17 | - */ | |
| 18 | - private $selectors = []; | |
| 14 | + /** | |
| 15 | + * [$selectors description] | |
| 16 | + * @var [type] | |
| 17 | + */ | |
| 18 | + private $selectors = []; | |
| 19 | 19 | |
| 20 | - /** | |
| 21 | - * [__construct description] | |
| 22 | - * @param [type] $html [description] | |
| 23 | - * @param array $selectors [description] | |
| 24 | - */ | |
| 25 | - public function __construct($html, $selectors) | |
| 26 | -    { | |
| 27 | - $this->selectors = $selectors; | |
| 20 | + /** | |
| 21 | + * [__construct description] | |
| 22 | + * @param [type] $html [description] | |
| 23 | + * @param array $selectors [description] | |
| 24 | + */ | |
| 25 | + public function __construct($html, $selectors) | |
| 26 | +	{ | |
| 27 | + $this->selectors = $selectors; | |
| 28 | 28 | |
| 29 | - parent::__construct($html); | |
| 30 | - } | |
| 29 | + parent::__construct($html); | |
| 30 | + } | |
| 31 | 31 | |
| 32 | - /** | |
| 33 | - * Verifica antes de fazer o crawler se possui erros | |
| 34 | - * na requisição | |
| 35 | - * @return boolean | |
| 36 | - */ | |
| 37 | - public function hasError() | |
| 38 | -    { | |
| 39 | - $node = $this->scrap($this->selectors['razao_social']); | |
| 32 | + /** | |
| 33 | + * Verifica antes de fazer o crawler se possui erros | |
| 34 | + * na requisição | |
| 35 | + * @return boolean | |
| 36 | + */ | |
| 37 | + public function hasError() | |
| 38 | +	{ | |
| 39 | + $node = $this->scrap($this->selectors['razao_social']); | |
| 40 | 40 | |
| 41 | - if(!$node->count()) | |
| 42 | -        { | |
| 43 | - throw new ErrorFoundData($this->clearString($this->scrap($this->selectors['error'])->text()), 1); | |
| 44 | - } | |
| 45 | - } | |
| 41 | + if(!$node->count()) | |
| 42 | +		{ | |
| 43 | + throw new ErrorFoundData($this->clearString($this->scrap($this->selectors['error'])->text()), 1); | |
| 44 | + } | |
| 45 | + } | |
| 46 | 46 | |
| 47 | - /** | |
| 48 | - * Extrai informações do HTML através do DOM | |
| 49 | - * | |
| 50 | - * @return array | |
| 51 | - */ | |
| 52 | - public function scraping() | |
| 53 | -    { | |
| 54 | - $scrapped = []; | |
| 47 | + /** | |
| 48 | + * Extrai informações do HTML através do DOM | |
| 49 | + * | |
| 50 | + * @return array | |
| 51 | + */ | |
| 52 | + public function scraping() | |
| 53 | +	{ | |
| 54 | + $scrapped = []; | |
| 55 | 55 | |
| 56 | - $this->hasError(); | |
| 56 | + $this->hasError(); | |
| 57 | 57 | |
| 58 | -        foreach ($this->selectors as $name => $selector) { | |
| 59 | -            if(is_string($selector)){ | |
| 60 | - $node = $this->scrap($selector); | |
| 58 | +		foreach ($this->selectors as $name => $selector) { | |
| 59 | +			if(is_string($selector)){ | |
| 60 | + $node = $this->scrap($selector); | |
| 61 | 61 | |
| 62 | -                if($node->count()){ | |
| 63 | - $scrapped[$name] = $this->clearString($node->text()); | |
| 64 | - } | |
| 65 | -            }elseif(is_array($selector)){ | |
| 66 | -                foreach ($selector as $selector => $repeat) { | |
| 67 | - $node = $this->scrap($selector); | |
| 68 | -                    if($node->count()){ | |
| 69 | - foreach ($node->filter($repeat) as $loop) | |
| 70 | -                        { | |
| 71 | - $scrapped[$name][] = $this->clearString($loop->nodeValue); | |
| 72 | - } | |
| 73 | - } | |
| 74 | - } | |
| 75 | - } | |
| 76 | - } | |
| 62 | +				if($node->count()){ | |
| 63 | + $scrapped[$name] = $this->clearString($node->text()); | |
| 64 | + } | |
| 65 | +			}elseif(is_array($selector)){ | |
| 66 | +				foreach ($selector as $selector => $repeat) { | |
| 67 | + $node = $this->scrap($selector); | |
| 68 | +					if($node->count()){ | |
| 69 | + foreach ($node->filter($repeat) as $loop) | |
| 70 | +						{ | |
| 71 | + $scrapped[$name][] = $this->clearString($loop->nodeValue); | |
| 72 | + } | |
| 73 | + } | |
| 74 | + } | |
| 75 | + } | |
| 76 | + } | |
| 77 | 77 | |
| 78 | - return $scrapped; | |
| 79 | - } | |
| 78 | + return $scrapped; | |
| 79 | + } | |
| 80 | 80 | |
| 81 | - /** | |
| 82 | - * Limpa o valor repassado | |
| 83 | - * @param string $string | |
| 84 | - * @return string | |
| 85 | - */ | |
| 86 | - public function clearString($string) | |
| 87 | -    { | |
| 88 | - return trim(preg_replace(['/[\s]+/mu'], ' ', $string)); | |
| 89 | - } | |
| 81 | + /** | |
| 82 | + * Limpa o valor repassado | |
| 83 | + * @param string $string | |
| 84 | + * @return string | |
| 85 | + */ | |
| 86 | + public function clearString($string) | |
| 87 | +	{ | |
| 88 | + return trim(preg_replace(['/[\s]+/mu'], ' ', $string)); | |
| 89 | + } | |
| 90 | 90 | |
| 91 | - /** | |
| 92 | - * Filtra selector no crawler | |
| 93 | - */ | |
| 94 | - public function scrap($selector) | |
| 95 | -    { | |
| 96 | - $node = $this->filter($selector); | |
| 97 | - return $node; | |
| 98 | - } | |
| 91 | + /** | |
| 92 | + * Filtra selector no crawler | |
| 93 | + */ | |
| 94 | + public function scrap($selector) | |
| 95 | +	{ | |
| 96 | + $node = $this->filter($selector); | |
| 97 | + return $node; | |
| 98 | + } | |
| 99 | 99 | } | 
| 100 | 100 | \ No newline at end of file | 
| @@ -38,7 +38,7 @@ discard block | ||
| 38 | 38 |      { | 
| 39 | 39 | $node = $this->scrap($this->selectors['razao_social']); | 
| 40 | 40 | |
| 41 | - if(!$node->count()) | |
| 41 | + if (!$node->count()) | |
| 42 | 42 |          { | 
| 43 | 43 | throw new ErrorFoundData($this->clearString($this->scrap($this->selectors['error'])->text()), 1); | 
| 44 | 44 | } | 
| @@ -56,16 +56,16 @@ discard block | ||
| 56 | 56 | $this->hasError(); | 
| 57 | 57 | |
| 58 | 58 |          foreach ($this->selectors as $name => $selector) { | 
| 59 | -            if(is_string($selector)){ | |
| 59 | +            if (is_string($selector)) { | |
| 60 | 60 | $node = $this->scrap($selector); | 
| 61 | 61 | |
| 62 | -                if($node->count()){ | |
| 62 | +                if ($node->count()) { | |
| 63 | 63 | $scrapped[$name] = $this->clearString($node->text()); | 
| 64 | 64 | } | 
| 65 | -            }elseif(is_array($selector)){ | |
| 65 | +            }elseif (is_array($selector)) { | |
| 66 | 66 |                  foreach ($selector as $selector => $repeat) { | 
| 67 | 67 | $node = $this->scrap($selector); | 
| 68 | -                    if($node->count()){ | |
| 68 | +                    if ($node->count()) { | |
| 69 | 69 | foreach ($node->filter($repeat) as $loop) | 
| 70 | 70 |                          { | 
| 71 | 71 | $scrapped[$name][] = $this->clearString($loop->nodeValue); | 
| @@ -62,7 +62,7 @@ | ||
| 62 | 62 |                  if($node->count()){ | 
| 63 | 63 | $scrapped[$name] = $this->clearString($node->text()); | 
| 64 | 64 | } | 
| 65 | -            }elseif(is_array($selector)){ | |
| 65 | +            } elseif(is_array($selector)){ | |
| 66 | 66 |                  foreach ($selector as $selector => $repeat) { | 
| 67 | 67 | $node = $this->scrap($selector); | 
| 68 | 68 |                      if($node->count()){ | 
| @@ -22,28 +22,28 @@ | ||
| 22 | 22 | 'data' => [ | 
| 23 | 23 | 'error' => 'body > table:nth-child(3) > tr:nth-child(2) > td > b > font', | 
| 24 | 24 | 'numero_inscricao' => 'body > table:nth-child(3) > tr > td > table:nth-child(3) > tr > td:nth-child(1) > font:nth-child(3) > b:nth-child(1)', | 
| 25 | - 'classificacao' => 'body > table:nth-child(3) > tr > td > table:nth-child(3) > tr > td:nth-child(1) > font:nth-child(3) > b:nth-child(3)', | |
| 26 | - 'data_abertura' => 'body > table:nth-child(3) > tr > td > table:nth-child(3) > tr > td:nth-child(3) > font:nth-child(3) > b', | |
| 27 | - 'nome_empresarial' => 'body > table:nth-child(3) > tr > td > table:nth-child(5) > tr > td > font:nth-child(3) > b', | |
| 28 | - 'nome_fantasia' => 'body > table:nth-child(3) > tr > td > table:nth-child(7) > tr > td > font:nth-child(3) > b', | |
| 29 | - 'cnae_principal' => 'body > table:nth-child(3) > tr > td > table:nth-child(9) > tr > td > font:nth-child(3) > b', | |
| 30 | - 'cnae_secundarios' => ['body > table:nth-child(3) > tr > td > table:nth-child(11) > tr > td' => 'td > font > b'], | |
| 31 | - 'natureza_juridica' => 'body > table:nth-child(3) > tr > td > table:nth-child(13) > tr > td > font:nth-child(3) > b', | |
| 32 | - 'endereco' => 'body > table:nth-child(3) > tr > td > table:nth-child(15) > tr > td:nth-child(1) > font:nth-child(3) > b', | |
| 33 | - 'numero' => 'body > table:nth-child(3) > tr > td > table:nth-child(15) > tr > td:nth-child(3) > font:nth-child(3) > b', | |
| 34 | - 'complemento' => 'body > table:nth-child(3) > tr > td > table:nth-child(15) > tr > td:nth-child(5) > font:nth-child(3) > b', | |
| 35 | - 'cep' => 'body > table:nth-child(3) > tr > td > table:nth-child(17) > tr > td:nth-child(1) > font:nth-child(3) > b', | |
| 36 | - 'distrito' => 'body > table:nth-child(3) > tr > td > table:nth-child(17) > tr > td:nth-child(3) > font:nth-child(3) > b', | |
| 37 | - 'municipio' => 'body > table:nth-child(3) > tr > td > table:nth-child(17) > tr > td:nth-child(5) > font:nth-child(3) > b', | |
| 38 | - 'uf' => 'body > table:nth-child(3) > tr > td > table:nth-child(17) > tr > td:nth-child(7) > font:nth-child(3) > b', | |
| 39 | - 'email' => 'body > table:nth-child(3) > tr > td > table:nth-child(19) > tr > td:nth-child(1) > font:nth-child(3) > b', | |
| 40 | - 'telefone' => 'body > table:nth-child(3) > tr > td > table:nth-child(19) > tr > td:nth-child(3) > font:nth-child(3) > b', | |
| 41 | - 'efr' => 'body > table:nth-child(3) > tr > td > table:nth-child(21) > tr > td > font:nth-child(3) > b', | |
| 42 | - 'situacao' => 'body > table:nth-child(3) > tr > td > table:nth-child(23) > tr > td:nth-child(1) > font:nth-child(3) > b', | |
| 43 | - 'data_situacao' => 'body > table:nth-child(3) > tr > td > table:nth-child(23) > tr > td:nth-child(3) > font:nth-child(3) > b', | |
| 44 | - 'motivo_situacao' => 'body > table:nth-child(3) > tr > td > table:nth-child(25) > tr > td:nth-child(3) > font:nth-child(3) > b', | |
| 45 | - 'situacao_especial' => 'body > table:nth-child(3) > tr > td > table:nth-child(27) > tr > td:nth-child(1) > font:nth-child(3) > b', | |
| 46 | - 'data_situacao_especial' => 'body > table:nth-child(3) > tr > td > table:nth-child(27) > tr > td:nth-child(3) > font:nth-child(3) > b' | |
| 25 | + 'classificacao' => 'body > table:nth-child(3) > tr > td > table:nth-child(3) > tr > td:nth-child(1) > font:nth-child(3) > b:nth-child(3)', | |
| 26 | + 'data_abertura' => 'body > table:nth-child(3) > tr > td > table:nth-child(3) > tr > td:nth-child(3) > font:nth-child(3) > b', | |
| 27 | + 'nome_empresarial' => 'body > table:nth-child(3) > tr > td > table:nth-child(5) > tr > td > font:nth-child(3) > b', | |
| 28 | + 'nome_fantasia' => 'body > table:nth-child(3) > tr > td > table:nth-child(7) > tr > td > font:nth-child(3) > b', | |
| 29 | + 'cnae_principal' => 'body > table:nth-child(3) > tr > td > table:nth-child(9) > tr > td > font:nth-child(3) > b', | |
| 30 | + 'cnae_secundarios' => ['body > table:nth-child(3) > tr > td > table:nth-child(11) > tr > td' => 'td > font > b'], | |
| 31 | + 'natureza_juridica' => 'body > table:nth-child(3) > tr > td > table:nth-child(13) > tr > td > font:nth-child(3) > b', | |
| 32 | + 'endereco' => 'body > table:nth-child(3) > tr > td > table:nth-child(15) > tr > td:nth-child(1) > font:nth-child(3) > b', | |
| 33 | + 'numero' => 'body > table:nth-child(3) > tr > td > table:nth-child(15) > tr > td:nth-child(3) > font:nth-child(3) > b', | |
| 34 | + 'complemento' => 'body > table:nth-child(3) > tr > td > table:nth-child(15) > tr > td:nth-child(5) > font:nth-child(3) > b', | |
| 35 | + 'cep' => 'body > table:nth-child(3) > tr > td > table:nth-child(17) > tr > td:nth-child(1) > font:nth-child(3) > b', | |
| 36 | + 'distrito' => 'body > table:nth-child(3) > tr > td > table:nth-child(17) > tr > td:nth-child(3) > font:nth-child(3) > b', | |
| 37 | + 'municipio' => 'body > table:nth-child(3) > tr > td > table:nth-child(17) > tr > td:nth-child(5) > font:nth-child(3) > b', | |
| 38 | + 'uf' => 'body > table:nth-child(3) > tr > td > table:nth-child(17) > tr > td:nth-child(7) > font:nth-child(3) > b', | |
| 39 | + 'email' => 'body > table:nth-child(3) > tr > td > table:nth-child(19) > tr > td:nth-child(1) > font:nth-child(3) > b', | |
| 40 | + 'telefone' => 'body > table:nth-child(3) > tr > td > table:nth-child(19) > tr > td:nth-child(3) > font:nth-child(3) > b', | |
| 41 | + 'efr' => 'body > table:nth-child(3) > tr > td > table:nth-child(21) > tr > td > font:nth-child(3) > b', | |
| 42 | + 'situacao' => 'body > table:nth-child(3) > tr > td > table:nth-child(23) > tr > td:nth-child(1) > font:nth-child(3) > b', | |
| 43 | + 'data_situacao' => 'body > table:nth-child(3) > tr > td > table:nth-child(23) > tr > td:nth-child(3) > font:nth-child(3) > b', | |
| 44 | + 'motivo_situacao' => 'body > table:nth-child(3) > tr > td > table:nth-child(25) > tr > td:nth-child(3) > font:nth-child(3) > b', | |
| 45 | + 'situacao_especial' => 'body > table:nth-child(3) > tr > td > table:nth-child(27) > tr > td:nth-child(1) > font:nth-child(3) > b', | |
| 46 | + 'data_situacao_especial' => 'body > table:nth-child(3) > tr > td > table:nth-child(27) > tr > td:nth-child(3) > font:nth-child(3) > b' | |
| 47 | 47 | ] | 
| 48 | 48 | ], | 
| 49 | 49 | 'headers' => [ | 
| @@ -71,13 +71,13 @@ discard block | ||
| 71 | 71 | $this->configurations = $configurations; | 
| 72 | 72 | |
| 73 | 73 | // instancia o client http | 
| 74 | - $this->client = new ClientHttp(); | |
| 74 | + $this->client = new ClientHttp(); | |
| 75 | 75 | |
| 76 | - // Executa um request para URL do serviço, retornando o cookie da requisição primária | |
| 77 | -        $this->instanceResponse = $this->client->request('GET', $this->configurations['home']); | |
| 76 | + // Executa um request para URL do serviço, retornando o cookie da requisição primária | |
| 77 | +		$this->instanceResponse = $this->client->request('GET', $this->configurations['home']); | |
| 78 | 78 | |
| 79 | - // Captura o cookie da requisição, será usuado posteriormente | |
| 80 | - $this->cookie = $this->client->cookie(); | |
| 79 | + // Captura o cookie da requisição, será usuado posteriormente | |
| 80 | + $this->cookie = $this->client->cookie(); | |
| 81 | 81 | |
| 82 | 82 | return $this; | 
| 83 | 83 | } | 
| @@ -104,57 +104,57 @@ discard block | ||
| 104 | 104 | $this->hasRequested(); | 
| 105 | 105 | |
| 106 | 106 | // Inicia instancia do cURL | 
| 107 | - $curl = new Curl; | |
| 108 | - | |
| 109 | - // Inicia uma requisição para capturar a imagem do captcha | |
| 110 | - // informando cookie da requisição passada e os headers | |
| 111 | - // | |
| 112 | - // to-do: implementar guzzlehttp? | |
| 113 | - // ele é melhor que o curl? ou mais organizado? | |
| 114 | - $curl->init($this->configurations['captcha']); | |
| 115 | - | |
| 116 | - // headers da requisição | |
| 117 | - $curl->options([ | |
| 118 | - CURLOPT_COOKIEJAR => 'cookiejar', | |
| 119 | - CURLOPT_HTTPHEADER => array( | |
| 120 | - "Pragma: no-cache", | |
| 121 | - "Origin: " . $this->configurations['base'], | |
| 122 | - "Host: ". array_get($this->configurations, 'headers.Host'), | |
| 123 | - "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0", | |
| 124 | - "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", | |
| 125 | - "Accept-Language: pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3", | |
| 126 | - "Accept-Encoding: gzip, deflate", | |
| 127 | - "Referer: " . $this->configurations['home'], | |
| 128 | - "Cookie: flag=1; ". $this->cookie, | |
| 129 | - "Connection: keep-alive" | |
| 130 | - ), | |
| 131 | - CURLOPT_RETURNTRANSFER => true, | |
| 132 | - CURLOPT_FOLLOWLOCATION => 1, | |
| 133 | - CURLOPT_BINARYTRANSFER => TRUE, | |
| 134 | - CURLOPT_CONNECTTIMEOUT => 10, | |
| 135 | - CURLOPT_TIMEOUT => 10, | |
| 136 | - ]); | |
| 137 | - | |
| 138 | - // executa o curl, logo após fechando a conexão | |
| 139 | - $curl->exec(); | |
| 140 | - $curl->close(); | |
| 141 | - | |
| 142 | - // captura do retorno do curl | |
| 143 | - // o esperado deverá ser o HTML da imagem | |
| 144 | - $this->captcha = $curl->response(); | |
| 145 | - | |
| 146 | - // é uma imagem o retorno? | |
| 147 | - if(@imagecreatefromstring($this->captcha) == false) | |
| 148 | -        { | |
| 149 | -            throw new NoCaptchaResponse('Não foi possível capturar o captcha'); | |
| 150 | - } | |
| 151 | - | |
| 152 | - // constroe o base64 da imagem para o usuário digitar | |
| 153 | - // to-do: um serviço automatizado para decifrar o captcha? | |
| 154 | - // talvez deathbycaptcha? | |
| 155 | - $this->captchaImage = 'data:image/png;base64,' . base64_encode($this->captcha); | |
| 156 | - | |
| 157 | - return $this->captchaImage; | |
| 107 | + $curl = new Curl; | |
| 108 | + | |
| 109 | + // Inicia uma requisição para capturar a imagem do captcha | |
| 110 | + // informando cookie da requisição passada e os headers | |
| 111 | + // | |
| 112 | + // to-do: implementar guzzlehttp? | |
| 113 | + // ele é melhor que o curl? ou mais organizado? | |
| 114 | + $curl->init($this->configurations['captcha']); | |
| 115 | + | |
| 116 | + // headers da requisição | |
| 117 | + $curl->options([ | |
| 118 | + CURLOPT_COOKIEJAR => 'cookiejar', | |
| 119 | + CURLOPT_HTTPHEADER => array( | |
| 120 | + "Pragma: no-cache", | |
| 121 | + "Origin: " . $this->configurations['base'], | |
| 122 | + "Host: ". array_get($this->configurations, 'headers.Host'), | |
| 123 | + "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0", | |
| 124 | + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", | |
| 125 | + "Accept-Language: pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3", | |
| 126 | + "Accept-Encoding: gzip, deflate", | |
| 127 | + "Referer: " . $this->configurations['home'], | |
| 128 | + "Cookie: flag=1; ". $this->cookie, | |
| 129 | + "Connection: keep-alive" | |
| 130 | + ), | |
| 131 | + CURLOPT_RETURNTRANSFER => true, | |
| 132 | + CURLOPT_FOLLOWLOCATION => 1, | |
| 133 | + CURLOPT_BINARYTRANSFER => TRUE, | |
| 134 | + CURLOPT_CONNECTTIMEOUT => 10, | |
| 135 | + CURLOPT_TIMEOUT => 10, | |
| 136 | + ]); | |
| 137 | + | |
| 138 | + // executa o curl, logo após fechando a conexão | |
| 139 | + $curl->exec(); | |
| 140 | + $curl->close(); | |
| 141 | + | |
| 142 | + // captura do retorno do curl | |
| 143 | + // o esperado deverá ser o HTML da imagem | |
| 144 | + $this->captcha = $curl->response(); | |
| 145 | + | |
| 146 | + // é uma imagem o retorno? | |
| 147 | + if(@imagecreatefromstring($this->captcha) == false) | |
| 148 | +		{ | |
| 149 | +			throw new NoCaptchaResponse('Não foi possível capturar o captcha'); | |
| 150 | + } | |
| 151 | + | |
| 152 | + // constrói o base64 da imagem para o usuário digitar | |
| 153 | + // to-do: um serviço automatizado para decifrar o captcha? | |
| 154 | + // talvez deathbycaptcha? | |
| 155 | + $this->captchaImage = 'data:image/png;base64,' . base64_encode($this->captcha); | |
| 156 | + | |
| 157 | + return $this->captchaImage; | |
| 158 | 158 | } | 
| 159 | 159 | |
| 160 | 160 | /** | 
| @@ -196,59 +196,59 @@ discard block | ||
public function getData($document, $cookie, $captcha, $params, $configurations)
{
    // Posts the CNPJ lookup form to the service URL in $configurations['data'],
    // reusing the session cookie and the captcha answer supplied by the caller,
    // and wraps the returned HTML in a Crawler built with the 'selectors.data' map.
    //
    // $document        document number placed in the 'cnpj' form field (digits only)
    // $cookie          cookie string appended to the "Cookie" request header
    // $captcha         captcha text typed by the user
    // $params          extra form fields; they override the defaults on key collision
    // $configurations  service configuration (uses 'data', 'base', 'home',
    //                  'headers.Host' and 'selectors.data')
    //
    // Throws NoServiceResponse (code 99) when the service returns an empty body.

    // Default form fields expected by the service; caller-supplied fields win.
    $formFields = array_merge(
        [
            'origem' => 'comprovante',
            'cnpj' => $document,
            'txtTexto_captcha_serpro_gov_br' => $captcha,
            'submit1' => 'Consultar',
            'search_type' => 'cnpj',
        ],
        $params
    );

    // Resolve 'base' and 'home' from the configuration passed in, like every other
    // value used here. The instance configuration is only a fallback, so the method
    // also works on an object where it was never populated.
    $baseUrl = array_get($configurations, 'base', array_get($this->configurations, 'base'));
    $homeUrl = array_get($configurations, 'home', array_get($this->configurations, 'home'));

    $requestHeaders = [
        "Pragma: no-cache",
        "Origin: " . $baseUrl,
        "Host: " . array_get($configurations, 'headers.Host'),
        "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0",
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language: pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3",
        "Referer: " . $homeUrl . '?cnpj=' . $document,
        "Cookie: flag=1; " . $cookie,
        "Connection: keep-alive",
    ];

    $curl = new Curl;

    // Target the data endpoint of the service being queried.
    $curl->init($configurations['data']);

    $curl->options([
        CURLOPT_HTTPHEADER => $requestHeaders,
        // Let cURL send Accept-Encoding itself so a compressed response is decoded
        // before it reaches the Crawler; a hand-written header would not enable that.
        CURLOPT_ENCODING => 'gzip, deflate',
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => 1,
    ]);

    // Send the form, run the request, then release the handle.
    $curl->post($formFields);
    $curl->exec();
    $curl->close();

    // The response body is expected to be the HTML page to scrape.
    $html = $curl->response();

    if (empty($html)) {
        throw new NoServiceResponse('No response from service', 99);
    }

    return new Crawler($html, array_get($configurations, 'selectors.data'));
}
| 254 | 254 | } | 
| 255 | 255 | \ No newline at end of file | 
| @@ -88,7 +88,7 @@ discard block | ||
| 88 | 88 | */ | 
| 89 | 89 | private function hasRequested() | 
| 90 | 90 |  	{ | 
| 91 | -		if(!$this->instanceResponse) { | |
| 91 | +		if (!$this->instanceResponse) { | |
| 92 | 92 |  			throw new NoServiceCall("No request from this service, please call first method request", 1);			 | 
| 93 | 93 | } | 
| 94 | 94 | |
| @@ -119,13 +119,13 @@ discard block | ||
| 119 | 119 | CURLOPT_HTTPHEADER => array( | 
| 120 | 120 | "Pragma: no-cache", | 
| 121 | 121 | "Origin: " . $this->configurations['base'], | 
| 122 | - "Host: ". array_get($this->configurations, 'headers.Host'), | |
| 122 | + "Host: " . array_get($this->configurations, 'headers.Host'), | |
| 123 | 123 | "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0", | 
| 124 | 124 | "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", | 
| 125 | 125 | "Accept-Language: pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3", | 
| 126 | 126 | "Accept-Encoding: gzip, deflate", | 
| 127 | 127 | "Referer: " . $this->configurations['home'], | 
| 128 | - "Cookie: flag=1; ". $this->cookie, | |
| 128 | + "Cookie: flag=1; " . $this->cookie, | |
| 129 | 129 | "Connection: keep-alive" | 
| 130 | 130 | ), | 
| 131 | 131 | CURLOPT_RETURNTRANSFER => true, | 
| @@ -144,7 +144,7 @@ discard block | ||
| 144 | 144 | $this->captcha = $curl->response(); | 
| 145 | 145 | |
| 146 | 146 | // é uma imagem o retorno? | 
| 147 | - if(@imagecreatefromstring($this->captcha) == false) | |
| 147 | + if (@imagecreatefromstring($this->captcha) == false) | |
| 148 | 148 |          { | 
| 149 | 149 |              throw new NoCaptchaResponse('Não foi possível capturar o captcha'); | 
| 150 | 150 | } | 
| @@ -218,13 +218,13 @@ discard block | ||
| 218 | 218 | CURLOPT_HTTPHEADER => array( | 
| 219 | 219 | "Pragma: no-cache", | 
| 220 | 220 | "Origin: " . $this->configurations['base'], | 
| 221 | - "Host: ". array_get($configurations, 'headers.Host'), | |
| 221 | + "Host: " . array_get($configurations, 'headers.Host'), | |
| 222 | 222 | "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:32.0) Gecko/20100101 Firefox/32.0", | 
| 223 | 223 | "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", | 
| 224 | 224 | "Accept-Language: pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3", | 
| 225 | 225 | "Accept-Encoding: gzip, deflate", | 
| 226 | - "Referer: " . $this->configurations['home'] .'?cnpj='. $document, | |
| 227 | - "Cookie: flag=1; ". $cookie, | |
| 226 | + "Referer: " . $this->configurations['home'] . '?cnpj=' . $document, | |
| 227 | + "Cookie: flag=1; " . $cookie, | |
| 228 | 228 | "Connection: keep-alive" | 
| 229 | 229 | ), | 
| 230 | 230 | CURLOPT_RETURNTRANSFER => 1, | 
| @@ -243,7 +243,7 @@ discard block | ||
| 243 | 243 | // vamos capturar retorno, que deverá ser o HTML para scrapping | 
| 244 | 244 | $html = $curl->response(); | 
| 245 | 245 | |
| 246 | -        if(empty($html)) { | |
| 246 | +        if (empty($html)) { | |
| 247 | 247 |              throw new NoServiceResponse('No response from service', 99); | 
| 248 | 248 | } | 
| 249 | 249 | |
| @@ -23,24 +23,24 @@ | ||
| 23 | 23 | 'data' => [ | 
| 24 | 24 | 'error' => 'body > center:nth-child(8) > table > tr > td > font > b', | 
| 25 | 25 | 'inscricao_estadual' => 'body > center:nth-child(9) > table > tr > td:nth-child(4) > font', | 
| 26 | - 'razao_social' => 'body > center:nth-child(10) > table > tr > td:nth-child(2) > font', | |
| 27 | - 'logradouro' => 'body > center:nth-child(13) > table > tr > td:nth-child(2) > font', | |
| 28 | - 'numero' => 'body > center:nth-child(14) > table > tr > td:nth-child(2) > font', | |
| 29 | - 'complemento' => 'body > center:nth-child(14) > table > tr > td:nth-child(4) > font', | |
| 30 | - 'bairro' => 'body > center:nth-child(15) > table > tr > td:nth-child(2) > font', | |
| 31 | - 'municipio' => 'body > center:nth-child(16) > table > tr > td:nth-child(2) > font', | |
| 32 | - 'uf' => 'body > center:nth-child(16) > table > tr > td:nth-child(4) > font', | |
| 33 | - 'cep' => 'body > center:nth-child(17) > table > tr > td:nth-child(2) > font', | |
| 34 | - 'atividade_economica' => 'body > center:nth-child(20) > table > tr > td:nth-child(2) > font', | |
| 35 | - 'situacao' => 'body > center:nth-child(21) > table > tr > td:nth-child(2) > font', | |
| 36 | - 'situacao2' => 'body > center:nth-child(21) > table > tr > td:nth-child(3) > font', | |
| 37 | - 'data_situacao' => 'body > center:nth-child(22) > table > tr > td:nth-child(2) > font', | |
| 38 | - 'regime' => 'body > center:nth-child(23) > table > tr > td:nth-child(2) > font', | |
| 39 | - 'data_emissor_nfe' => 'body > center:nth-child(24) > table > tr > td:nth-child(2) > font', | |
| 40 | - 'indicator_obrigatoriedade_nfe' => 'body > center:nth-child(25) > table > tr > td:nth-child(2) > font', | |
| 41 | - 'data_inicio_obrigatoriedade_nfe' => 'body > center:nth-child(26) > table > tr > td:nth-child(2) > font', | |
| 42 | - 'consulta' => 'body > center:nth-child(28) > table > tr:nth-child(2) > td:nth-child(2) > font > b', | |
| 43 | - 'observacoes' => 'body > center:nth-child(30) > table > tr > td > font:nth-child(1)', | |
| 26 | + 'razao_social' => 'body > center:nth-child(10) > table > tr > td:nth-child(2) > font', | |
| 27 | + 'logradouro' => 'body > center:nth-child(13) > table > tr > td:nth-child(2) > font', | |
| 28 | + 'numero' => 'body > center:nth-child(14) > table > tr > td:nth-child(2) > font', | |
| 29 | + 'complemento' => 'body > center:nth-child(14) > table > tr > td:nth-child(4) > font', | |
| 30 | + 'bairro' => 'body > center:nth-child(15) > table > tr > td:nth-child(2) > font', | |
| 31 | + 'municipio' => 'body > center:nth-child(16) > table > tr > td:nth-child(2) > font', | |
| 32 | + 'uf' => 'body > center:nth-child(16) > table > tr > td:nth-child(4) > font', | |
| 33 | + 'cep' => 'body > center:nth-child(17) > table > tr > td:nth-child(2) > font', | |
| 34 | + 'atividade_economica' => 'body > center:nth-child(20) > table > tr > td:nth-child(2) > font', | |
| 35 | + 'situacao' => 'body > center:nth-child(21) > table > tr > td:nth-child(2) > font', | |
| 36 | + 'situacao2' => 'body > center:nth-child(21) > table > tr > td:nth-child(3) > font', | |
| 37 | + 'data_situacao' => 'body > center:nth-child(22) > table > tr > td:nth-child(2) > font', | |
| 38 | + 'regime' => 'body > center:nth-child(23) > table > tr > td:nth-child(2) > font', | |
| 39 | + 'data_emissor_nfe' => 'body > center:nth-child(24) > table > tr > td:nth-child(2) > font', | |
| 40 | + 'indicator_obrigatoriedade_nfe' => 'body > center:nth-child(25) > table > tr > td:nth-child(2) > font', | |
| 41 | + 'data_inicio_obrigatoriedade_nfe' => 'body > center:nth-child(26) > table > tr > td:nth-child(2) > font', | |
| 42 | + 'consulta' => 'body > center:nth-child(28) > table > tr:nth-child(2) > td:nth-child(2) > font > b', | |
| 43 | + 'observacoes' => 'body > center:nth-child(30) > table > tr > td > font:nth-child(1)', | |
| 44 | 44 | ] | 
| 45 | 45 | ], | 
| 46 | 46 | 'headers' => [ |