@@ -68,7 +68,7 @@ |
||
| 68 | 68 | } |
| 69 | 69 | |
| 70 | 70 | /** |
| 71 | - * @param CrawlUrl|\Psr\Http\Message\UriInterface|string $crawlUrl |
|
| 71 | + * @param CrawlUrl $crawlUrl |
|
| 72 | 72 | * |
| 73 | 73 | * @return bool |
| 74 | 74 | */ |
@@ -2,9 +2,9 @@ |
||
| 2 | 2 | |
| 3 | 3 | namespace Spatie\Crawler; |
| 4 | 4 | |
| 5 | -use Psr\Http\Message\UriInterface; |
|
| 6 | -use Psr\Http\Message\ResponseInterface; |
|
| 7 | 5 | use GuzzleHttp\Exception\RequestException; |
| 6 | +use Psr\Http\Message\ResponseInterface; |
|
| 7 | +use Psr\Http\Message\UriInterface; |
|
| 8 | 8 | |
| 9 | 9 | abstract class CrawlObserver |
| 10 | 10 | { |
@@ -2,9 +2,9 @@ |
||
| 2 | 2 | |
| 3 | 3 | namespace Spatie\Crawler; |
| 4 | 4 | |
| 5 | -use Spatie\Robots\RobotsMeta; |
|
| 6 | -use Spatie\Robots\RobotsHeaders; |
|
| 7 | 5 | use Psr\Http\Message\ResponseInterface; |
| 6 | +use Spatie\Robots\RobotsHeaders; |
|
| 7 | +use Spatie\Robots\RobotsMeta; |
|
| 8 | 8 | |
| 9 | 9 | class CrawlerRobots |
| 10 | 10 | { |
@@ -3,20 +3,20 @@ |
||
| 3 | 3 | namespace Spatie\Crawler; |
| 4 | 4 | |
| 5 | 5 | use Generator; |
| 6 | -use Tree\Node\Node; |
|
| 7 | -use GuzzleHttp\Pool; |
|
| 8 | 6 | use GuzzleHttp\Client; |
| 9 | -use GuzzleHttp\Psr7\Uri; |
|
| 7 | +use GuzzleHttp\Pool; |
|
| 10 | 8 | use GuzzleHttp\Psr7\Request; |
| 11 | -use Spatie\Robots\RobotsTxt; |
|
| 9 | +use GuzzleHttp\Psr7\Uri; |
|
| 12 | 10 | use GuzzleHttp\RequestOptions; |
| 13 | 11 | use Psr\Http\Message\UriInterface; |
| 14 | 12 | use Spatie\Browsershot\Browsershot; |
| 13 | +use Spatie\Crawler\CrawlQueue\CollectionCrawlQueue; |
|
| 15 | 14 | use Spatie\Crawler\CrawlQueue\CrawlQueue; |
| 15 | +use Spatie\Crawler\Exception\InvalidCrawlRequestHandler; |
|
| 16 | 16 | use Spatie\Crawler\Handlers\CrawlRequestFailed; |
| 17 | 17 | use Spatie\Crawler\Handlers\CrawlRequestFulfilled; |
| 18 | -use Spatie\Crawler\CrawlQueue\CollectionCrawlQueue; |
|
| 19 | -use Spatie\Crawler\Exception\InvalidCrawlRequestHandler; |
|
| 18 | +use Spatie\Robots\RobotsTxt; |
|
| 19 | +use Tree\Node\Node; |
|
| 20 | 20 | |
| 21 | 21 | class Crawler |
| 22 | 22 | { |
@@ -340,6 +340,9 @@ |
||
| 340 | 340 | } |
| 341 | 341 | } |
| 342 | 342 | |
| 343 | + /** |
|
| 344 | + * @return Node|null |
|
| 345 | + */ |
|
| 343 | 346 | public function addToDepthTree(UriInterface $url, UriInterface $parentUrl, Node $node = null): ?Node |
| 344 | 347 | { |
| 345 | 348 | if (is_null($this->maximumDepth)) { |
@@ -74,6 +74,9 @@ |
||
| 74 | 74 | $this->crawler->getCrawlObservers()->crawled($crawlUrl, $response); |
| 75 | 75 | } |
| 76 | 76 | |
| 77 | + /** |
|
| 78 | + * @param int $readMaximumBytes |
|
| 79 | + */ |
|
| 77 | 80 | protected function convertBodyToString(StreamInterface $bodyStream, $readMaximumBytes = 1024 * 1024 * 2): string |
| 78 | 81 | { |
| 79 | 82 | $bodyStream->rewind(); |
@@ -3,15 +3,15 @@ |
||
| 3 | 3 | namespace Spatie\Crawler\Handlers; |
| 4 | 4 | |
| 5 | 5 | use GuzzleHttp\Psr7\Uri; |
| 6 | -use Spatie\Crawler\Crawler; |
|
| 7 | -use Spatie\Crawler\CrawlUrl; |
|
| 8 | -use Spatie\Crawler\LinkAdder; |
|
| 9 | -use Spatie\Crawler\CrawlerRobots; |
|
| 10 | 6 | use GuzzleHttp\RedirectMiddleware; |
| 7 | +use Psr\Http\Message\ResponseInterface; |
|
| 8 | +use Psr\Http\Message\StreamInterface; |
|
| 11 | 9 | use Psr\Http\Message\UriInterface; |
| 12 | 10 | use Spatie\Crawler\CrawlSubdomains; |
| 13 | -use Psr\Http\Message\StreamInterface; |
|
| 14 | -use Psr\Http\Message\ResponseInterface; |
|
| 11 | +use Spatie\Crawler\CrawlUrl; |
|
| 12 | +use Spatie\Crawler\Crawler; |
|
| 13 | +use Spatie\Crawler\CrawlerRobots; |
|
| 14 | +use Spatie\Crawler\LinkAdder; |
|
| 15 | 15 | use function GuzzleHttp\Psr7\stream_for; |
| 16 | 16 | |
| 17 | 17 | class CrawlRequestFulfilled |