@@ 16-46 (lines=31) @@ | ||
13 | * @author Matthijs van den Bos |
|
14 | * @copyright 2013 Matthijs van den Bos |
|
15 | */ |
|
class CssSelectorDiscoverer extends Discoverer implements DiscovererInterface
{
    /**
     * CSS selector handed to the crawler's filter() call in discover().
     *
     * @var string
     */
    protected $cssSelector;

    /**
     * Stores the selector used later to pick nodes out of a resource.
     *
     * @param string $cssSelector
     */
    public function __construct($cssSelector)
    {
        $this->cssSelector = $cssSelector;
    }

    /**
     * Builds a DiscoveredUri from the 'href' attribute of every node that the
     * configured CSS selector matches in the resource's crawler.
     *
     * The resource's own URI string is passed as the second argument to Uri
     * (presumably the base against which relative hrefs are resolved - confirm
     * against the Uri class).
     *
     * @param Resource $resource
     * @return DiscoveredUri[] Nodes whose href raises UriSyntaxException are left out
     */
    public function discover(Resource $resource)
    {
        $discovered = array();
        $matches = $resource->getCrawler()->filter($this->cssSelector);

        foreach ($matches as $element) {
            try {
                $href = $element->getAttribute('href');
                $base = $resource->getUri()->toString();
                $uri = new Uri($href, $base);
                $discovered[] = new DiscoveredUri($uri);
            } catch (UriSyntaxException $ignored) {
                // Deliberately best-effort: an invalid URI is skipped, not reported.
            }
        }

        return $discovered;
    }
}
|
47 |
@@ 16-46 (lines=31) @@ | ||
13 | * @author Matthijs van den Bos |
|
14 | * @copyright 2013 Matthijs van den Bos |
|
15 | */ |
|
class XPathExpressionDiscoverer extends Discoverer implements DiscovererInterface
{
    /**
     * XPath expression handed to the crawler's filterXPath() call in discover().
     *
     * @var string
     */
    protected $xpathExpression;

    /**
     * Stores the expression used later to pick nodes out of a resource.
     *
     * @param string $xpathExpression
     */
    public function __construct($xpathExpression)
    {
        $this->xpathExpression = $xpathExpression;
    }

    /**
     * Builds a DiscoveredUri from the 'href' attribute of every element node
     * that the configured XPath expression selects in the resource's crawler.
     *
     * The resource's own URI string is passed as the second argument to Uri
     * (presumably the base against which relative hrefs are resolved - confirm
     * against the Uri class).
     *
     * @param Resource $resource
     * @return DiscoveredUri[] Non-element nodes and hrefs that raise
     *                         UriSyntaxException are left out
     */
    public function discover(Resource $resource)
    {
        $crawler = $resource->getCrawler()->filterXPath($this->xpathExpression);
        $uris = array();
        foreach ($crawler as $node) {
            // An XPath expression can select attribute, text or comment nodes
            // (e.g. "//a/@href"). Only DOMElement offers getAttribute(), so
            // calling it on anything else would abort the whole discovery run.
            if (!$node instanceof \DOMElement) {
                continue;
            }

            try {
                $uris[] = new DiscoveredUri(new Uri($node->getAttribute('href'), $resource->getUri()->toString()));
            } catch (UriSyntaxException $e) {
                // Deliberately best-effort: an invalid URI is skipped, not reported.
            }
        }
        return $uris;
    }
}
|
47 |