This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
`include`, or for example via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more.
1 | <?php |
||
2 | |||
3 | namespace Zrashwani\NewsScrapper\Adapters; |
||
4 | |||
5 | use Symfony\Component\DomCrawler\Crawler; |
||
6 | use Zrashwani\NewsScrapper\Selector; |
||
7 | |||
/**
 * Adapter to extract page data from an un-structured HTML document.
 *
 * Each piece of page data (author, body, description, ...) is located through
 * a user-supplied selector (CSS or XPath). Whenever a selector is missing or
 * matches nothing, most extractors delegate to a DefaultAdapter instance.
 *
 * @author Zeid Rashwani <zrashwani.com>
 */
class CustomAdapter extends AbstractAdapter
{
    /** @var string|null selector (CSS or XPath) locating the author */
    private $authorSelector;

    /** @var string|null selector (CSS or XPath) locating the article body */
    private $bodySelector;

    /** @var string|null selector (CSS or XPath) locating the description */
    private $descriptionSelector;

    /** @var string|null selector handed to getSrcByImgSelector() to locate the image */
    private $imageSelector;

    /** @var string|null selector (CSS or XPath) locating the comma-separated keywords */
    private $keywordsSelector;

    /** @var string|null selector (CSS or XPath) locating the publish date */
    private $publishDateSelector;

    /** @var string|null selector (CSS or XPath) locating the title */
    private $titleSelector;

    /**
     * adapter used to fill in the missing selectors data by default values
     * @var DefaultAdapter $fallbackAdapter
     */
    private $fallbackAdapter;

    public function __construct()
    {
        $this->fallbackAdapter = new DefaultAdapter();
    }

    /**
     * @param string $selector CSS selector or XPath expression for the author
     * @return $this for fluent chaining
     */
    public function setAuthorSelector($selector)
    {
        $this->authorSelector = $selector;

        return $this;
    }

    /**
     * @param string $selector CSS selector or XPath expression for the body
     * @return $this for fluent chaining
     */
    public function setBodySelector($selector)
    {
        $this->bodySelector = $selector;

        return $this;
    }

    /**
     * @param string $selector CSS selector or XPath expression for the description
     * @return $this for fluent chaining
     */
    public function setDescriptionSelector($selector)
    {
        $this->descriptionSelector = $selector;

        return $this;
    }

    /**
     * @param string $selector selector passed on to getSrcByImgSelector()
     * @return $this for fluent chaining
     */
    public function setImageSelector($selector)
    {
        $this->imageSelector = $selector;

        return $this;
    }

    /**
     * @param string $selector CSS selector or XPath expression for the keywords
     * @return $this for fluent chaining
     */
    public function setKeywordsSelector($selector)
    {
        $this->keywordsSelector = $selector;

        return $this;
    }

    /**
     * @param string $selector CSS selector or XPath expression for the publish date
     * @return $this for fluent chaining
     */
    public function setPublishDateSelector($selector)
    {
        $this->publishDateSelector = $selector;

        return $this;
    }

    /**
     * @param string $selector CSS selector or XPath expression for the title
     * @return $this for fluent chaining
     */
    public function setTitleSelector($selector)
    {
        $this->titleSelector = $selector;

        return $this;
    }

    /**
     * Author found through the author selector, or the fallback adapter's
     * result when the selector is unset or yields an empty value.
     *
     * @param Crawler $crawler
     * @return mixed
     */
    public function extractAuthor(Crawler $crawler)
    {
        // `?:` falls through on exactly the values empty() treats as empty.
        return $this->getElementText($crawler, $this->authorSelector)
            ?: $this->fallbackAdapter->extractAuthor($crawler);
    }

    /**
     * Body found through the body selector, passed through normalizeHtml().
     *
     * NOTE(review): unlike the other extractors, this one never consults the
     * fallback adapter, so normalizeHtml() receives null when the selector is
     * unset or matches nothing - confirm whether that is intentional.
     *
     * @param Crawler $crawler
     * @return mixed whatever normalizeHtml() returns
     */
    public function extractBody(Crawler $crawler)
    {
        $ret = $this->getElementText($crawler, $this->bodySelector);

        return $this->normalizeHtml($ret);
    }

    /**
     * Description found through the description selector, or the fallback
     * adapter's result when the selector is unset or yields an empty value.
     *
     * @param Crawler $crawler
     * @return mixed
     */
    public function extractDescription(Crawler $crawler)
    {
        return $this->getElementText($crawler, $this->descriptionSelector)
            ?: $this->fallbackAdapter->extractDescription($crawler);
    }

    /**
     * Image link found through the image selector, or through the fallback
     * adapter when the selector is unset or yields nothing. A non-empty
     * result is passed through normalizeLink().
     *
     * @param Crawler $crawler
     * @return mixed|null result of normalizeLink(), or null when no image was found
     */
    public function extractImage(Crawler $crawler)
    {
        // Defined up-front so the variable exists on every execution path,
        // including the one where no image selector was configured.
        $ret = null;

        if (empty($this->imageSelector) === false) {
            $ret = $this->getSrcByImgSelector($crawler, $this->imageSelector);
        }
        if (empty($ret) === true) {
            $ret = $this->fallbackAdapter->extractImage($crawler);
        }
        if (empty($ret) === true) {
            return null;
        }

        return $this->normalizeLink($ret);
    }

    /**
     * Keywords found through the keywords selector: the matched content is
     * split on commas and passed through normalizeKeywords(). Falls back to
     * the fallback adapter when the selector is unset or yields nothing.
     *
     * @param Crawler $crawler
     * @return mixed
     */
    public function extractKeywords(Crawler $crawler)
    {
        $ret = $this->getElementText($crawler, $this->keywordsSelector);
        if (empty($ret) === true) {
            return $this->fallbackAdapter->extractKeywords($crawler);
        }

        return $this->normalizeKeywords(explode(',', $ret));
    }

    /**
     * Publish date found through the publish-date selector, or the fallback
     * adapter's result when the selector is unset or yields an empty value.
     *
     * @param Crawler $crawler
     * @return mixed
     */
    public function extractPublishDate(Crawler $crawler)
    {
        return $this->getElementText($crawler, $this->publishDateSelector)
            ?: $this->fallbackAdapter->extractPublishDate($crawler);
    }

    /**
     * Title found through the title selector, or the fallback adapter's
     * result when the selector is unset or yields an empty value.
     *
     * @param Crawler $crawler
     * @return mixed
     */
    public function extractTitle(Crawler $crawler)
    {
        return $this->getElementText($crawler, $this->titleSelector)
            ?: $this->fallbackAdapter->extractTitle($crawler);
    }

    /**
     * getting text of element by selector (css selector or xpath )
     *
     * With the default closure, the value returned is the html() of the last
     * node matched by the selector (each match overwrites the previous one).
     *
     * @param Crawler $crawler
     * @param string $selector
     * @param \Closure $extractClosure callback function to be used for extraction;
     *                                 NOTE(review): only the default closure writes
     *                                 to the returned value, so with a caller-supplied
     *                                 closure this method returns null - confirm
     *                                 this is the intended contract.
     * @return string|null null when the selector is empty or nothing was captured
     */
    protected function getElementText(Crawler $crawler, $selector, $extractClosure = null)
    {
        if (empty($selector) === true) {
            return null;
        }

        $ret = null;
        if ($extractClosure === null) {
            $extractClosure = function (Crawler $node) use (&$ret) {
                $ret = $node->html();
            };
        }

        // Anything Selector::isCSS() does not recognise is treated as XPath.
        $nodes = Selector::isCSS($selector)
            ? $crawler->filter($selector)
            : $crawler->filterXPath($selector);
        $nodes->each($extractClosure);

        return $ret;
    }
}
||
176 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.