This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | namespace Zrashwani\NewsScrapper\Adapters; |
||
4 | |||
5 | use \Symfony\Component\DomCrawler\Crawler; |
||
6 | |||
7 | /** |
||
8 | * Adapter to extract news based on Open Graph protocol specifications |
||
9 | * @link http://ogp.me/ open graph meta data specifications |
||
10 | * @author Zeid Rashwani <zrashwani.com> |
||
11 | */ |
||
12 | class OpenGraphAdapter extends AbstractAdapter |
||
13 | { |
||
/**
 * Resolve the title of the document.
 *
 * Sources are tried in this order: the og:title meta tag, the <h1>
 * elements, and finally the <title> element of the head. Within one
 * source the last matching node provides the value.
 *
 * @param Crawler $crawler
 * @return string|null
 */
public function extractTitle(Crawler $crawler)
{
    $title = null;

    $ogNodes = $crawler->filterXPath("//head/meta[@property='og:title']");
    if ($ogNodes->count() > 0) {
        $title = $ogNodes->last()->attr('content');
    }

    // fall back to text-bearing elements when the document has no usable og:title
    foreach (array('//h1', '//head/title') as $xpath) {
        if (empty($title) === false) {
            break;
        }

        $textNodes = $crawler->filterXPath($xpath);
        if ($textNodes->count() > 0) {
            $title = $textNodes->last()->text();
        }
    }

    return $title;
}
||
51 | |||
/**
 * Extract the image url of the document.
 *
 * The last og:image meta tag whose image is not reported as small by
 * getCheckSmallImage() is used. When no og:image qualifies, the <img>
 * elements of the document are scanned and the largest one whose width
 * and height both exceed 200 pixels is picked instead.
 *
 * @param Crawler $crawler
 * @return string|null image url passed through normalizeLink(), null when
 *                     no suitable image was found
 */
public function extractImage(Crawler $crawler)
{
    $ret = null;
    $theAdapter = $this;

    $crawler->filterXPath("//head/meta[@property='og:image']")
        ->each(
            function (Crawler $node) use (&$ret, $theAdapter) {
                $content = $node->attr('content');
                if ($content === null || $content === '') {
                    return; // no url to measure
                }

                if ($theAdapter->getCheckSmallImage($content) === false) { //not small image size
                    $ret = $content;
                }
            }
        );

    if (empty($ret) === true) {
        // Dimensions of the best candidate so far. They are remembered here
        // so the chosen image is not fetched again for every <img> node.
        $bestWidth = 0;
        $bestHeight = 0;

        $crawler->filterXPath('//img')
            ->each(
                function (Crawler $node) use (&$ret, &$bestWidth, &$bestHeight, $theAdapter) {
                    $src = $node->attr('src');
                    if ($src === null || $src === '') {
                        return; // <img> without a usable src
                    }

                    $img_src = $theAdapter->normalizeLink($src);
                    $url = pathinfo((string) $img_src);
                    if (isset($url['dirname']) === false) {
                        return; // pathinfo() gives no dirname for an empty path
                    }

                    $size = getimagesize($url['dirname'].'/'.urlencode($url['basename']));
                    if ($size === false) {
                        return; // unreadable or not an image
                    }
                    list($width, $height) = $size;

                    if ($width > $bestWidth && $height > $bestHeight
                        && $width > 200 && $height > 200 //min size of the image amended
                    ) {
                        $ret = $img_src;
                        $bestWidth = $width;
                        $bestHeight = $height;
                    }
                }
            );
    }

    if (empty($ret) === false) {
        $ret = $this->normalizeLink($ret);
    }

    return $ret;
}
||
103 | |||
/**
 * Read the description from the og:description meta tag.
 *
 * When several matching tags exist, the last one provides the value.
 *
 * @param Crawler $crawler
 * @return string|null null when the document has no og:description tag
 */
public function extractDescription(Crawler $crawler)
{
    $metaNodes = $crawler->filterXPath("//head/meta[@property='og:description']");

    if ($metaNodes->count() === 0) {
        return null;
    }

    return $metaNodes->last()->attr('content');
}
||
117 | |||
/**
 * Extract keywords from the og:keywords meta tag.
 *
 * The tag content is split on commas; every keyword is trimmed and blank
 * entries are dropped. When several tags exist, the last one that yields
 * at least one keyword provides the result.
 *
 * @param Crawler $crawler
 * @return array list of keyword strings, empty when none were found
 */
public function extractKeywords(Crawler $crawler)
{
    $ret = array();

    $crawler->filterXPath("//head/meta[@property='og:keywords']")
        ->each(
            function (Crawler $node) use (&$ret) {
                $keywords = array();

                // cast guards against a null attribute value being split
                foreach (explode(',', (string) $node->attr('content')) as $keyword) {
                    $keyword = trim($keyword);
                    if ($keyword !== '') {
                        $keywords[] = $keyword;
                    }
                }

                // a tag without any usable keyword must not wipe an earlier result
                if (count($keywords) > 0) {
                    $ret = $keywords;
                }
            }
        );

    return $ret;
}
||
141 | |||
/**
 * Body extraction is not supported by this adapter.
 *
 * @param Crawler $crawler not used
 * @return null always null
 */
public function extractBody(Crawler $crawler)
{
    // The Open Graph protocol offers nothing to extract a body from
    return null;
}
||
147 | |||
/**
 * Extract the publish date from the article:published_time meta tag.
 *
 * When several matching tags exist, the last one provides the value.
 *
 * @param Crawler $crawler
 * @return string|null date formatted as \DateTime::ISO8601, or null when
 *                     the tag is missing, blank or not a parsable date
 */
public function extractPublishDate(Crawler $crawler)
{
    $date_str = null;

    $crawler->filterXPath("//head/meta[@property='article:published_time']")
        ->each(
            function (Crawler $node) use (&$date_str) {
                $date_str = $node->attr('content');
            }
        );

    // A blank value carries no date; new \DateTime('') would silently
    // produce the current time instead.
    if ($date_str === null || trim($date_str) === '') {
        return null;
    }

    try {
        $date = new \DateTime($date_str);
    } catch (\Exception $e) {
        // the value comes from a scraped page and may not be a valid date
        return null;
    }

    return $date->format(\DateTime::ISO8601);
}
||
166 | |||
/**
 * Read the author from the article:author meta tag.
 *
 * When several matching tags exist, the last one provides the value.
 *
 * @param Crawler $crawler
 * @return string|null null when the document has no article:author tag
 */
public function extractAuthor(Crawler $crawler)
{
    $metaNodes = $crawler->filterXPath("//head/meta[@property='article:author']");

    if ($metaNodes->count() === 0) {
        return null;
    }

    return $metaNodes->last()->attr('content');
}
||
179 | |||
/**
 * Tell whether an image is too small to be used.
 *
 * The image is measured with getimagesize() after url-encoding the last
 * path segment of the given url.
 *
 * @param string $imageUrl url or path of the image to measure
 * @return boolean true when the width or the height is below 200 pixels,
 *                 or when the size of the image cannot be determined
 */
public function getCheckSmallImage($imageUrl)
{
    $url_ret = pathinfo((string) $imageUrl);

    // pathinfo() gives no dirname for an empty path: nothing to measure
    if (isset($url_ret['dirname']) === false) {
        return true;
    }

    $size = getimagesize(
        $url_ret['dirname'].'/'.urlencode($url_ret['basename'])
    );

    // unreadable or non-image resources are treated as small
    if ($size === false) {
        return true;
    }

    list($width_org, $height_org) = $size;

    return $width_org < 200 || $height_org < 200;
}
||
193 | } |
||
194 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.