bilginpro /
agency-aa
This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
| 1 | <?php |
||
| 2 | |||
| 3 | namespace BilginPro\Agency\Aa; |
||
| 4 | |||
| 5 | use BilginPro\Agency\Aa\Exceptions\AuthenticationException; |
||
| 6 | use BilginPro\Agency\Aa\Exceptions\NoDataFoundException; |
||
| 7 | use Carbon\Carbon; |
||
| 8 | use GuzzleHttp; |
||
| 9 | |||
/**
 * Class Crawler
 *
 * Fetches news items from the AA API: runs a search with the configured
 * filter attributes, downloads each hit as a NewsML document and converts it
 * into a plain news object.
 *
 * @package BilginPro\Agency\Aa
 */
class Crawler
{
    /**
     * Base URL of AA API
     */
    const API_BASE_URL = 'https://api.aa.com.tr';

    /**
     * Pause between consecutive API requests, in microseconds (0.5 s).
     */
    const REQUEST_DELAY = 500000;

    /**
     * API user name, set from the 'user_name' config key.
     *
     * @var string
     */
    protected $user_name = '';

    /**
     * API password, set from the 'password' config key.
     *
     * @var string
     */
    protected $password = '';

    /**
     * Maximum length, in characters, of summaries built by createSummary().
     *
     * @var int
     */
    protected $summary_length = 150;

    /**
     * Form parameters sent with the search request. Values passed to crawl()
     * override these defaults key by key.
     *
     * @var array
     */
    protected $attributes = [
        'filter_language' => '1',
        'filter_type' => '1',
        'limit' => '5',
    ];

    /**
     * [user name, password] pair handed to Guzzle's 'auth' request option.
     *
     * @var array
     */
    protected $auth = ['', ''];

    /**
     * Create a new Crawler Instance
     *
     * @param array $config May contain the keys 'user_name' and 'password'.
     * @throws \InvalidArgumentException When $config is not an array.
     */
    public function __construct($config)
    {
        $this->setParameters($config);
    }

    /**
     * Searches the API and returns the matching news items.
     *
     * Sleeps REQUEST_DELAY microseconds before the search, after the search
     * and after every document request.
     *
     * @param array $attributes Filter attributes overriding the defaults.
     * @return array List of \stdClass news objects (see newsmlToNews()).
     * @throws AuthenticationException When the API reports code 401.
     * @throws NoDataFoundException When the API reports any other non-200 code
     *                              or the response cannot be interpreted.
     */
    public function crawl($attributes = [])
    {
        $this->setAttributes($attributes);

        $result = [];

        usleep(self::REQUEST_DELAY);
        $search = $this->search();

        usleep(self::REQUEST_DELAY);

        // A successful response without a result list yields no items instead
        // of a warning from iterating over a missing property.
        $items = isset($search->data->result) ? $search->data->result : [];

        foreach ($items as $item) {
            $newsml = $this->document($item->id);
            if (!empty($newsml)) {
                $result[] = $this->newsmlToNews($newsml);
            }
            usleep(self::REQUEST_DELAY);
        }

        return $result;
    }

    /**
     * Creates a news object from NewsML SimpleXmlElement instance.
     *
     * The returned object carries: code, title, summary, content, created_at
     * (formatted 'd.m.Y H:i:s'), category, city and images (list of URLs).
     * category and city are empty strings when the document has no Turkish
     * ("tr") name for them.
     *
     * @param \SimpleXMLElement $xml
     * @return \stdClass
     */
    protected function newsmlToNews($xml)
    {
        $xml->registerXPathNamespace('n', 'http://iptc.org/std/nar/2006-10-01/');

        $item = $xml->itemSet->newsItem;
        $body = $item->contentSet->inlineXML->nitf->body;

        $news = new \stdClass();
        $news->code = (string)$item['guid'];
        $news->title = (string)$item->contentMeta->headline;
        $news->summary = (string)$body->{'body.head'}->abstract;
        $news->content = (string)$body->{'body.content'};

        // NOTE(review): fixed +3 hour shift - presumably converts the API's
        // timestamp to Turkey local time; confirm against the API's timezone.
        $news->created_at = (new Carbon((string)$item->itemMeta->versionCreated))
            ->addHours(3)
            ->format('d.m.Y H:i:s');

        $category = $this->firstXPathMatch($xml, '//n:subject/n:name[@xml:lang="tr"]');
        $news->category = $category === null ? '' : (string)$category;

        $city = $this->firstXPathMatch(
            $xml,
            '//n:contentMeta/n:located[@type="cptype:city"]/n:name[@xml:lang="tr"]'
        );
        $news->city = $city === null ? '' : (string)$city;

        $news->images = [];
        $seeAlso = $this->firstXPathMatch($xml, '//n:newsItem/n:itemMeta/n:link[@rel="irel:seeAlso"]');
        if ($seeAlso !== null && isset($seeAlso['residref'])) {
            $news->images[] = $this->getDocumentLink((string)$seeAlso['residref'], 'print');
        }

        return $news;
    }

    /**
     * Runs an XPath query and returns its first match.
     *
     * @param \SimpleXMLElement $xml Element the query is evaluated on.
     * @param string $query XPath expression.
     * @return \SimpleXMLElement|null Null when nothing matched or the query failed.
     */
    protected function firstXPathMatch($xml, $query)
    {
        $matches = $xml->xpath($query);

        return (is_array($matches) && isset($matches[0])) ? $matches[0] : null;
    }

    /**
     * Creates document link for next requests.
     *
     * @param string $id
     * @param string $format
     * @return string
     */
    protected function getDocumentLink($id, $format)
    {
        return self::API_BASE_URL . '/abone/document/' . $id . '/' . $format;
    }

    /**
     * Fetches NewsML document, creates a SimpleXMLElement instance and returns it.
     *
     * @param string $id Document id as returned by the search.
     * @return null|\SimpleXMLElement Null when the response body is empty or
     *                                is not well-formed XML.
     */
    protected function document($id)
    {
        // The random digits appended to "v=2" vary the URL on every call;
        // presumably a cache-buster.
        $url = $this->getDocumentLink($id, 'newsml29') . '?v=2' . rand(1000, 9999);
        $newsml = $this->fetchUrl($url, 'GET', ['auth' => $this->auth]);

        if ($newsml === '') {
            return null;
        }

        // Collect libxml parse errors internally so malformed XML results in
        // a null return rather than PHP warnings.
        $previous = libxml_use_internal_errors(true);
        $xml = simplexml_load_string($newsml);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $xml === false ? null : $xml;
    }

    /**
     * Searches documents with given filter attributes.
     *
     * @return mixed Decoded JSON search response.
     * @throws AuthenticationException When the response code is 401.
     * @throws NoDataFoundException When the response code is neither 200 nor
     *                              401, or the body has no response code.
     */
    protected function search()
    {
        $res = $this->fetchUrl(self::API_BASE_URL . '/abone/search', 'POST', [
            'auth' => $this->auth,
            'form_params' => $this->attributes
        ]);

        $search = json_decode($res);

        // An empty or undecodable body has no response code to inspect.
        if (!isset($search->response->code)) {
            throw new NoDataFoundException;
        }

        switch ($search->response->code) {
            case 200:
                break;
            case 401:
                throw new AuthenticationException;
            default:
                throw new NoDataFoundException;
        }

        return $search;
    }

    /**
     * Creates short summary of the news, strip credits.
     *
     * Everything up to and including the first "(DHA)" marker is dropped,
     * tags are stripped and the text is shortened to $summary_length
     * characters.
     *
     * @param string $text
     * @return string
     */
    protected function createSummary($text)
    {
        if (strpos($text, '(DHA)') !== false) {
            // Limit 2 keeps everything after the first marker intact, even if
            // the marker appears again later in the text.
            $split = explode('(DHA)', $text, 2);
            // Double quotes are required so the escapes denote whitespace and
            // control characters rather than literal letters.
            $text = trim($split[1], " \t\n\r\0\x0B-");
        }

        return (string)$this->shortenString(strip_tags($text), $this->summary_length);
    }

    /**
     * Sets config parameters.
     *
     * @param array $config May contain the keys 'user_name' and 'password'.
     * @throws \InvalidArgumentException When $config is not an array.
     */
    protected function setParameters($config)
    {
        if (!is_array($config)) {
            throw new \InvalidArgumentException('$config variable must be an array.');
        }
        if (array_key_exists('user_name', $config)) {
            $this->user_name = $config['user_name'];
        }
        if (array_key_exists('password', $config)) {
            $this->password = $config['password'];
        }

        $this->auth = [$this->user_name, $this->password];
    }

    /**
     * Sets filter attributes.
     *
     * @param array $attributes Values that add to or override the defaults.
     */
    protected function setAttributes($attributes)
    {
        foreach ($attributes as $key => $value) {
            $this->attributes[$key] = $value;
        }
    }

    /**
     * Fetches given url and returns response as string.
     *
     * @param string $url
     * @param string $method
     * @param array $options Guzzle request options.
     *
     * @return string Response body, or an empty string when the status code is not 200.
     */
    protected function fetchUrl($url, $method = 'GET', $options = [])
    {
        $client = new GuzzleHttp\Client();
        $res = $client->request($method, $url, $options);

        if ($res->getStatusCode() === 200) {
            return (string)$res->getBody();
        }

        return '';
    }

    /**
     * Cuts the given string from the end of the appropriate word.
     *
     * Strings of at most $len characters are returned unchanged. Longer ones
     * are cut to $len characters, trimmed back to the last space (when there
     * is one) and suffixed with '...'.
     *
     * @param string $str
     * @param int $len Maximum length in characters (not bytes).
     * @return string
     */
    protected function shortenString($str, $len)
    {
        // Measure in characters, matching mb_substr below; a byte count would
        // shorten multibyte text that is already within the limit.
        if (mb_strlen($str, 'UTF-8') <= $len) {
            return $str;
        }

        $str = rtrim(mb_substr($str, 0, $len, 'UTF-8'));

        // Without a space there is no word boundary to cut at; keep the hard
        // cut instead of discarding the whole text.
        $lastSpace = mb_strrpos($str, ' ', 0, 'UTF-8');
        if ($lastSpace !== false) {
            $str = mb_substr($str, 0, $lastSpace, 'UTF-8');
        }

        return str_replace(',...', '...', $str . '...');
    }

    /**
     * Converts a string to "Title Case"
     *
     * @param string $str
     * @return string
     */
    protected function titleCase($str)
    {
        return mb_convert_case($str, MB_CASE_TITLE, 'UTF-8');
    }
}
||
| 258 |
This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.
Both the `$myVar` assignment in line 1 and the `$higher` assignment in line 2 are dead. The first because `$myVar` is never used, and the second because `$higher` is always overwritten on every possible execution path.