1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/* |
4
|
|
|
* (c) Jean-François Lépine <https://twitter.com/Halleck45> |
5
|
|
|
* |
6
|
|
|
* For the full copyright and license information, please view the LICENSE |
7
|
|
|
* file that was distributed with this source code. |
8
|
|
|
*/ |
9
|
|
|
|
10
|
|
|
namespace Hal\Component\Token; |
11
|
|
|
use Hal\Component\Cache\Cache; |
12
|
|
|
use Hal\Component\Cache\CacheNull; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
* Tokenize file |
16
|
|
|
* |
17
|
|
|
* @author Jean-François Lépine <https://twitter.com/Halleck45> |
18
|
|
|
*/ |
19
|
|
|
class Tokenizer {

    /**
     * Cache consulted before tokenizing and filled with the raw token list afterwards.
     *
     * @var Cache
     */
    private $cache;

    /**
     * Tokenizer constructor.
     *
     * @param Cache|null $cache cache to use; a CacheNull instance is created when none is given
     */
    public function __construct(Cache $cache = null)
    {
        // strict comparison: only a missing cache must trigger the fallback
        if(null === $cache) {
            $cache = new CacheNull();
        }
        $this->cache = $cache;
    }

    /**
     * Tokenize file
     *
     * The raw token list is cached under the file name, so a second call for the same
     * file is served from the cache instead of tokenizing again.
     *
     * @param string $filename path of the PHP file to tokenize
     * @return TokenCollection
     * @throws NoTokenizableException when a large file cannot be tokenized in the child process
     */
    public function tokenize($filename) {

        if($this->cache->has($filename)) {
            return new TokenCollection($this->cache->get($filename));
        }

        $size = filesize($filename);
        $limit = 102400; // 100 KiB: bigger files are tokenized in a separate process
        if($size > $limit) {
            $tokens = $this->tokenizeLargeFile($filename);
        } else {
            $tokens = token_get_all($this->cleanup(file_get_contents($filename)));
        }

        $this->cache->set($filename, $tokens);
        return new TokenCollection($tokens);
    }

    /**
     * Tokenize large files
     *
     * @param string $filename path of the PHP file to tokenize
     * @return array raw token list, as produced by token_get_all()
     * @throws NoTokenizableException when the child process does not return a token list
     */
    protected function tokenizeLargeFile($filename) {
        // run in another process to allow catching fatal errors due to memory issues with "token_get_all()" function
        // @see https://github.com/Halleck45/PhpMetrics/issues/139
        // @see https://github.com/Halleck45/PhpMetrics/issues/13
        //
        // The file name is handed over as a command line argument (read through $argv in the
        // child) instead of being pasted into the child's source, so quotes, "$" or backslashes
        // in the path cannot break or alter the generated code.
        // The child code deliberately avoids '"', '!' and '%': escapeshellarg() replaces these
        // characters with spaces on Windows.
        // The short open tag replacement mirrors cleanup().
        $code = <<<'EOT'
$c = file_get_contents($argv[1]);
$c = preg_replace('/<\?(\s)/', '<?php $1', $c);
echo serialize(token_get_all($c));
EOT;
        $command = sprintf('php -r %s -- %s', escapeshellarg($code), escapeshellarg($filename));
        $output = shell_exec($command);

        // shell_exec() gives no string when the command fails or prints nothing, and the
        // child may print an error message instead of serialized data: accept arrays only
        $tokens = is_string($output) ? unserialize($output) : false;
        if(!is_array($tokens)) {
            throw new NoTokenizableException(sprintf('Cannot tokenize "%s". This file is probably too big. Please try to increase memory_limit', $filename));
        }
        return $tokens;
    }

    /**
     * Clean php source
     *
     * @param string $content PHP source code
     * @return string source code where short open tags are replaced by "<?php"
     */
    private function cleanup($content) {
        // replacing short open tags by <?php
        // if file contains short open tags but short_open_tags='Off' in php.ini bug can occur
        // @see https://github.com/Halleck45/PhpMetrics/issues/154
        //
        // the whitespace that followed the short tag is kept ($1): dropping a newline there
        // would shift the line number of every following token
        return preg_replace('!<\?(\s)!', '<?php $1', $content);
    }

}
If a method or function can return multiple different values, and unless you are sure that you can only receive a single value in this context, we recommend adding an additional type check (for example, testing the result with `is_array()` or `!== false` before using it).
If this is a common case that PHP Analyzer should handle natively, please let us know by opening an issue.