|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/* |
|
4
|
|
|
* (c) Jean-François Lépine <https://twitter.com/Halleck45> |
|
5
|
|
|
* |
|
6
|
|
|
* For the full copyright and license information, please view the LICENSE |
|
7
|
|
|
* file that was distributed with this source code. |
|
8
|
|
|
*/ |
|
9
|
|
|
|
|
10
|
|
|
namespace Hal\Component\Token; |
|
11
|
|
|
use Hal\Component\Cache\Cache; |
|
12
|
|
|
use Hal\Component\Cache\CacheNull; |
|
13
|
|
|
|
|
14
|
|
|
/** |
|
15
|
|
|
* Tokenize file |
|
16
|
|
|
* |
|
17
|
|
|
* @author Jean-François Lépine <https://twitter.com/Halleck45> |
|
18
|
|
|
*/ |
|
19
|
|
|
class Tokenizer
{
    /**
     * Files strictly larger than this many bytes (about 100 KiB) are tokenized
     * in a separate PHP process instead of in the current one.
     */
    const LARGE_FILE_THRESHOLD = 102400;

    /**
     * Store for already computed token lists, keyed by filename.
     *
     * @var Cache
     */
    private $cache;

    /**
     * Tokenizer constructor.
     *
     * @param Cache|null $cache cache used to store token lists; when omitted a
     *                          CacheNull instance is used instead
     */
    public function __construct(Cache $cache = null)
    {
        if (null === $cache) {
            $cache = new CacheNull();
        }

        $this->cache = $cache;
    }

    /**
     * Tokenize file
     *
     * Returns the cached tokens when the cache already has an entry for
     * $filename. Otherwise the file is tokenized (in a child process when it
     * is larger than LARGE_FILE_THRESHOLD), the raw token array is stored in
     * the cache under $filename, and the tokens are returned wrapped in a
     * TokenCollection.
     *
     * @param string $filename path of the PHP file to tokenize
     * @return TokenCollection
     * @throws NoTokenizableException when a large file cannot be tokenized
     */
    public function tokenize($filename)
    {
        if ($this->cache->has($filename)) {
            return new TokenCollection($this->cache->get($filename));
        }

        if (filesize($filename) > self::LARGE_FILE_THRESHOLD) {
            $tokens = $this->tokenizeLargeFile($filename);
        } else {
            $tokens = token_get_all($this->cleanup(file_get_contents($filename)));
        }

        $this->cache->set($filename, $tokens);

        return new TokenCollection($tokens);
    }

    /**
     * Tokenize large files
     *
     * Runs token_get_all() in another PHP process so that a fatal error caused
     * by memory exhaustion in the tokenizer does not kill the current process.
     *
     * @see https://github.com/Halleck45/PhpMetrics/issues/139
     * @see https://github.com/Halleck45/PhpMetrics/issues/13
     *
     * @param string $filename path of the PHP file to tokenize
     * @return array raw token list as produced by token_get_all()
     * @throws NoTokenizableException when the child process yields no usable token list
     */
    protected function tokenizeLargeFile($filename)
    {
        // The child reads the path from $argv instead of having it spliced into
        // its source, so quotes or "$" in the path cannot alter the script.
        // The script deliberately contains no double quote, "%" or "!" because
        // escapeshellarg() replaces those characters with spaces on Windows.
        $code = <<<'EOT'
$c = file_get_contents($argv[1]);
$c = preg_replace('#(<\?\s)#', '<?php ', $c);
echo serialize(token_get_all($c));
EOT;

        // "--" ends PHP's own option parsing so the path is always handed to the script.
        $command = sprintf(
            'php -r %s -- %s',
            escapeshellarg($code),
            escapeshellarg($filename)
        );
        $output = shell_exec($command);

        // shell_exec() gives null/false when the child failed or printed nothing;
        // only a non-empty string can hold a serialized token list.
        $tokens = false;
        if (is_string($output) && '' !== $output) {
            $tokens = unserialize($output);
        }

        if (!is_array($tokens)) {
            throw new NoTokenizableException(sprintf('Cannot tokenize "%s". This file is probably too big. Please try to increase memory_limit', $filename));
        }

        return $tokens;
    }

    /**
     * Clean php source
     *
     * Replaces every short open tag followed by a whitespace character with
     * "<?php ". If the file contains short open tags but short_open_tag is
     * 'Off' in php.ini, a bug can occur while tokenizing.
     *
     * @see https://github.com/Halleck45/PhpMetrics/issues/154
     *
     * @param string $content PHP source code
     * @return string source code using long open tags only
     */
    private function cleanup($content)
    {
        return preg_replace('!(<\?\s)!', '<?php ', $content);
    }
}
If a method or function can return multiple different values, and you cannot be sure that only a single one of them is possible in this context, we recommend adding an additional type check:
If this is a common case that PHP Analyzer should handle natively, please let us know by opening an issue.