1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Asparagus; |
4
|
|
|
|
5
|
|
|
/** |
6
|
|
|
* Package-private class to help with regexes. |
7
|
|
|
* |
8
|
|
|
* Supported magic words are: |
9
|
|
|
* - \{variable} |
10
|
|
|
* - \{iri} |
11
|
|
|
* - \{prefix} |
12
|
|
|
* - \{name} |
13
|
|
|
* - \{prefixed_iri} |
14
|
|
|
* - \{native} |
15
|
|
|
* - \{path} |
16
|
|
|
* - \{function} |
17
|
|
|
* |
18
|
|
|
* @license GNU GPL v2+ |
19
|
|
|
* @author Bene* < [email protected] > |
20
|
|
|
*/ |
21
|
|
|
class RegexHelper {

	/**
	 * @var string regex to match variables; the name (without the leading
	 * "?" or "$") is captured in a group
	 */
	private static $variable = '[?$](\w+)';

	/**
	 * @var string regex to match IRIs (content only, without the angle brackets)
	 */
	private static $iri = '[^\s<>"{}|\\\\^`]+';

	/**
	 * @var string regex to match prefixes
	 */
	private static $prefix = '\w+';

	/**
	 * @var string regex to match names after prefixes
	 */
	private static $name = '\w+';

	/**
	 * @var string regex to match strings and numbers
	 */
	private static $native = '([0-9]+|".*")';

	/**
	 * @var string[] list of natively supported functions
	 */
	private static $functions = array(
		'COUNT', 'SUM', 'MIN', 'MAX', 'AVG', 'SAMPLE', 'GROUP_CONCAT', 'STR',
		'LANG', 'LANGMATCHES', 'DATATYPE', 'BOUND', 'IRI', 'URI', 'BNODE',
		'RAND', 'ABS', 'CEIL', 'FLOOR', 'ROUND', 'CONCAT', 'STRLEN', 'UCASE',
		'LCASE', 'ENCODE_FOR_URI', 'CONTAINS', 'STRSTARTS', 'STRENDS',
		'STRBEFORE', 'STRAFTER', 'YEAR', 'MONTH', 'DAY', 'HOURS', 'MINUTES',
		'SECONDS', 'TIMEZONE', 'TZ', 'NOW', 'UUID', 'STRUUID', 'MD5', 'SHA1',
		'SHA256', 'SHA384', 'SHA512', 'COALESCE', 'IF', 'STRLANG', 'STRDT',
		'sameTerm', 'isIRI', 'isURI', 'isBLANK', 'isLITERAL', 'isNUMERIC',
		'REGEX', 'SUBSTR', 'REPLACE'
	);

	/**
	 * Checks if the expression matches the given regex.
	 *
	 * Magic words in the regex are resolved first. The resulting pattern is
	 * anchored at both ends, so the whole expression has to match, and it is
	 * applied case-insensitively.
	 *
	 * @param string $regex regex body without delimiters, may contain magic words
	 * @param string $expression
	 * @return bool
	 */
	public function matchesRegex( $regex, $expression ) {
		$pattern = '/^' . $this->resolveMagic( $regex ) . '$/i';

		return preg_match( $pattern, $expression ) === 1;
	}

	/**
	 * Returns all matching groups for the given regex.
	 * String and IRI sequences are automatically escaped.
	 *
	 * Unlike matchesRegex() the pattern is neither anchored nor
	 * case-insensitive.
	 *
	 * @param string $regex regex body without delimiters, may contain magic words
	 * @param string $expression
	 * @param int $group index of the capture group to return (0 is the full match)
	 * @return string[] one entry per match; empty if nothing matched or if the
	 * pattern has no capture group with the requested index
	 */
	public function getMatches( $regex, $expression, $group = 1 ) {
		$pattern = '/' . $this->resolveMagic( $regex ) . '/';
		$subject = $this->escapeSequences( $expression );

		// preg_match_all yields 0 for "no match" and false on error; both mean
		// there is nothing to return.
		if ( !preg_match_all( $pattern, $subject, $matches ) ) {
			return array();
		}

		// Guard against a group index the pattern does not define, which would
		// otherwise raise an undefined offset notice and return null.
		return isset( $matches[$group] ) ? $matches[$group] : array();
	}

	/**
	 * Escapes all sequences (IRIs and strings) and sets the replacements.
	 *
	 * Every double-quoted string and every <...> sequence is replaced by a
	 * placeholder of the form "<" . md5( sequence ) . ">". The by-reference
	 * parameter is reset and then filled with a placeholder => original
	 * sequence map.
	 *
	 * @param string $expression
	 * @param string[] $replacements
	 * @return string
	 */
	public function escapeSequences( $expression, &$replacements = null ) {
		$replacements = array();
		// @todo this is not completely safe but works in most cases
		// @todo for strings use http://stackoverflow.com/questions/171480/regex-grabbing-values-between-quotation-marks
		return preg_replace_callback(
			'/("([^\"]*)"|\<([^\>]*)\>)/',
			function( $match ) use ( &$replacements ) {
				$key = '<' . md5( $match[0] ) . '>';
				$replacements[$key] = $match[0];
				return $key;
			},
			$expression
		);
	}

	/**
	 * Replaces every supported magic word in the regex with its sub-pattern.
	 *
	 * @param string $regex
	 * @return string
	 */
	private function resolveMagic( $regex ) {
		$magics = array(
			'\{variable}' => self::$variable,
			'\{iri}' => self::$iri,
			'\{prefix}' => self::$prefix,
			'\{name}' => self::$name,
			'\{prefixed_iri}' => $this->getPrefixedIriRegex(),
			'\{native}' => self::$native,
			'\{path}' => $this->getPathRegex(),
			'\{function}' => $this->getFunctionRegex()
		);

		return strtr( $regex, $magics );
	}

	/**
	 * Builds the sub-pattern matching either "prefix:name" or an IRI wrapped
	 * in angle brackets.
	 *
	 * @return string
	 */
	private function getPrefixedIriRegex() {
		return '(' . self::$prefix . ':' . self::$name . '|\<' . self::$iri . '\>)';
	}

	/**
	 * Builds the sub-pattern matching a path: one or more elements separated
	 * by "/" or "|", each optionally preceded by "!" and/or "^" and optionally
	 * followed by "?", "*" or "+".
	 *
	 * @return string
	 */
	private function getPathRegex() {
		// (?1) recurses into capture group 1 of the final, complete pattern.
		// NOTE(review): presumably meant to be the outer path group, which only
		// holds when the path opens the first group of the pattern - verify
		// against callers.
		$element = '!?\^?(a|' . $this->getPrefixedIriRegex() . '|\((?1)\))(\?|\*|\+)?';
		return '(' . $element . '([\/\|]' . $element . ')*)';
	}

	/**
	 * Builds the sub-pattern matching anything that starts with a supported
	 * function name, an IRI in angle brackets, a prefix followed by ":",
	 * a variable or "!".
	 *
	 * @return string
	 */
	private function getFunctionRegex() {
		$allowed = array_merge( self::$functions, array( '\<' . self::$iri . '\>', self::$prefix . ':', self::$variable, '!' ) );
		return '(' . implode( '|', $allowed ) . ').*';
	}

}
147
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.