RegexHelper::getMatches()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 11
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 2
Metric Value
c 3
b 0
f 2
dl 0
loc 11
rs 9.4285
cc 2
eloc 7
nc 2
nop 3
1
<?php
2
3
namespace Asparagus;
4
5
/**
 * Package-private class to help with regexes.
 *
 * Patterns passed to this class may contain "magic words". Each magic word is
 * replaced by the corresponding sub-pattern before the regex is executed.
 * Patterns are wrapped in "/" delimiters, so a literal slash inside a pattern
 * has to be escaped by the caller.
 *
 * Supported magic words are:
 * - \{variable}
 * - \{iri}
 * - \{prefix}
 * - \{name}
 * - \{prefixed_iri}
 * - \{native}
 * - \{path}
 * - \{function}
 *
 * @license GNU GPL v2+
 * @author Bene* < [email protected] >
 */
class RegexHelper {

	/**
	 * @var string regex to match variables ("?" or "$" followed by word
	 * characters; the name without the sigil is captured)
	 */
	private static $variable = '[?$](\w+)';

	/**
	 * @var string regex to match IRIs (without the surrounding angle brackets)
	 */
	private static $iri = '[^\s<>"{}|\\\\^`]+';

	/**
	 * @var string regex to match prefixes
	 */
	private static $prefix = '\w+';

	/**
	 * @var string regex to match names after prefixes
	 */
	private static $name = '\w+';

	/**
	 * @var string regex to match strings and numbers
	 */
	private static $native = '([0-9]+|".*")';

	/**
	 * @var string[] list of natively supported functions
	 */
	private static $functions = array(
		'COUNT', 'SUM', 'MIN', 'MAX', 'AVG', 'SAMPLE', 'GROUP_CONCAT', 'STR',
		'LANG', 'LANGMATCHES', 'DATATYPE', 'BOUND', 'IRI', 'URI', 'BNODE',
		'RAND', 'ABS', 'CEIL', 'FLOOR', 'ROUND', 'CONCAT', 'STRLEN', 'UCASE',
		'LCASE', 'ENCODE_FOR_URI', 'CONTAINS', 'STRSTARTS', 'STRENDS',
		'STRBEFORE', 'STRAFTER', 'YEAR', 'MONTH', 'DAY', 'HOURS', 'MINUTES',
		'SECONDS', 'TIMEZONE', 'TZ', 'NOW', 'UUID', 'STRUUID', 'MD5', 'SHA1',
		'SHA256', 'SHA384', 'SHA512', 'COALESCE', 'IF', 'STRLANG', 'STRDT',
		'sameTerm', 'isIRI', 'isURI', 'isBLANK', 'isLITERAL', 'isNUMERIC',
		'REGEX', 'SUBSTR', 'REPLACE'
	);

	/**
	 * Checks if the whole expression matches the given regex.
	 *
	 * The pattern is anchored at both ends and applied case-insensitively.
	 * Unlike getMatches(), the expression is used as given; sequences are not
	 * escaped beforehand.
	 *
	 * @param string $regex pattern without delimiters, may contain magic words
	 * @param string $expression
	 * @return bool
	 */
	public function matchesRegex( $regex, $expression ) {
		$pattern = '/^' . $this->resolveMagic( $regex ) . '$/i';

		return preg_match( $pattern, $expression ) === 1;
	}

	/**
	 * Returns all matches of one capturing group for the given regex.
	 * String and IRI sequences are automatically escaped before matching, so
	 * the pattern is run against the placeholder form of the expression
	 * (see escapeSequences()). Matching is case-sensitive and not anchored.
	 *
	 * An empty array is returned if nothing matches, if the regex fails to
	 * execute, or if the pattern does not define the requested group.
	 *
	 * @param string $regex pattern without delimiters, may contain magic words
	 * @param string $expression
	 * @param int $group index of the capturing group to return
	 * @return string[]
	 */
	public function getMatches( $regex, $expression, $group = 1 ) {
		$found = preg_match_all(
			'/' . $this->resolveMagic( $regex ) . '/',
			$this->escapeSequences( $expression ),
			$matches
		);

		// preg_match_all() yields false on error and 0 without matches; a group
		// index unknown to the pattern has no entry in $matches. None of these
		// cases may leak a null (plus a notice) to the caller.
		if ( !$found || !isset( $matches[$group] ) ) {
			return array();
		}

		return $matches[$group];
	}

	/**
	 * Escapes all sequences (IRIs and strings) and sets the replacements.
	 *
	 * Every double-quoted string and every <...> sequence is swapped for a
	 * placeholder of the form "<" . md5( sequence ) . ">". The by-reference
	 * parameter is reset and then filled with a placeholder => original map,
	 * which allows the caller to restore the sequences later on.
	 *
	 * @param string $expression
	 * @param string[] $replacements output parameter, overwritten on every call
	 * @return string the expression with all sequences replaced by placeholders
	 */
	public function escapeSequences( $expression, &$replacements = null ) {
		$replacements = array();
		// @todo this is not completely safe but works in most cases
		// @todo for strings use http://stackoverflow.com/questions/171480/regex-grabbing-values-between-quotation-marks
		return preg_replace_callback(
			'/("([^\"]*)"|\<([^\>]*)\>)/',
			function( $match ) use ( &$replacements ) {
				// The key only depends on the matched text, so identical
				// sequences share a single placeholder.
				$key = '<' . md5( $match[0] ) . '>';
				$replacements[$key] = $match[0];
				return $key;
			},
			$expression
		);
	}

	/**
	 * Replaces all magic words in the given regex by their sub-patterns.
	 *
	 * @param string $regex
	 * @return string
	 */
	private function resolveMagic( $regex ) {
		$magics = array(
			'\{variable}' => self::$variable,
			'\{iri}' => self::$iri,
			'\{prefix}' => self::$prefix,
			'\{name}' => self::$name,
			'\{prefixed_iri}' => $this->getPrefixedIriRegex(),
			'\{native}' => self::$native,
			'\{path}' => $this->getPathRegex(),
			'\{function}' => $this->getFunctionRegex()
		);

		// strtr() replaces all keys in a single pass, already inserted
		// sub-patterns are never scanned for magic words again.
		return strtr( $regex, $magics );
	}

	/**
	 * Builds the regex for either a prefixed name (prefix:name) or an IRI in
	 * angle brackets.
	 *
	 * @return string
	 */
	private function getPrefixedIriRegex() {
		return '(' . self::$prefix . ':' . self::$name . '|\<' . self::$iri . '\>)';
	}

	/**
	 * Builds the regex for paths: one or more elements joined by "/" or "|".
	 *
	 * An element is "a", a prefixed IRI or a parenthesised sub-path, with an
	 * optional leading "!" and/or "^" and an optional trailing "?", "*" or "+".
	 *
	 * NOTE(review): the "(?1)" recursion refers to capturing group 1 of the
	 * complete pattern, so parenthesised sub-paths presumably only work as
	 * intended if \{path} opens the first group of the regex - confirm against
	 * the callers.
	 *
	 * @return string
	 */
	private function getPathRegex() {
		$element = '!?\^?(a|' . $this->getPrefixedIriRegex() . '|\((?1)\))(\?|\*|\+)?';
		return '(' . $element . '([\/\|]' . $element . ')*)';
	}

	/**
	 * Builds the regex for function expressions: anything that starts with a
	 * supported function name, an IRI in angle brackets, a prefix followed by
	 * a colon, a variable or "!".
	 *
	 * @return string
	 */
	private function getFunctionRegex() {
		$allowed = array_merge( self::$functions, array( '\<' . self::$iri . '\>', self::$prefix . ':', self::$variable, '!' ) );
		return '(' . implode( '|', $allowed ) . ').*';
	}

}
147