Passed
Push — master ( 6e37f8...05a985 )
by Jeroen De
02:43
created

LatLongParserBase::parse()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 22
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
dl 0
loc 22
ccs 0
cts 12
cp 0
rs 9.2
c 0
b 0
f 0
cc 3
eloc 12
nc 3
nop 1
crap 12
1
<?php
2
3
namespace DataValues\Geo\Parsers;
4
5
use DataValues\Geo\Values\LatLongValue;
6
use ValueParsers\ParseException;
7
use ValueParsers\ParserOptions;
8
use ValueParsers\StringValueParser;
9
use ValueParsers\ValueParser;
10
11
/**
 * Base class for parsers that turn a textual geographical coordinate into a LatLongValue.
 *
 * This class cleans the input, splits it into a latitude and a longitude segment and builds
 * the resulting value. Subclasses provide the notation specific parts by implementing
 * getParsedCoordinate() and areValidCoordinates(); they are presumably also the ones that
 * populate $defaultDelimiters, since this class never assigns it.
 *
 * @since 0.1, renamed in 2.0
 *
 * @license GPL-2.0+
 * @author H. Snater < [email protected] >
 * @author Jeroen De Dauw < [email protected] >
 */
abstract class LatLongParserBase implements ValueParser {

	const FORMAT_NAME = 'geo-coordinate';

	/**
	 * The symbols representing the different directions for usage in directional notation.
	 */
	const OPT_NORTH_SYMBOL = 'north';
	const OPT_EAST_SYMBOL = 'east';
	const OPT_SOUTH_SYMBOL = 'south';
	const OPT_WEST_SYMBOL = 'west';

	/**
	 * The symbol to use as separator between latitude and longitude.
	 */
	const OPT_SEPARATOR_SYMBOL = 'separator';

	/**
	 * Delimiters used to split a coordinate string when unable to split by using the separator.
	 * Defaults to an empty list so that splitString() works even if no delimiters were set.
	 * @var string[]
	 */
	protected $defaultDelimiters = [];

	/**
	 * @var ParserOptions
	 */
	protected $options;

	/**
	 * Registers the default language, direction symbols and separator on the options.
	 *
	 * The defaults are registered on the very object that was passed in, so the caller's
	 * ParserOptions instance is touched as well.
	 *
	 * @param ParserOptions|null $options A fresh ParserOptions object is used when omitted.
	 */
	public function __construct( ParserOptions $options = null ) {
		$this->options = $options ?: new ParserOptions();

		$this->options->defaultOption( ValueParser::OPT_LANG, 'en' );

		$this->options->defaultOption( self::OPT_NORTH_SYMBOL, 'N' );
		$this->options->defaultOption( self::OPT_EAST_SYMBOL, 'E' );
		$this->options->defaultOption( self::OPT_SOUTH_SYMBOL, 'S' );
		$this->options->defaultOption( self::OPT_WEST_SYMBOL, 'W' );

		$this->options->defaultOption( self::OPT_SEPARATOR_SYMBOL, ',' );
	}

	/**
	 * Parses a single coordinate segment (either latitude or longitude) and returns it as a float.
	 *
	 * @param string $coordinateSegment
	 *
	 * @throws ParseException
	 * @return float
	 */
	abstract protected function getParsedCoordinate( $coordinateSegment );

	/**
	 * Returns whether a coordinate split into its two segments is in the representation expected by
	 * this parser.
	 *
	 * @param string[] $normalizedCoordinateSegments
	 *
	 * @return boolean
	 */
	abstract protected function areValidCoordinates( array $normalizedCoordinateSegments );

	/**
	 * Cleans the given string, splits it into a latitude and a longitude segment and parses both.
	 *
	 * @see ValueParser::parse
	 *
	 * @param string $value
	 *
	 * @throws ParseException if the value is not a string, cannot be split into two segments or
	 *  is not in the notation expected by this parser
	 * @return LatLongValue
	 */
	public function parse( $value ) {
		if ( !is_string( $value ) ) {
			throw new ParseException( 'Not a string' );
		}

		// $value itself is kept untouched so that the exception can report the raw input.
		$normalizedCoordinateSegments = $this->splitString( $this->removeInvalidChars( $value ) );

		if ( !$this->areValidCoordinates( $normalizedCoordinateSegments ) ) {
			throw new ParseException( 'Not a valid geographical coordinate', $value, static::FORMAT_NAME );
		}

		list( $latitude, $longitude ) = $normalizedCoordinateSegments;

		return new LatLongValue(
			$this->getParsedCoordinate( $latitude ),
			$this->getParsedCoordinate( $longitude )
		);
	}

	/**
	 * Returns the string trimmed and reduced to printable ASCII (byte values 32 to 126) plus the
	 * byte values 194 and 176. SPACE characters within the string are not removed to retain the
	 * option to split the string using that character.
	 *
	 * 194 and 176 (0xC2 0xB0) are the two bytes of the UTF-8 encoded degree sign. Each byte is
	 * checked on its own, so these two values are also kept when they occur as part of some
	 * other multi-byte character.
	 *
	 * @param string $string
	 *
	 * @return string
	 */
	protected function removeInvalidChars( $string ) {
		$filtered = '';

		// str_split() without a length walks the string byte by byte.
		foreach ( str_split( $string ) as $byte ) {
			$byteValue = ord( $byte );

			$isPrintableAscii = $byteValue >= 32 && $byteValue < 127;
			$isDegreeSignByte = $byteValue === 194 || $byteValue === 176;

			if ( $isPrintableAscii || $isDegreeSignByte ) {
				$filtered .= $byte;
			}
		}

		return trim( $filtered );
	}

	/**
	 * Splits a string into two strings using the separator specified in the options. If the string
	 * could not be split into exactly two parts using the separator, the method will try to split
	 * the string by analyzing the used symbols: it cuts at the first occurrence of a direction
	 * symbol or of one of the default delimiters.
	 *
	 * @param string $normalizedCoordinateString
	 *
	 * @throws ParseException if unable to split input string into two segments
	 * @return string[] Exactly two segments, latitude first.
	 */
	protected function splitString( $normalizedCoordinateString ) {
		$separator = $this->getOption( self::OPT_SEPARATOR_SYMBOL );

		// explode() does not accept an empty separator, so treat that case the same way as a
		// separator that does not occur in the string.
		$normalizedCoordinateSegments = (string)$separator === ''
			? []
			: explode( $separator, $normalizedCoordinateString );

		if ( count( $normalizedCoordinateSegments ) !== 2 ) {
			// Separator not usable for this string, trying to figure out the segments by
			// splitting after the first direction character or degree symbol:
			$ns = [
				$this->getOption( self::OPT_NORTH_SYMBOL ),
				$this->getOption( self::OPT_SOUTH_SYMBOL )
			];

			$ew = [
				$this->getOption( self::OPT_EAST_SYMBOL ),
				$this->getOption( self::OPT_WEST_SYMBOL )
			];

			$startsWithLatitudeSymbol = false;

			foreach ( $ns as $symbol ) {
				if ( (string)$symbol !== ''
					&& mb_strpos( $normalizedCoordinateString, $symbol ) === 0
				) {
					$startsWithLatitudeSymbol = true;
					break;
				}
			}

			// String starts with "north" or "south" symbol: Separation needs to be done before
			// the "east" or "west" symbol. Otherwise the latitude symbol is expected to trail
			// the first segment, so separation is done after it.
			$delimiters = array_merge(
				$startsWithLatitudeSymbol ? $ew : $ns,
				(array)$this->defaultDelimiters
			);

			foreach ( $delimiters as $delimiter ) {
				if ( (string)$delimiter === '' ) {
					// An empty needle can never mark a split position.
					continue;
				}

				$delimiterPos = mb_strpos( $normalizedCoordinateString, $delimiter );

				if ( $delimiterPos === false ) {
					continue;
				}

				// East/west symbols belong to the second segment, so cut in front of them. Every
				// other delimiter stays with the first segment, so cut right behind it.
				$splitPos = $delimiterPos
					+ ( in_array( $delimiter, $ew, true ) ? 0 : mb_strlen( $delimiter ) );

				$normalizedCoordinateSegments = [
					mb_substr( $normalizedCoordinateString, 0, $splitPos ),
					mb_substr( $normalizedCoordinateString, $splitPos )
				];
				break;
			}
		}

		if ( count( $normalizedCoordinateSegments ) !== 2 ) {
			throw new ParseException( __CLASS__ . ': Unable to split string '
				. $normalizedCoordinateString . ' into two coordinate segments' );
		}

		return $normalizedCoordinateSegments;
	}

	/**
	 * Turns directional notation (N/E/S/W) of a single coordinate into non-directional notation
	 * (+/-).
	 * This method assumes there are no preceding or tailing spaces.
	 *
	 * The direction symbol is only recognised when it is written exactly as configured in the
	 * options (the comparison is case sensitive) and when it does not occur a second time in
	 * the rest of the segment.
	 *
	 * @param string $coordinateSegment
	 *
	 * @return string
	 */
	protected function resolveDirection( $coordinateSegment ) {
		$n = $this->getOption( self::OPT_NORTH_SYMBOL );
		$e = $this->getOption( self::OPT_EAST_SYMBOL );
		$s = $this->getOption( self::OPT_SOUTH_SYMBOL );
		$w = $this->getOption( self::OPT_WEST_SYMBOL );

		// If there is a direction indicator, remove it, and prepend a minus sign for south and west
		// directions. If there is no direction indicator, the coordinate is already non-directional
		// and no work is required.
		foreach ( [ $n, $e, $s, $w ] as $direction ) {
			if ( (string)$direction === '' ) {
				// Nothing to look for; an empty symbol would only produce a broken pattern.
				continue;
			}

			// The symbol is configurable, so it has to be escaped before it goes into a pattern.
			$symbol = preg_quote( $direction, '/' );

			// The coordinate segment may either start or end with a direction symbol. The middle
			// part must not contain the symbol; the negative lookahead expresses that for symbols
			// of any length, where a character class would only work for a single character.
			$matched = preg_match(
				'/^(' . $symbol . '|)((?:(?!' . $symbol . ').)+)(' . $symbol . '|)$/is',
				$coordinateSegment,
				$matches
			);

			if ( $matched !== 1 ) {
				// No match (or a PCRE error): $matches holds no groups that could be inspected.
				continue;
			}

			if ( $matches[1] === $direction || $matches[3] === $direction ) {
				$coordinateSegment = $matches[2];

				if ( in_array( $direction, [ $s, $w ], true ) ) {
					$coordinateSegment = '-' . $coordinateSegment;
				}

				return $coordinateSegment;
			}
		}

		// Coordinate segment does not include a direction symbol.
		return $coordinateSegment;
	}

	/**
	 * Convenience accessor for a single value of the parser options.
	 *
	 * @param string $optionName
	 *
	 * @return mixed
	 */
	protected function getOption( $optionName ) {
		return $this->options->getOption( $optionName );
	}

}
248