Completed
Pull Request — master (#23)
by no
04:44 queued 02:28
created

LatLongParserBase::removeInvalidChars()   B

Complexity

Conditions 6
Paths 3

Size

Total Lines 17
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 17
rs 8.8571
c 0
b 0
f 0
cc 6
eloc 10
nc 3
nop 1
1
<?php
2
3
namespace DataValues\Geo\Parsers;
4
5
use DataValues\Geo\Values\LatLongValue;
6
use ValueParsers\ParseException;
7
use ValueParsers\ParserOptions;
8
use ValueParsers\StringValueParser;
9
10
/**
11
 * @since 0.1
12
 *
13
 * @license GPL-2.0+
14
 * @author H. Snater < [email protected] >
15
 * @author Jeroen De Dauw < [email protected] >
16
 */
17
abstract class LatLongParserBase extends StringValueParser {
18
19
	const FORMAT_NAME = 'geo-coordinate';
20
21
	/**
22
	 * The symbols representing the different directions for usage in directional notation.
23
	 * @since 0.1
24
	 */
25
	const OPT_NORTH_SYMBOL = 'north';
26
	const OPT_EAST_SYMBOL = 'east';
27
	const OPT_SOUTH_SYMBOL = 'south';
28
	const OPT_WEST_SYMBOL = 'west';
29
30
	/**
31
	 * The symbol to use as separator between latitude and longitude.
32
	 * @since 0.1
33
	 */
34
	const OPT_SEPARATOR_SYMBOL = 'separator';
35
36
	/**
	 * Delimiters used to split a coordinate string when unable to split by using the separator.
	 * Starts out as an empty list so that splitting still works (using only the direction
	 * symbols) when no delimiters have been assigned.
	 * @var string[]
	 */
	protected $defaultDelimiters = array();
41
42
	/**
	 * Forwards the options to the parent constructor and then supplies default values for the
	 * four direction symbols and the latitude/longitude separator via defaultOption().
	 *
	 * @since 0.1
	 *
	 * @param ParserOptions|null $options
	 */
	public function __construct( ParserOptions $options = null ) {
		parent::__construct( $options );

		// Option name => default symbol, applied in the order listed.
		$defaultSymbols = array(
			self::OPT_NORTH_SYMBOL => 'N',
			self::OPT_EAST_SYMBOL => 'E',
			self::OPT_SOUTH_SYMBOL => 'S',
			self::OPT_WEST_SYMBOL => 'W',
			self::OPT_SEPARATOR_SYMBOL => ',',
		);

		foreach ( $defaultSymbols as $optionName => $symbol ) {
			$this->defaultOption( $optionName, $symbol );
		}
	}
57
58
	/**
	 * Converts one coordinate segment (a latitude or a longitude) into its float value.
	 * stringParse() calls this once for the latitude segment and once for the longitude segment.
	 *
	 * @since 0.1
	 *
	 * @param string $coordinateSegment
	 *
	 * @throws ParseException
	 * @return float
	 */
	abstract protected function getParsedCoordinate( $coordinateSegment );
69
70
	/**
	 * Tells whether the two segments of a split coordinate are written in the notation this
	 * parser expects. stringParse() throws a ParseException when this returns false.
	 *
	 * @since 0.1
	 *
	 * @param string[] $normalizedCoordinateSegments
	 *
	 * @return boolean
	 */
	abstract protected function areValidCoordinates( array $normalizedCoordinateSegments );
81
82
	/**
	 * Cleans the input, splits it into a latitude and a longitude segment, validates both and
	 * builds the resulting LatLongValue.
	 *
	 * @see StringValueParser::stringParse
	 *
	 * @since 0.1
	 *
	 * @param string $value
	 *
	 * @throws ParseException
	 * @return LatLongValue
	 */
	protected function stringParse( $value ) {
		$segments = $this->splitString( $this->removeInvalidChars( $value ) );

		if ( $this->areValidCoordinates( $segments ) ) {
			list( $latitudeSegment, $longitudeSegment ) = $segments;

			return new LatLongValue(
				$this->getParsedCoordinate( $latitudeSegment ),
				$this->getParsedCoordinate( $longitudeSegment )
			);
		}

		// Report the untouched input rather than the cleaned string.
		throw new ParseException( 'Not a valid geographical coordinate', $value, static::FORMAT_NAME );
	}
110
111
	/**
	 * Returns the string trimmed and reduced to printable ASCII characters (byte values 32 to
	 * 126) plus the UTF-8 encoded degree sign (bytes 0xC2 0xB0). Control characters and all other
	 * non-ASCII bytes are removed. SPACE characters within the string are not removed to retain
	 * the option to split the string using that character.
	 *
	 * The degree sign is only kept as a complete two-byte sequence. A lone 0xC2 or 0xB0 byte
	 * (for example the remains of "±" or "ð") is dropped, so the result never contains a broken
	 * multi-byte character.
	 *
	 * @since 0.1
	 *
	 * @param string $string
	 *
	 * @return string
	 */
	protected function removeInvalidChars( $string ) {
		// No "u" modifier on purpose: the pattern works on raw bytes, so input that is not valid
		// UTF-8 is filtered instead of making the match fail.
		preg_match_all( '/[\x20-\x7E]|\xC2\xB0/', $string, $matches );

		return trim( implode( '', $matches[0] ) );
	}
139
140
	/**
	 * Splits a string into two strings using the separator specified in the options. If the
	 * separator does not yield exactly two parts, the method tries to split the string at the
	 * first direction symbol or default delimiter it finds. If the string still cannot be split
	 * into two parts, a ParseException is thrown.
	 *
	 * @since 0.1
	 *
	 * @param string $normalizedCoordinateString
	 *
	 * @throws ParseException if unable to split input string into two segments
	 * @return string[] Exactly two segments: the part before and the part after the split point.
	 */
	protected function splitString( $normalizedCoordinateString ) {
		$separator = $this->getOption( self::OPT_SEPARATOR_SYMBOL );

		$normalizedCoordinateSegments = explode( $separator, $normalizedCoordinateString );

		if ( count( $normalizedCoordinateSegments ) !== 2 ) {
			// The separator did not produce two parts, trying to figure out the segments by
			// splitting at the first direction character or default delimiter.

			// The delimiters may not have been assigned; treat that as "no extra delimiters".
			$defaultDelimiters = (array)$this->defaultDelimiters;

			$ns = array(
				$this->getOption( self::OPT_NORTH_SYMBOL ),
				$this->getOption( self::OPT_SOUTH_SYMBOL )
			);

			$ew = array(
				$this->getOption( self::OPT_EAST_SYMBOL ),
				$this->getOption( self::OPT_WEST_SYMBOL )
			);

			$startsWithNorthOrSouth = false;

			foreach ( $ns as $symbol ) {
				if ( mb_strpos( $normalizedCoordinateString, $symbol ) === 0 ) {
					$startsWithNorthOrSouth = true;
					break;
				}
			}

			// String starts with the "north" or "south" symbol: the separation needs to be done
			// before the "east" or "west" symbol. Otherwise the "north"/"south" symbols are
			// tried first and the split happens after them.
			$delimiters = array_merge(
				$startsWithNorthOrSouth ? $ew : $ns,
				$defaultDelimiters
			);

			foreach ( $delimiters as $delimiter ) {
				$delimiterPos = mb_strpos( $normalizedCoordinateString, $delimiter );

				if ( $delimiterPos === false ) {
					continue;
				}

				// An "east"/"west" symbol belongs to the second segment, so the cut is made in
				// front of it; every other delimiter stays at the end of the first segment.
				$splitPos = $delimiterPos;
				if ( !in_array( $delimiter, $ew, true ) ) {
					$splitPos += mb_strlen( $delimiter );
				}

				$normalizedCoordinateSegments = array(
					mb_substr( $normalizedCoordinateString, 0, $splitPos ),
					mb_substr( $normalizedCoordinateString, $splitPos )
				);
				break;
			}
		}

		if ( count( $normalizedCoordinateSegments ) !== 2 ) {
			throw new ParseException( __CLASS__ . ': Unable to split string '
				. $normalizedCoordinateString . ' into two coordinate segments' );
		}

		return $normalizedCoordinateSegments;
	}
206
207
	/**
	 * Turns directional notation (N/E/S/W) of a single coordinate into non-directional notation
	 * (+/-).
	 * This method assumes there are no preceding or tailing spaces.
	 *
	 * A direction symbol is recognised at the start and/or the end of the segment, provided the
	 * remainder does not contain that symbol again. The symbol is removed, and a minus sign is
	 * prepended for the south and west directions. A segment without a recognised direction
	 * symbol is returned unchanged.
	 *
	 * @since 0.1
	 *
	 * @param string $coordinateSegment
	 *
	 * @return string
	 */
	protected function resolveDirection( $coordinateSegment ) {
		$n = $this->getOption( self::OPT_NORTH_SYMBOL );
		$e = $this->getOption( self::OPT_EAST_SYMBOL );
		$s = $this->getOption( self::OPT_SOUTH_SYMBOL );
		$w = $this->getOption( self::OPT_WEST_SYMBOL );

		foreach ( array( $n, $e, $s, $w ) as $direction ) {
			// The symbols are configurable, so escape them before building the pattern.
			$quoted = preg_quote( $direction, '/' );

			// The coordinate segment may either start or end with a direction symbol. The
			// negative lookahead keeps the symbol out of the middle part and, unlike a
			// character class, also works for symbols longer than one character.
			$pattern = '/^(' . $quoted . '|)((?:(?!' . $quoted . ').)+)(' . $quoted . '|)$/is';

			if ( preg_match( $pattern, $coordinateSegment, $matches ) !== 1 ) {
				// The segment does not fit this direction; $matches holds nothing usable.
				continue;
			}

			// The pattern is case-insensitive, but only an exact-case symbol is accepted here,
			// so a differently cased symbol is left in place.
			if ( $matches[1] === $direction || $matches[3] === $direction ) {
				$coordinateSegment = $matches[2];

				if ( in_array( $direction, array( $s, $w ), true ) ) {
					$coordinateSegment = '-' . $coordinateSegment;
				}

				return $coordinateSegment;
			}
		}

		// Coordinate segment does not include a direction symbol.
		return $coordinateSegment;
	}
249
250
}
251