PunctuationRegExTokenizer   A
last analyzed

Complexity

Total Complexity 9

Size/Duplication

Total Lines 75
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Test Coverage

Coverage 92.31%

Importance

Changes 0
Metric Value
wmc 9
lcom 1
cbo 1
dl 0
loc 75
ccs 24
cts 26
cp 0.9231
rs 10
c 0
b 0
f 0

4 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 3 1
A setOption() 0 10 3
A isWordTokenizer() 0 3 2
A tokenize() 0 20 3
1
<?php
2
3
namespace Onoi\Tesa\Tokenizer;
4
5
/**
6
 * @license GNU GPL v2+
7
 * @since 0.1
8
 *
9
 * @author mwjames
10
 */
11
class PunctuationRegExTokenizer implements Tokenizer {
12
13
	/**
14
	 * @var Tokenizer
15
	 */
16
	private $tokenizer;
17
18
	/**
19
	 * @var string
20
	 */
21
	private $patternExemption = '';
22
23
	/**
	 * Creates the tokenizer, optionally wrapping another tokenizer.
	 *
	 * The argument is stored unchanged. When it is not null, setOption(),
	 * isWordTokenizer() and tokenize() call the wrapped instance as part of
	 * their own work.
	 *
	 * @since 0.1
	 *
	 * @param Tokenizer|null $tokenizer tokenizer to wrap, or null for none
	 */
	public function __construct( Tokenizer $tokenizer = null ) {
		$this->tokenizer = $tokenizer;
	}
31
32
	/**
	 * Sets an option on this tokenizer and on the wrapped tokenizer, if any.
	 *
	 * Every option is forwarded to the wrapped tokenizer first. Only the
	 * REGEX_EXEMPTION option is kept locally; tokenize() removes its value
	 * from the delimiter set before splitting.
	 *
	 * @since 0.1
	 *
	 * @param string $name option identifier
	 * @param mixed $value option value
	 */
	public function setOption( $name, $value ) {

		$inner = $this->tokenizer;

		// Forward unconditionally so the wrapped tokenizer sees all options
		if ( $inner !== null ) {
			$inner->setOption( $name, $value );
		}

		// Anything other than the exemption option has no local effect
		if ( $name !== self::REGEX_EXEMPTION ) {
			return;
		}

		$this->patternExemption = $value;
	}
47
48
	/**
	 * Reports whether this tokenizer is a word tokenizer.
	 *
	 * Without a wrapped tokenizer the answer is always true; otherwise the
	 * answer of the wrapped tokenizer is returned unchanged.
	 *
	 * @since 0.1
	 *
	 * @return boolean
	 */
	public function isWordTokenizer() {

		if ( $this->tokenizer === null ) {
			return true;
		}

		return $this->tokenizer->isWordTokenizer();
	}
56
57
	/**
	 * Splits a string into tokens at runs of delimiter characters.
	 *
	 * If a tokenizer was injected through the constructor it runs first and
	 * its tokens are joined with single spaces to form the string that is
	 * split here. The delimiter set is a fixed character list from which the
	 * value stored for the REGEX_EXEMPTION option is removed via str_replace()
	 * before it is placed inside a character class. Empty pieces are dropped
	 * (PREG_SPLIT_NO_EMPTY) and the expression is matched with the "u"
	 * modifier.
	 *
	 * @since 0.1
	 *
	 * @param string $string text to split
	 *
	 * @return array|false the tokens; a false result from preg_split() is
	 *  replaced by an empty array before returning
	 */
64 5
	public function tokenize( $string ) {
65
		// Run the wrapped tokenizer first and rebuild a plain string from its
		// tokens so that the split below operates on text, not on an array.
		// NOTE(review): the wrapped tokenizer result is passed to implode()
		// without a check; if it can return false this call would fail.
		// Confirm against the Tokenizer interface contract.
66 5
		if ( $this->tokenizer !== null ) {
67 1
			$string = implode( " ", $this->tokenizer->tokenize( $string ) );
68 1
		}
69
		// Build the delimiter list, minus whatever the exemption option holds.
		// NOTE(review): as rendered in this listing the delimiter literal
		// contains an unescaped apostrophe and unescaped square brackets, which
		// would end the string early and break the character class. It looks as
		// if distinct (presumably full-width) characters were folded to ASCII
		// before this listing was produced. Verify against the repository copy
		// before editing the literal.
		// NOTE(review): the preg_split() call further down passes null as its
		// limit argument. PHP 8.1 and later report null for that integer
		// parameter as deprecated; -1 is the documented "no limit" value.
70 5
		$pattern = str_replace(
71 5
			$this->patternExemption,
72 5
			'',
73
			'_-・,、;:!?.。…◆★◇□■()【】《》〈〉;:“”"〃'`[]{}「」@*\/&#%`^+<=>|~≪≫─$"_\-・,、;:!?.。()[\]{}「」@*\/&#%`^+<=>|~«»$"\s'
74 5
		);
75
76 5
		$result = preg_split( '/[' . $pattern . ']+/u', $string, null, PREG_SPLIT_NO_EMPTY );
77
78 5
		if ( $result === false ) {
79
			$result = array();
80
		}
81
82 5
		return $result;
83
	}
84
85
}
86