Completed
Push — master ( 9401a4...2cc7a2 )
by mw
13:59
created

PunctuationRegExTokenizer   A

Complexity

Total Complexity 9

Size/Duplication

Total Lines 75
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Importance

Changes 1
Bugs 0 Features 1
Metric Value
wmc 9
c 1
b 0
f 1
lcom 1
cbo 1
dl 0
loc 75
rs 10

4 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 3 1
A setOption() 0 10 3
A isWordTokenizer() 0 3 2
A tokenize() 0 20 3
1
<?php
2
3
namespace Onoi\Tesa\Tokenizer;
4
5
/**
 * Tokenizer that splits text on runs of punctuation and whitespace, optionally
 * after passing the text through another tokenizer first.
 *
 * @license GNU GPL v2+
 * @since 0.1
 *
 * @author mwjames
 */
class PunctuationRegExTokenizer implements Tokenizer {

	/**
	 * Optional tokenizer whose output is re-joined with spaces and then split
	 * again by this class.
	 *
	 * @var Tokenizer|null
	 */
	private $tokenizer;

	/**
	 * Text that is removed (as a literal substring) from the separator set
	 * before the regular expression is built.
	 *
	 * @var string
	 */
	private $patternExemption = '';

	/**
	 * @since 0.1
	 *
	 * @param Tokenizer|null $tokenizer tokenizer to run before this one, or
	 *  null to split the raw input directly
	 */
	public function __construct( Tokenizer $tokenizer = null ) {
		$this->tokenizer = $tokenizer;
	}

	/**
	 * Forwards the option to the wrapped tokenizer (when there is one) and
	 * records the value locally when the option is self::REGEX_EXEMPTION.
	 *
	 * @since 0.1
	 *
	 * {@inheritDoc}
	 */
	public function setOption( $name, $value ) {

		if ( $this->tokenizer !== null ) {
			$this->tokenizer->setOption( $name, $value );
		}

		if ( $name === self::REGEX_EXEMPTION ) {
			$this->patternExemption = $value;
		}
	}

	/**
	 * Defers to the wrapped tokenizer when there is one; otherwise reports
	 * true.
	 *
	 * @since 0.1
	 *
	 * {@inheritDoc}
	 */
	public function isWordTokenizer() {
		return $this->tokenizer !== null ? $this->tokenizer->isWordTokenizer() : true;
	}

	/**
	 * Splits the text on every run of separator characters and drops empty
	 * pieces.
	 *
	 * @since 0.1
	 *
	 * @param string $string
	 *
	 * @return string[] the tokens; an empty array when nothing is left or
	 *  when the split fails
	 */
	public function tokenize( $string ) {

		if ( $this->tokenizer !== null ) {
			$tokens = $this->tokenizer->tokenize( $string );

			// The tokenize() contract admits a non-array (false) result;
			// implode() must not be handed that.
			if ( !is_array( $tokens ) ) {
				return array();
			}

			$string = implode( " ", $tokens );
		}

		// The value is spliced into a character class, so `-` and `]` have to
		// be escaped and `'` must not terminate the PHP literal.
		// NOTE(review): the first half repeats the ASCII set found in the
		// second half; it may originally have held the fullwidth forms of
		// these characters - confirm against the upstream file.
		$pattern = str_replace(
			$this->patternExemption,
			'',
			'_\-・,、;:!?.。…◆★◇□■()【】《》〈〉;:“”"〃\'`[\]{}「」@*\/&#%`^+<=>|~≪≫─$"_\-・,、;:!?.。()[\]{}「」@*\/&#%`^+<=>|~«»$"\s'
		);

		// -1 is the documented "no limit" value; null is deprecated for this
		// parameter as of PHP 8.1.
		$result = preg_split( '/[' . $pattern . ']+/u', $string, -1, PREG_SPLIT_NO_EMPTY );

		// preg_split() reports failure (e.g. an uncompilable pattern) as
		// false; callers always get an array back.
		if ( $result === false ) {
			$result = array();
		}

		return $result;
	}

}
86