CJKSimpleCharacterRegExTokenizer   A
last analyzed

Complexity

Total Complexity 8

Size/Duplication

Total Lines 74
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Test Coverage

Coverage 96%

Importance

Changes 0
Metric Value
wmc 8
lcom 1
cbo 1
dl 0
loc 74
ccs 24
cts 25
cp 0.96
rs 10
c 0
b 0
f 0

4 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 3 1
A setOption() 0 10 3
A isWordTokenizer() 0 3 1
A tokenize() 0 21 3
1
<?php
2
3
namespace Onoi\Tesa\Tokenizer;
4
5
/**
6
 * @license GNU GPL v2+
7
 * @since 0.1
8
 *
9
 * @author mwjames
10
 */
11
class CJKSimpleCharacterRegExTokenizer implements Tokenizer {

	// Character-level tokenizer: splits text on whitespace, punctuation and a
	// fixed list of CJK characters. An optional inner tokenizer may be chained
	// in front of it; its tokens are joined with spaces and split again here.

	/**
	 * Optional inner tokenizer that pre-processes the input; null when none
	 * was passed to the constructor.
	 *
	 * @var Tokenizer|null
	 */
	private $tokenizer;

	/**
	 * Text that is removed from the split pattern before it is applied.
	 * Set through the REGEX_EXEMPTION option; empty means "no exemption".
	 *
	 * @var string
	 */
	private $patternExemption = '';

	/**
	 * @since 0.1
	 *
	 * @param Tokenizer|null $tokenizer optional tokenizer to run before this one
	 */
	public function __construct( Tokenizer $tokenizer = null ) {
		$this->tokenizer = $tokenizer;
	}

	/**
	 * Forwards the option to the inner tokenizer (when there is one) and, for
	 * REGEX_EXEMPTION, remembers the value as the pattern exemption.
	 *
	 * @since 0.1
	 *
	 * {@inheritDoc}
	 *
	 * @param string $name option identifier
	 * @param mixed $value option value
	 */
	public function setOption( $name, $value ) {

		// Every option is passed down, not only the ones handled here.
		if ( $this->tokenizer !== null ) {
			$this->tokenizer->setOption( $name, $value );
		}

		if ( $name === self::REGEX_EXEMPTION ) {
			$this->patternExemption = $value;
		}
	}

	/**
	 * This tokenizer always reports that it is not a word tokenizer.
	 *
	 * @since 0.1
	 *
	 * {@inheritDoc}
	 *
	 * @return boolean always false
	 */
	public function isWordTokenizer() {
		return false;
	}

	/**
	 * Splits the given text at every character matched by the filter pattern
	 * and returns the non-empty pieces in between.
	 *
	 * @since 0.1
	 *
	 * {@inheritDoc}
	 *
	 * @param string $string text to split
	 *
	 * @return array non-empty tokens; an empty array when preg_split() fails
	 */
	public function tokenize( $string ) {

		// Chain: let the inner tokenizer run first, then re-join its tokens
		// with spaces so the pattern below (which splits on \s) separates them
		// again.
		if ( $this->tokenizer !== null ) {
			$string = implode( " ", $this->tokenizer->tokenize( $string ) );
		}

		// Filter is based on https://github.com/kitech/cms-drupal/blob/master/modules/csplitter/filter.txt
		//
		// The exemption is removed from the pattern *source* by plain string
		// replacement, so it only has an effect when it matches the pattern
		// text literally.
		//
		// NOTE(review): the punctuation at the start of the character class is
		// full-width (U+FFxx). ASCII look-alikes such as ' / [ ] - would end
		// the PHP string, end the regex delimiter, or change the class, so keep
		// them full-width; confirm the exact list against the upstream file.
		//
		// The trailing alternative splits on a full stop only when there is no
		// digit directly before it and no digit directly after it.
		$pattern = str_replace(
			$this->patternExemption,
			'',
			'([\s\、，,。／？《》〈〉；：“”＂〃＇｀［］｛｝＼｜～！－＝＿＋）（()＊…—─％￥…◆★◇□■【】＃·啊吧把并被才从的得当对但到地而该过个给还和叫将就可来了啦里没你您哪那呢去却让使是时省随他我为现县向像象要由矣已以也又与于在之这则最乃\/\(\)\[\]{}<>\r\n"]|(?<!\d)\.(?!\d))'
		);

		// -1 is the documented "no limit" value; passing null for the integer
		// limit parameter is deprecated as of PHP 8.1.
		$result = preg_split( '/' . $pattern . '/u', $string, -1, PREG_SPLIT_NO_EMPTY );

		// preg_split() returns false on failure (e.g. invalid UTF-8 subject or
		// a pattern broken by the exemption); callers always get an array.
		return $result !== false ? $result : array();
	}

}
85