JaTokenizerTest::icuTextProvider() - Code Metrics - Inspection of "Add SanitizerFactory" - onoi/tesa - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 9401a4...2cc7a2 )

by mw

created 2016-08-02 14:22 UTC

JaTokenizerTest::icuTextProvider() B

↳ Parent: JaTokenizerTest

Complexity

Conditions	1
Paths	1

Size

Total Lines	27
Code Lines	19

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	1

Metric	Value
c	1
b	0
f	1
dl	0
loc	27
rs	8.8571
cc	1
eloc	19
nc	1
nop	0

1			<?php
2
3			namespace Onoi\Tesa\Tests\Integration;
4
5			use Onoi\Tesa\SanitizerFactory;
6
/**
 * Integration tests that run the tokenizers created by the SanitizerFactory
 * against a fixed Japanese sample text and compare the resulting token lists
 * with hard-coded expectations.
 *
 * @group onoi-tesa
 *
 * @license GNU GPL v2+
 * @since 0.1
 *
 * @author mwjames
 */
class JaTokenizerTest extends \PHPUnit_Framework_TestCase {

	/**
	 * Tokenizes the sample text with the ICU word boundary tokenizer (wrapping
	 * a generic regex tokenizer) and compares the result with the expected
	 * token list. The test is skipped when the tokenizer reports that it is
	 * not available.
	 *
	 * @dataProvider icuTextProvider
	 *
	 * @param string $text
	 * @param array $expected
	 */
	public function testIcuWordBoundaryTokenizer( $text, $expected ) {

		$sanitizerFactory = new SanitizerFactory();

		$tokenizer = $sanitizerFactory->newIcuWordBoundaryTokenizer(
			$sanitizerFactory->newGenericRegExTokenizer()
		);

		if ( !$tokenizer->isAvailable() ) {
			$this->markTestSkipped( 'ICU extension is not available.' );
		}

		$this->assertEquals(
			$expected,
			$tokenizer->tokenize( $text )
		);
	}

	/**
	 * Tokenizes the sample text with the JaTinySegmenter tokenizer (wrapping
	 * a punctuation regex tokenizer) and compares the result with the expected
	 * token list.
	 *
	 * @dataProvider tinyTextProvider
	 *
	 * @param string $text
	 * @param array $expected
	 */
	public function testJaTinySegmenterTokenizer( $text, $expected ) {

		$sanitizerFactory = new SanitizerFactory();

		$tokenizer = $sanitizerFactory->newJaTinySegmenterTokenizer(
			$sanitizerFactory->newPunctuationRegExTokenizer()
		);

		$this->assertEquals(
			$expected,
			$tokenizer->tokenize( $text )
		);
	}

	/**
	 * Data sets for testIcuWordBoundaryTokenizer: each entry is a pair of
	 * the input text and the expected list of tokens.
	 *
	 * @return array
	 */
	public function icuTextProvider() {

		// Explicit initialization instead of relying on PHP creating the
		// array implicitly on the first append
		$provider = array();

		$provider[] = array(
			$this->getSampleText(),
			array(
				'計算機', '科学', 'における', '字句', '解析', 'じ', 'く', 'かい',
				'せき', '、', '英', 'Lexical', 'Analysis', 'と', 'は', '、', 'ソース', 'コード', 'を',
				'構成', 'する', '文字', 'の', '並び', 'を', '、', 'トーク',
				'ン', 'token', 'の', '並びに', '変換', 'する', 'こと', 'を',
				'いう', '。', 'ここ', 'で', 'いう', '「', 'トーク', 'ン',
				'」', 'と', 'は', '、', '意味', 'を', '持つ', 'コード',
				'の', '最小', '単位', 'の', 'こと', '。', '字句', '解析',
				'を', '行う', 'プログラム', 'は', '、', '字句', '解析', '器',
				'lexical', 'analyzer', '略称', '：',
				'lexer', 'と', '呼ばれる', '。', '字句', '解析', '器', 'は',
				'スキャナ', 'scanner', 'と', 'トーク','ナ', 'イザ', 'tokenizer', 'から',
				'構成', 'さ', 'れる', '。'
			)
		);

		return $provider;
	}

	/**
	 * Data sets for testJaTinySegmenterTokenizer: each entry is a pair of
	 * the input text and the expected list of tokens.
	 *
	 * @return array
	 */
	public function tinyTextProvider() {

		// Reference token list, presumably the expectation used by the
		// upstream spec at
		// https://github.com/NaturalNode/natural/blob/master/spec/tokenizer_ja_spec.js
		// The expectation asserted below is not identical to it (e.g. 'をいう'
		// is a single token here).
		/*
		['計算', '機科', '学', 'に', 'おける', '字句', '解析',
		'じくかい', 'せき', '英', 'Lexical', 'Analysis', 'と', 'は', 'ソースコード',
		'を', '構成', 'する', '文字', 'の', '並び', 'を', 'トークン', 'token', 'の',
		'並び', 'に', '変換', 'する', 'こと', 'を', 'いう', 'ここ', 'でいう', 'トークン',
		'と', 'は', '意味', 'を', '持つ', 'コード', 'の', '最小', '単位', 'の', 'こと',
		'字句', '解析', 'を', '行う', 'プログラム', 'は', '字句', '解析', '器', 'lexical',
		'analyzer', '略称', 'lexer', 'と', '呼ば', 'れる', '字句', '解析', '器', 'は',
		'スキャナ', 'scanner', 'と', 'トークナイザ', 'tokenizer', 'から', '構成', 'さ',
		'れる']
		*/

		// Explicit initialization instead of relying on PHP creating the
		// array implicitly on the first append
		$provider = array();

		$provider[] = array(
			$this->getSampleText(),
			array(
				'計算', '機科', '学', 'に', 'おける', '字句', '解析', 'じくかい','せき','英',
				'Lexical', 'Analysis', 'と', 'は', 'ソースコード', 'を', '構成', 'する',
				'文字', 'の', '並び', 'を', 'トークン', 'token', 'の', '並び', 'に', '変換',
				'する', 'こと', 'をいう', 'ここ', 'でいう', 'トークン', 'と', 'は', '意味', 'を',
				'持つ', 'コード', 'の', '最小', '単位', 'の', 'こと', '字句', '解析', 'を',
				'行う', 'プログラム', 'は', '字句', '解析', '器', 'lexical', 'analyzer',
				'略称', 'lexer', 'と', '呼ば', 'れる', '字句', '解析', '器', 'は', 'スキャナ', 'scanner',
				'と', 'トークナイザ', 'tokenizer', 'から', '構成', 'さ', 'れる',
			)
		);

		return $provider;
	}

	/**
	 * Returns the Japanese sample text shared by both data providers, so the
	 * two tokenizers are exercised against exactly the same input.
	 *
	 * @see https://github.com/NaturalNode/natural/blob/master/spec/tokenizer_ja_spec.js
	 *
	 * @return string
	 */
	private function getSampleText() {
		return "計算機科学における字句解析 (じくかいせき、英: Lexical Analysis) とは、ソースコードを構成する文字の並びを、トークン (token) の並びに変換することをいう。\n" .
			"ここでいう「トークン」とは、意味を持つコードの最小単位のこと。字句解析を行うプログラムは、字句解析器 (lexical analyzer, 略称：lexer) と呼ばれる。\n" .
			"字句解析器はスキャナ (scanner) とトークナイザ (tokenizer) から構成される。\n";
	}

}
119

onoi / tesa

Push — master ( 9401a4...2cc7a2 )

JaTokenizerTest::icuTextProvider() B

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like