Simple::tokenize()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 9
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 9
ccs 6
cts 6
cp 1
rs 9.6666
cc 1
eloc 6
nc 1
nop 1
crap 1
1
<?php
2
/**
3
 * Copyright (c) 2016 Martin Dilling-Hansen <[email protected]>
4
 * https://github.com/scripturadesign/tokenizer
5
 */
6
7
namespace Scriptura\Tokenizer\Tokenizers;
8
9
use Scriptura\Tokenizer\Tokenizer;
10
11
class Simple implements Tokenizer
{
    /**
     * Get the token sequence from a character sequence
     *
     * The string is padded with a space on each side, contractions are
     * separated from the word they are attached to, runs of spaces are
     * collapsed into one, the padding is trimmed off again and the result
     * is split on single spaces.
     *
     * @param string $string
     *
     * @return array The tokens in order; an empty array when nothing is
     *               left after trimming (empty or whitespace-only input).
     */
    public function tokenize($string)
    {
        $string = $this->wrapInSpaces($string);
        $string = $this->spaceBeforeContractions($string);
        $string = $this->concatenateDoubleOrMoreSpaces($string);
        $string = $this->removeStartingAndEndingSpaces($string);

        // explode() on an empty string returns [''], which would report a
        // single empty token for input that contains no tokens at all.
        if ($string === '') {
            return [];
        }

        return explode(' ', $string);
    }

    /**
     * Pad the string with one space on each side.
     *
     * The contraction patterns require a space after the contraction (and a
     * non-word character before the special cases), so the padding lets them
     * match at the very start and end of the input.
     *
     * @param string $string
     *
     * @return string
     */
    protected function wrapInSpaces($string)
    {
        return ' ' . $string . ' ';
    }

    /**
     * Insert a space between a word and a trailing contraction.
     *
     * Only contractions that are directly followed by a space are handled.
     * Any double spaces produced here are expected to be collapsed by the
     * caller afterwards.
     *
     * @param string $string
     *
     * @return string
     */
    protected function spaceBeforeContractions($string)
    {
        // Special cases: "ain't" becomes "am n't" and "can't" becomes
        // "can n't". Lookarounds are used for the surrounding characters so
        // that the non-word character in front of the word is kept in the
        // output, and so that the space behind it stays available as the
        // left boundary of an immediately following match.
        $string = preg_replace('/(?<=[^\w])(AI)(N\'T)(?= )/', 'AM ${2}', $string);
        // Any remaining casing of "ain't" is replaced by a lowercase "am".
        $string = preg_replace('/(?<=[^\w])(ai)(n\'t)(?= )/i', 'am ${2}', $string);
        $string = preg_replace('/(?<=[^\w])(ca(n))(\'t)(?= )/i', '${1} ${2}${3}', $string);

        // The rest: 's, 'm, 'd, 'll, 're, 've and n't (lower or upper case).
        $string = preg_replace('/(\'[sSmMdD]) /', ' ${1} ', $string);
        $string = preg_replace('/(\'ll|\'LL|\'re|\'RE|\'ve|\'VE|n\'t|N\'T) /', ' ${1} ', $string);

        return $string;
    }

    /**
     * Collapse every run of two or more spaces into a single space.
     *
     * Only the space character is affected; other whitespace is left as is.
     *
     * @param string $string
     *
     * @return string
     */
    protected function concatenateDoubleOrMoreSpaces($string)
    {
        return preg_replace('/  +/', ' ', $string);
    }

    /**
     * Strip leading and trailing whitespace using trim()'s default
     * character list.
     *
     * @param string $string
     *
     * @return string
     */
    protected function removeStartingAndEndingSpaces($string)
    {
        return trim($string);
    }
}
60