Failed Conditions
Push — master ( b7bcff...572aee )
by
unknown
03:12
created

TextFormat   A

Complexity

Total Complexity 4

Size/Duplication

Total Lines 58
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 4
eloc 19
c 1
b 0
f 0
dl 0
loc 58
ccs 16
cts 16
cp 1
rs 10

3 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 2 1
A notExactTerms() 0 20 2
A exactTerms() 0 12 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Application;
6
7
use Normalizer;
8
9
/**
10
 * Utility class to parse, format and extract terms from a text.
11
 */
12
class TextFormat
{
    /**
     * Characters that are replaced by a single space before the text is
     * split into words by notExactTerms().
     */
    public const PUNCTUATIONS = [
        '.', '।', '։', '。', '۔', '⳹', '܁', '።', '᙮', '᠃', '⳾', '꓿', '꘎', '꛳', '࠽', '᭟', ',', '،', '、', '՝', '߸', '፣',
        '᠈', '꓾', '꘍', '꛵', '᭞', '⁇', '⁉', '⁈', '‽', '❗', '‼', '⸘', '?', ';', '¿', '؟', '՞', '܆', '፧', '⳺', '⳻', '꘏',
        '꛷', '𑅃', '꫱', '!', '¡', '߹', '᥄', '·', '𐎟', '𐏐', '𒑰', '፡', ' ', '𐤟', '࠰', '—', '–', '‒', '‐', '⁃', '﹣', '-',
        '֊', '᠆', ';', '·', '؛', '፤', '꛶', '․', ':', '፥', '꛴', '᭝', '…', '︙', 'ຯ', '«', '‹', '»', '›', '„', '‚', '“',
        '‟', '‘', '‛', '”', '’', '"', "'", '(', ')',
    ];

    /**
     * @param string $text raw text from which terms are extracted
     */
    public function __construct(private string $text)
    {
    }

    /**
     * Parse the term to extract a list of words that are not quoted.
     *
     * Double-quoted phrases are removed, every character listed in
     * PUNCTUATIONS becomes a space, and the remainder is split on
     * whitespace. Duplicates are dropped with array_unique(), which keeps
     * the key of the first occurrence, so keys may not be consecutive.
     *
     * Text that cannot be normalized (e.g. invalid UTF-8) yields an empty list.
     *
     * @return string[]
     */
    public function notExactTerms(): array
    {
        $term = $this->normalizedTerm();

        // Blank out the quoted phrases (quotes included), then the punctuation
        $withoutExact = str_replace($this->quotedMatches($term)[0], ' ', $term);
        $withoutPunctuations = str_replace(self::PUNCTUATIONS, ' ', $withoutExact);

        // Split words by any whitespace. The `u` modifier makes PCRE work on
        // code points instead of bytes, so Unicode spaces are recognised and a
        // multibyte character can never be cut in the middle.
        $words = preg_split('/[[:space:]]+/u', $withoutPunctuations, -1, PREG_SPLIT_NO_EMPTY) ?: [];

        // Drop duplicates
        return array_unique($words);
    }

    /**
     * Parse the term to extract the list of double-quoted phrases.
     *
     * The returned strings are the contents between the quotes. Duplicates
     * are dropped with array_unique(), which keeps the key of the first
     * occurrence, so keys may not be consecutive.
     *
     * Text that cannot be normalized (e.g. invalid UTF-8) yields an empty list.
     *
     * @return string[]
     */
    public function exactTerms(): array
    {
        return array_unique($this->quotedMatches($this->normalizedTerm())[1]);
    }

    /**
     * Return the Unicode-normalized text with empty quotes (`""`) removed.
     *
     * Normalizer::normalize() returns false when it fails; that case is
     * mapped to an empty string so that callers always receive a string
     * instead of triggering a TypeError under strict types.
     */
    private function normalizedTerm(): string
    {
        $normalized = Normalizer::normalize($this->text);
        if ($normalized === false) {
            return '';
        }

        // Drop empty quote
        return str_replace('""', '', $normalized);
    }

    /**
     * Find every double-quoted phrase in the given term.
     *
     * @return array<int, string[]> index 0 holds the full matches (quotes included), index 1 the quoted contents
     */
    private function quotedMatches(string $term): array
    {
        preg_match_all('~"([^"]*)"~', $term, $matches);

        return $matches;
    }
}
72