Full::getExpressionTypeMap()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 1
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 3
ccs 1
cts 1
cp 1
crap 1
rs 10
1
<?php declare(strict_types = 1);
2
3
namespace Apicart\FQL\Tokenizer;
4
5
use Apicart\FQL\Token\Token\Flags;
6
use Apicart\FQL\Token\Token\Phrase;
7
use Apicart\FQL\Token\Token\Range;
8
use Apicart\FQL\Token\Token\Tag;
9
use Apicart\FQL\Token\Token\User;
10
use Apicart\FQL\Token\Token\Word;
11
use Apicart\FQL\Value\Token;
12
use RuntimeException;
13
14
final class Full extends AbstractTokenExtractor
{

    /**
     * Map of anchored PCRE patterns (keys) to Tokenizer token type constants (values).
     *
     * Every pattern captures the token text in the named group "lexeme". The
     * TOKEN_TERM patterns additionally expose named groups (domain, word,
     * phrase, quote, rangeFrom, rangeTo, rangeStartSymbol, rangeEndSymbol,
     * tag, user, marker, flags) that createTermToken() inspects to decide
     * which concrete token to build.
     *
     * NOTE(review): the entries are presumably tried in declaration order by
     * the parent extractor - confirm in AbstractTokenExtractor.
     *
     * @var array
     */
    private static $expressionTypeMap = [
        // One or more whitespace characters.
        '/(?<lexeme>[\s]+)/Au' => Tokenizer::TOKEN_WHITESPACE,
        // Single-character operators.
        '/(?<lexeme>\+)/Au' => Tokenizer::TOKEN_MANDATORY,
        '/(?<lexeme>-)/Au' => Tokenizer::TOKEN_PROHIBITED,
        '/(?<lexeme>!)/Au' => Tokenizer::TOKEN_LOGICAL_NOT_2,
        '/(?<lexeme>\))/Au' => Tokenizer::TOKEN_GROUP_END,
        // Keyword operators; the following delimiter (or end of input) must be
        // present but is not part of the lexeme.
        '/(?<lexeme>NOT)(?:[\s"()+\-!]|$)/Au' => Tokenizer::TOKEN_LOGICAL_NOT,
        '/(?<lexeme>(?:AND|&&))(?:[\s"()+\-!]|$)/Au' => Tokenizer::TOKEN_LOGICAL_AND,
        '/(?<lexeme>(?:OR|\|\|))(?:[\s"()+\-!]|$)/Au' => Tokenizer::TOKEN_LOGICAL_OR,
        // Opening parenthesis, optionally preceded by "domain:".
        '/(?<lexeme>(?:(?<domain>(?:[a-zA-Z_\-.\[\]\*%][a-zA-Z0-9_\-.\[\]\*%]*|\'[^\']+\')):)?(?<delimiter>\())/Au'
        => Tokenizer::TOKEN_GROUP_BEGIN,
        // "#tag" term (the "#" must not be preceded by a backslash).
        '/(?<lexeme>(?:(?<marker>(?<!\\\\)\#)(?<tag>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au'
        => Tokenizer::TOKEN_TERM,
        // "@user" term (the "@" must not be preceded by a backslash).
        '/(?<lexeme>(?:(?<marker>(?<!\\\\)@)(?<user>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*)))(?:[\s"()+!]|$)/Au'
        => Tokenizer::TOKEN_TERM,
        // Double-quoted phrase, optionally prefixed by "domain:" and optionally
        // followed by "#flags".
        '/(?<lexeme>(?:(?<domain>(?:[a-zA-Z_\-.\[\]\*%][a-zA-Z0-9_\-.\[\]\*%]*|\'[^\']+\')):)?(?<quote>(?<!\\\\)["])' .
        '(?<phrase>.*?)(?:(?<!\\\\)(?P=quote))(?:(?<marker>(?<!\\\\)\#)(?<flags>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*))?)/Aus'
        => Tokenizer::TOKEN_TERM,
        // Range "<from> TO <to>" wrapped in [ ] or { } brackets, optionally
        // prefixed by "domain:" and optionally followed by "#flags". Each bound
        // is a bare value, "*", or a double-quoted string.
        '/(?<lexeme>(?:(?<domain>(?:[a-zA-Z_\-.\[\]\*%][a-zA-Z0-9_\-.\[\]\*%]*|\'[^\']+\')):)?(?<rangeStartSymbol>[\[\{])' .
        '(?<rangeFrom>([a-zA-Z0-9\,\._-]+|\*)|(?<quoteFrom>(?<!\\\\)["]).*?(?:(?<!\\\\)(?P=quoteFrom)))[\s]+TO[\s]+' .
        '(?<rangeTo>([a-zA-Z0-9\,\._-]+|\*)|(?<quoteTo>(?<!\\\\)["]).*?(?:(?<!\\\\)(?P=quoteTo)))' .
        '(?<rangeEndSymbol>[\]\}])(?:(?<marker>(?<!\\\\)\#)(?<flags>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*))?)/Aus'
        => Tokenizer::TOKEN_TERM,
        // Plain word (backslash-escaped special characters allowed), optionally
        // prefixed by "domain:" and optionally followed by "#flags". The closing
        // delimiter is required but lies outside the lexeme group.
        '/(?<lexeme>(?:(?<domain>(?:[a-zA-Z_\-.\[\]\*%][a-zA-Z0-9_\-.\[\]\*%]*|\'[^\']+\')):)?' .
        '(?<word>(?:\\\\\\\\|\\\\ |\\\\\(|\\\\\)|\\\\"|[^"()\s])+?))' .
        '(?:(?<marker>(?<!\\\\)\#)(?<flags>[a-zA-Z0-9_][a-zA-Z0-9_\-.]*))?(?:(?<!\\\\)["]|\(|\)|$|\s)/Au'
        => Tokenizer::TOKEN_TERM,
    ];


    /**
     * Returns the pattern => token type map declared on this class.
     */
    protected function getExpressionTypeMap(): array
    {
        return self::$expressionTypeMap;
    }


    /**
     * Builds the concrete term token for a TOKEN_TERM match.
     *
     * The kind of token is decided by which named groups are set in $data,
     * checked in this order: range, word, phrase, tag, user.
     *
     * @param int $position Position passed through to the created token.
     * @param array $data Named groups of the regex match (see $expressionTypeMap).
     *
     * @return Token A Range, Word, Phrase, Tag or User token.
     *
     * @throws RuntimeException When $data matches none of the known term shapes.
     */
    protected function createTermToken(int $position, array $data): Token
    {
        $lexeme = $data['lexeme'];

        if (isset($data['rangeStartSymbol'], $data['rangeEndSymbol'])) {
            // Drop double quotes first, then turn commas into dots
            // (str_replace applies the pairs in the given order).
            $from = str_replace(['"', ','], ['', '.'], $data['rangeFrom']);
            $to = str_replace(['"', ','], ['', '.'], $data['rangeTo']);

            return new Range(
                $lexeme,
                $position,
                $data['domain'],
                is_array($from) ? reset($from) : $from,
                is_array($to) ? reset($to) : $to,
                $this->getRangeTypeBySymbol($data['rangeStartSymbol']),
                $this->getRangeTypeBySymbol($data['rangeEndSymbol']),
                $this->buildFlagsFromMatch($data)
            );
        }

        if (isset($data['word'])) {
            return new Word(
                $lexeme,
                $position,
                $data['domain'],
                // Strip the escaping backslash in front of special characters.
                preg_replace('/(?:\\\\(\\\\|(["+\-!():#@ ])))/', '$1', $data['word']),
                $this->buildFlagsFromMatch($data)
            );
        }

        if (isset($data['phrase'])) {
            $quote = $data['quote'];

            return new Phrase(
                $lexeme,
                $position,
                $data['domain'],
                $quote,
                // Strip the escaping backslash in front of the quote character.
                preg_replace('/(?:\\\\([' . $quote . ']))/', '$1', $data['phrase']),
                $this->buildFlagsFromMatch($data)
            );
        }

        if (isset($data['tag'])) {
            return new Tag($lexeme, $position, $data['marker'], $data['tag']);
        }

        if (isset($data['user'])) {
            return new User($lexeme, $position, $data['marker'], $data['user']);
        }

        throw new RuntimeException('Could not extract term token from the given data');
    }


    /**
     * Maps a range bracket to its range type: curly braces are exclusive,
     * anything else is treated as inclusive.
     */
    protected function getRangeTypeBySymbol(string $symbol): string
    {
        $isExclusive = $symbol === '{' || $symbol === '}';

        return $isExclusive ? Range::TYPE_EXCLUSIVE : Range::TYPE_INCLUSIVE;
    }


    /**
     * Creates a Flags object when both the "marker" and "flags" groups are
     * set in the match data; otherwise returns null.
     *
     * @param array $data Named groups of the regex match.
     *
     * @return Flags|null
     */
    private function buildFlagsFromMatch(array $data)
    {
        if (!isset($data['marker'], $data['flags'])) {
            return null;
        }

        return new Flags($data['marker'], $data['flags']);
    }

}
118