DocumentTokenizer   A
last analyzed

Complexity

Total Complexity 25

Size/Duplication

Total Lines 150
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 53
dl 0
loc 150
rs 10
c 0
b 0
f 0
wmc 25

12 Methods

Rating   Name   Duplication   Size   Complexity  
A addTokenVectorToVector() 0 4 2
A getTokens() 0 3 1
A tokenizeFields() 0 7 2
A tokenizeField() 0 8 3
A tokenizeDynamicBricks() 0 5 2
A tokenizeTitle() 0 5 1
A __construct() 0 5 1
A addTokenToVector() 0 7 3
A tokenize() 0 8 1
A tokenizeBricks() 0 10 4
A tokenizeBrick() 0 7 2
A getFieldType() 0 9 3
1
<?php
2
/**
3
 * User: jensk
4
 * Date: 21-2-2017
5
 * Time: 10:38
6
 */
7
8
namespace CloudControl\Cms\search;
9
10
11
use CloudControl\Cms\storage\entities\Document;
12
use CloudControl\Cms\storage\Storage;
13
14
/**
 * Builds a per-field token vector for a single document.
 *
 * The title, the document's own fields, its bricks and its dynamic bricks are
 * each run through CharacterFilter and Tokenizer; the resulting token counts
 * are collected in a nested array of the form
 * array(fieldName => array(token => count)).
 *
 * Tokenization is performed once, from the constructor; use getTokens() to
 * read the result.
 */
class DocumentTokenizer
{
    /**
     * Field types whose values are treated as indexable text.
     *
     * @var array
     */
    private static $textFieldTypes = array('String', 'Text', 'Rich Text');

    /**
     * The document being tokenized.
     *
     * @var Document
     */
    protected $document;

    /**
     * Token counts, keyed by field name and then by token.
     *
     * @var array
     */
    protected $tokenVector = array();

    /**
     * Storage used to look up document type and brick definitions.
     *
     * @var Storage
     */
    protected $storage;

    /**
     * Tokenizer constructor. Tokenizes the document immediately.
     *
     * @param \CloudControl\Cms\storage\entities\Document $document
     * @param Storage $storage
     * @throws \Exception When a field has no matching definition
     */
    public function __construct(Document $document, Storage $storage)
    {
        $this->document = $document;
        $this->storage = $storage;
        $this->tokenize();
    }

    /**
     * Get the token vector built for the document.
     *
     * @return array array(fieldName => array(token => count))
     */
    public function getTokens()
    {
        return $this->tokenVector;
    }

    /**
     * Execute tokenization of all document fields
     *
     * @throws \Exception When a field has no matching definition
     */
    private function tokenize()
    {
        $this->tokenizeTitle();
        $this->tokenizeFields();
        $this->tokenizeBricks();
        $this->tokenizeDynamicBricks();

        // Drop fields that ended up without any tokens.
        $this->tokenVector = array_filter($this->tokenVector);
        // NOTE(review): the elements sorted here are the per-field arrays, so
        // the ordering follows PHP's array comparison rules rather than the
        // individual token counts - confirm this is the intended ordering.
        arsort($this->tokenVector);
    }

    /**
     * Tokenize the document title into the 'title' field of the vector.
     */
    private function tokenizeTitle()
    {
        $this->tokenizeString($this->document->title, 'title');
    }

    /**
     * Tokenize every field that belongs directly to the document.
     *
     * @throws \Exception When a field is not part of the document type definition
     */
    private function tokenizeFields()
    {
        $documentDefinition = $this->storage
            ->getDocumentTypes()
            ->getDocumentTypeBySlug($this->document->documentTypeSlug);

        foreach ($this->document->fields as $fieldName => $field) {
            $fieldType = $this->getFieldType($fieldName, $documentDefinition);
            $this->tokenizeField($field, $fieldName, $fieldType);
        }
    }

    /**
     * Tokenize all values of one field, provided the field contains text.
     *
     * @param array|\Traversable $field Values stored for the field
     * @param string $fieldName Key under which the tokens are stored
     * @param string $fieldType Type of the field as given by its definition
     */
    private function tokenizeField($field, $fieldName, $fieldType)
    {
        // Only index fields that contain text. Strict comparison prevents
        // loosely-equal values (e.g. true) from matching a type name.
        if (!in_array($fieldType, self::$textFieldTypes, true)) {
            return;
        }

        foreach ($field as $value) {
            $this->tokenizeString($value, $fieldName);
        }
    }

    /**
     * Tokenize the bricks of the document.
     *
     * A brick slug may hold either a single brick or an array of bricks.
     *
     * @throws \Exception When a brick field is not part of the brick definition
     */
    private function tokenizeBricks()
    {
        foreach ($this->document->bricks as $brickSlug => $brickOrBricks) {
            if (is_array($brickOrBricks)) {
                foreach ($brickOrBricks as $brick) {
                    $this->tokenizeBrick($brick, $brickSlug);
                }
            } else {
                $this->tokenizeBrick($brickOrBricks, $brickSlug);
            }
        }
    }

    /**
     * Tokenize the fields of a single brick.
     *
     * Tokens are stored under "<brickSlug>__<fieldName>".
     *
     * @param object $brick Brick exposing the properties `fields` and `type`
     * @param string $brickSlug Prefix for the field names in the vector
     * @throws \Exception When a brick field is not part of the brick definition
     */
    private function tokenizeBrick($brick, $brickSlug)
    {
        $brickDefinition = $this->storage->getBricks()->getBrickBySlug($brick->type);

        foreach ($brick->fields as $fieldName => $field) {
            $fieldType = $this->getFieldType($fieldName, $brickDefinition);
            $this->tokenizeField($field, $brickSlug . '__' . $fieldName, $fieldType);
        }
    }

    /**
     * Tokenize the dynamic bricks of the document.
     *
     * Each dynamic brick is stored under the prefix
     * "dynamicBricks__<brick type><key>".
     *
     * @throws \Exception When a brick field is not part of the brick definition
     */
    private function tokenizeDynamicBricks()
    {
        foreach ($this->document->dynamicBricks as $key => $brick) {
            $this->tokenizeBrick($brick, 'dynamicBricks__' . $brick->type . $key);
        }
    }

    /**
     * Filter and tokenize one string and merge the result into the vector.
     *
     * @param string $value Raw text
     * @param string $field Key under which the tokens are stored
     */
    private function tokenizeString($value, $field)
    {
        $filteredString = new CharacterFilter($value);
        $tokenizer = new Tokenizer($filteredString);
        $this->addTokenVectorToVector($tokenizer->getTokenVector(), $field);
    }

    /**
     * Add a token to the existing tokenvector
     *
     * NOTE(review): empty() also rejects the token "0" (and the integer key 0
     * it becomes when used as an array key), so that token is never indexed -
     * confirm whether this is intended.
     *
     * @param string|int $token
     * @param string $field
     * @param int $count
     */
    private function addTokenToVector($token, $field, $count = 1)
    {
        if (empty($token)) {
            return;
        }

        if (isset($this->tokenVector[$field][$token])) {
            $this->tokenVector[$field][$token] += $count;
        } else {
            $this->tokenVector[$field][$token] = $count;
        }
    }

    /**
     * Add a complete token vector to the existing one.
     *
     * @param array $tokenVector array(token => count)
     * @param string $field
     */
    private function addTokenVectorToVector($tokenVector, $field)
    {
        foreach ($tokenVector as $token => $count) {
            $this->addTokenToVector($token, $field, $count);
        }
    }

    /**
     * Get the type for a field
     *
     * Searches the `fields` of the given definition for an entry whose slug
     * equals the field name.
     *
     * @param string $fieldName
     * @param object $documentDefinition Document type or brick definition
     * @return mixed The `type` of the matching field definition
     * @throws \Exception When no field definition matches the field name
     */
    private function getFieldType($fieldName, $documentDefinition)
    {
        foreach ($documentDefinition->fields as $fieldTypeDefinition) {
            if ($fieldTypeDefinition->slug === $fieldName) {
                return $fieldTypeDefinition->type;
            }
        }

        throw new \Exception('Unknown field type for field ' . $fieldName . ' in document ' . $this->document->path);
    }
}