TermFrequency   A
last analyzed

Complexity

Total Complexity 10

Size/Duplication

Total Lines 88
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 38
dl 0
loc 88
rs 10
c 0
b 0
f 0
wmc 10

5 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 3 1
A getTotalTermCountPerDocument() 0 11 1
A getTermsForDocumentField() 0 13 1
A execute() 0 27 5
A executeStore() 0 7 2
1
<?php
2
/**
3
 * User: jensk
4
 * Date: 1-3-2017
5
 * Time: 10:34
6
 */
7
8
namespace CloudControl\Cms\search\indexer;
9
10
use CloudControl\Cms\search\Indexer;
11
12
/**
13
 * Calculate how often, relatively, a term is used in a document.
14
 * Where "relatively" means: compared to the total number of terms, how often is term X
15
 * used. For example:
16
 * doc1 has the following terms:
17
 * - term1 (count 2)
18
 * - term2 (count 1)
19
 * The total count of terms = 3
20
 * The frequency of term1 in doc1 is:
21
 * count of term 1 / total count of terms
22
 * =
23
 * 2 / 3 = 0.66666666667
24
 */
25
class TermFrequency
{
    /**
     * Connection used both to read from `term_count` and to write to
     * `term_frequency`.
     *
     * @var \PDO
     */
    protected $dbHandle;

    /**
     * TermFrequency constructor.
     *
     * @param \PDO $dbHandle
     */
    public function __construct($dbHandle)
    {
        $this->dbHandle = $dbHandle;
    }

    /**
     * Compute the frequency of every term per (documentPath, field) pair and
     * insert the results into the `term_frequency` table.
     *
     * The frequency of a term is its count divided by the summed count of all
     * terms for the same documentPath and field. Rows are written with
     * multi-row INSERT statements of at most
     * Indexer::SQLITE_MAX_COMPOUND_SELECT rows each.
     *
     * Document/field groups whose total term count is zero are skipped, since
     * no frequency can be derived for them.
     *
     * @throws \Exception When an INSERT statement fails (see executeStore()).
     */
    public function execute()
    {
        $db = $this->dbHandle;

        // Statement prefix shared by every batch; executeStore() appends the rows.
        $sql = '
				INSERT INTO term_frequency (documentPath, field, term, frequency)
					 VALUES 
			';

        foreach ($this->getTotalTermCountPerDocument($db) as $documentField) {
            $totalTermCount = (int)$documentField->totalTermCount;
            if ($totalTermCount === 0) {
                // A zero (or NULL) total would make the division below fail:
                // a warning plus INF/NAN on PHP 7, DivisionByZeroError on PHP 8.
                continue;
            }

            $termsForDocumentField = $this->getTermsForDocumentField(
                $documentField->documentPath,
                $documentField->field
            );

            // Identical for every row of this document field, so quote it once.
            $rowPrefix = $db->quote($documentField->documentPath) . ',' . $db->quote($documentField->field) . ', ';

            $values = array();
            foreach ($termsForDocumentField as $term) {
                $frequency = (int)$term->count / $totalTermCount;
                $values[] = $rowPrefix . $db->quote($term->term) . ', ' . $db->quote($frequency);

                // Flush as soon as the batch reaches the configured maximum.
                // NOTE(review): the constant presumably mirrors SQLite's
                // compound-select limit for multi-row VALUES - confirm in Indexer.
                if (count($values) >= Indexer::SQLITE_MAX_COMPOUND_SELECT) {
                    $this->executeStore($sql, $values, $db);
                    $values = array();
                }
            }

            // Store whatever is left over after the last full batch.
            if (count($values) !== 0) {
                $this->executeStore($sql, $values, $db);
            }
        }
    }

    /**
     * Fetch every term with its count for one document field.
     *
     * @param string $documentPath Value matched against `term_count`.`documentPath`.
     * @param string $field        Value matched against `term_count`.`field`.
     *
     * @return array Result rows as objects exposing `term` and `count`.
     */
    private function getTermsForDocumentField($documentPath, $field)
    {
        $db = $this->dbHandle;
        $stmt = $db->prepare('
			SELECT `term`, `count`
			  FROM `term_count`
			 WHERE `documentPath` = :documentPath
			   AND `field` = :field
		');
        $stmt->bindValue(':documentPath', $documentPath);
        $stmt->bindValue(':field', $field);
        $stmt->execute();

        return $stmt->fetchAll(\PDO::FETCH_CLASS);
    }

    /**
     * Fetch the summed term count for every (documentPath, field) pair found
     * in `term_count`.
     *
     * @param \PDO $db Connection to run the query on.
     *
     * @return array Result rows as objects exposing `documentPath`, `field`
     *               and `totalTermCount`.
     */
    private function getTotalTermCountPerDocument($db)
    {
        $stmt = $db->prepare('
			SELECT documentPath, field, SUM(count) AS totalTermCount
			  FROM term_count
		  GROUP BY documentPath, field
		');
        $stmt->execute();

        return $stmt->fetchAll(\PDO::FETCH_CLASS);
    }

    /**
     * Append the given rows to the INSERT prefix and run the statement.
     *
     * @param string   $sql    INSERT statement up to and including `VALUES`.
     * @param string[] $values Already quoted, comma separated column values,
     *                         one entry per row (without parentheses).
     * @param \PDO     $db     Connection to run the statement on.
     *
     * @throws \Exception When the query reports failure; the message carries
     *                    the driver error text and the full SQL statement.
     */
    private function executeStore($sql, $values, $db)
    {
        // Wrap each row in parentheses: (row1),\n(row2),\n...(rowN);
        $sql .= '(' . implode('),' . PHP_EOL . '(', $values) . ');';

        if ($db->query($sql) === false) {
            $errorInfo = $db->errorInfo();
            $errorMsg = $errorInfo[2];
            throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
        }
    }
}