TermFrequency::execute()   A
last analyzed

Complexity

Conditions 5
Paths 7

Size

Total Lines 27
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 20
nc 7
nop 0
dl 0
loc 27
rs 9.2888
c 0
b 0
f 0
1
<?php
2
/**
3
 * User: jensk
4
 * Date: 1-3-2017
5
 * Time: 10:34
6
 */
7
8
namespace CloudControl\Cms\search\indexer;
9
10
use CloudControl\Cms\search\Indexer;
11
12
/**
13
 * Calculate, relatively how often a term is used in a document
14
 * Where relatively means compared to the total of terms, how often is term X
15
 * used. For example:
16
 * doc1 has the following terms:
17
 * - term1 (count 2)
18
 * - term2 (count 1)
19
 * The total count of terms = 3
20
 * The frequency of term1 in doc1 is:
21
 * count of term 1 / total count of terms
22
 * =
23
 * 2 / 3 = 0.66666666667
24
 */
25
class TermFrequency
{
    /**
     * Database connection used to read `term_count` and write `term_frequency`.
     *
     * @var \PDO
     */
    protected $dbHandle;

    /**
     * TermFrequency constructor.
     *
     * @param \PDO $dbHandle
     */
    public function __construct($dbHandle)
    {
        $this->dbHandle = $dbHandle;
    }

    /**
     * Computes the relative frequency of every term per document path and
     * field (term count divided by the summed count of all terms for that
     * document path and field) and inserts the results into the
     * `term_frequency` table in multi-row batches.
     *
     * @throws \Exception When inserting a batch fails.
     */
    public function execute()
    {
        $db = $this->dbHandle;

        // The statement prefix is identical for every batch, so build it once.
        $sql = '
				INSERT INTO term_frequency (documentPath, field, term, frequency)
					 VALUES 
			';

        foreach ($this->getTotalTermCountPerDocument($db) as $documentField) {
            $totalTermCount = $documentField->totalTermCount;

            // A NULL or zero total would make the division below fail
            // (DivisionByZeroError on PHP 8, NAN/INF on PHP 7); no meaningful
            // frequency exists for such a group, so nothing is stored for it.
            if (!is_numeric($totalTermCount) || (float)$totalTermCount === 0.0) {
                continue;
            }

            $termsForDocumentField = $this->getTermsForDocumentField(
                $documentField->documentPath,
                $documentField->field
            );
            $quotedDocumentPath = $db->quote($documentField->documentPath);
            $quotedField = $db->quote($documentField->field);
            $values = array();

            foreach ($termsForDocumentField as $term) {
                $frequency = (int)$term->count / $totalTermCount;

                // Before PHP 8 the float-to-string cast honours the current
                // locale and may emit a decimal comma; always store a dot.
                $frequencyLiteral = str_replace(',', '.', (string)$frequency);

                $values[] = $quotedDocumentPath . ',' . $quotedField . ', ' . $db->quote($term->term) . ', ' . $db->quote($frequencyLiteral);

                // Flush once the batch reaches the configured row limit.
                if (count($values) >= Indexer::SQLITE_MAX_COMPOUND_SELECT) {
                    $this->executeStore($sql, $values, $db);
                    $values = array();
                }
            }

            // Store whatever is left over from the last, partially filled batch.
            if (count($values) !== 0) {
                $this->executeStore($sql, $values, $db);
            }
        }
    }

    /**
     * Fetches all terms and their counts from `term_count` for one document
     * path and field.
     *
     * @param string $documentPath
     * @param string $field
     *
     * @return array Row objects exposing the `term` and `count` columns.
     */
    private function getTermsForDocumentField($documentPath, $field)
    {
        $db = $this->dbHandle;
        $stmt = $db->prepare('
			SELECT `term`, `count`
			  FROM `term_count`
			 WHERE `documentPath` = :documentPath
			   AND `field` = :field
		');
        $stmt->bindValue(':documentPath', $documentPath);
        $stmt->bindValue(':field', $field);
        $stmt->execute();

        return $stmt->fetchAll(\PDO::FETCH_CLASS);
    }

    /**
     * Fetches the summed term count for every document path and field
     * combination present in `term_count`.
     *
     * @param \PDO $db
     *
     * @return array Row objects exposing `documentPath`, `field` and
     *               `totalTermCount`.
     */
    private function getTotalTermCountPerDocument($db)
    {
        $stmt = $db->prepare('
			SELECT documentPath, field, SUM(count) AS totalTermCount
			  FROM term_count
		  GROUP BY documentPath, field
		');
        $stmt->execute();

        return $stmt->fetchAll(\PDO::FETCH_CLASS);
    }

    /**
     * Appends the given value tuples to the INSERT prefix and runs the
     * resulting multi-row statement.
     *
     * @param string   $sql    INSERT statement up to and including `VALUES`.
     * @param string[] $values Already quoted, comma separated value lists, one per row.
     * @param \PDO     $db
     *
     * @throws \Exception When the query reports a failure.
     */
    private function executeStore($sql, $values, $db)
    {
        // Each entry becomes one parenthesised row: (a,b,c),\n(d,e,f);
        $sql .= '(' . implode('),' . PHP_EOL . '(', $values) . ');';
        if (!$db->query($sql)) {
            $errorInfo = $db->errorInfo();
            $errorMsg = $errorInfo[2];
            throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
        }
    }

}