Completed
Push — develop ( 7f4a0b...601ff8 )
by Arkadiusz
03:21
created

TfIdfTransformer::fit()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 9
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 9
rs 9.6666
cc 2
eloc 5
nc 2
nop 1
1
<?php
2
3
declare (strict_types = 1);
4
5
namespace Phpml\FeatureExtraction;
6
7
use Phpml\Transformer;
8
9
class TfIdfTransformer implements Transformer
{
    /**
     * Per-feature weight computed by fit(): log10(number of samples / number
     * of samples in which the feature is positive), keyed by feature index.
     *
     * @var array
     */
    private $idf = [];

    /**
     * Optionally fits the transformer right away.
     *
     * @param array|null $samples list of samples (each an array of feature
     *                            values keyed by feature index); when null or
     *                            empty, no fitting is performed
     */
    public function __construct(array $samples = null)
    {
        if ($samples) {
            $this->fit($samples);
        }
    }

    /**
     * Computes the inverse document frequency of every feature index.
     *
     * @param array $samples list of samples, each an array of feature values
     *                       keyed by feature index
     */
    public function fit(array $samples)
    {
        $this->countTokensFrequency($samples);

        $count = count($samples);
        // Write back by key instead of iterating by reference, so no dangling
        // reference to the last element is left behind.
        foreach ($this->idf as $index => $frequency) {
            // A feature that is never positive has a document frequency of 0;
            // dividing by it would raise a warning (PHP 7) or a
            // DivisionByZeroError (PHP 8). Such a feature gets weight 0.
            $this->idf[$index] = $frequency > 0
                ? log($count / $frequency, 10)
                : 0.0;
        }
    }

    /**
     * Multiplies, in place, every feature value by the weight stored for its
     * index during fit().
     *
     * @param array $samples list of samples, modified by reference
     */
    public function transform(array &$samples)
    {
        foreach ($samples as &$sample) {
            foreach ($sample as $index => &$feature) {
                $feature = $feature * $this->idf[$index];
            }
            // Break the reference so later writes to $feature cannot leak
            // into the last element of this sample.
            unset($feature);
        }
        unset($sample);
    }

    /**
     * Stores in $this->idf, for each feature index, the number of samples in
     * which that feature's value is greater than zero.
     *
     * @param array $samples list of samples, each an array of feature values
     *                       keyed by feature index
     */
    private function countTokensFrequency(array $samples)
    {
        $this->idf = [];

        // reset() returns false for an empty array; it also finds the first
        // sample when the list is not keyed from 0.
        $first = reset($samples);
        if (!is_array($first)) {
            return;
        }

        $this->idf = array_fill_keys(array_keys($first), 0);

        foreach ($samples as $sample) {
            foreach ($sample as $index => $count) {
                if ($count > 0) {
                    // Default to 0 for an index missing from the first sample
                    // instead of incrementing an undefined offset.
                    $this->idf[$index] = ($this->idf[$index] ?? 0) + 1;
                }
            }
        }
    }
}
69