Completed
Push — develop ( da6d94...cc50d2 )
by Arkadiusz
03:40
created

TfIdfTransformer::transform()   A

Complexity

Conditions 4
Paths 6

Size

Total Lines 17
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 17
rs 9.2
c 0
b 0
f 0
cc 4
eloc 9
nc 6
nop 1
1
<?php
2
3
declare (strict_types = 1);
4
5
namespace Phpml\FeatureExtraction;
6
7
/**
 * Re-weights samples by inverse document frequency.
 *
 * Every feature value is multiplied by log10(N / df), where N is the number
 * of samples and df is the number of samples in which that feature is > 0.
 */
class TfIdfTransformer implements Transformer
{
    /**
     * Per-feature table keyed by feature index. Holds document frequencies
     * while counting and the resulting IDF weights once transform() has run.
     *
     * @var array
     */
    private $idf;

    /**
     * Multiplies each feature of each sample by its IDF weight.
     *
     * @param array $samples list of samples; each sample maps a feature index to a numeric value
     *
     * @return array the samples, same keys, with every feature scaled by its IDF weight
     */
    public function transform(array $samples): array
    {
        $this->countTokensFrequency($samples);

        $count = count($samples);
        foreach ($this->idf as $index => $frequency) {
            // A feature with no positive value in any sample has df = 0.
            // Give it a zero weight instead of dividing by zero (a warning and
            // INF/NAN on PHP 7, a DivisionByZeroError on PHP 8).
            $this->idf[$index] = $frequency > 0
                ? log($count / $frequency, 10)
                : 0.0;
        }

        // Keyed assignment rather than by-reference iteration, so no dangling
        // references are left behind after the loops.
        foreach ($samples as $key => $sample) {
            foreach ($sample as $index => $feature) {
                $samples[$key][$index] = $feature * $this->idf[$index];
            }
        }

        return $samples;
    }

    /**
     * Fills $this->idf with, for every feature index, the number of samples
     * in which that feature has a value greater than zero.
     *
     * @param array $samples
     *
     * @return void
     */
    private function countTokensFrequency(array $samples)
    {
        $this->idf = [];

        foreach ($samples as $sample) {
            foreach ($sample as $index => $count) {
                // Register every feature that occurs, so the table does not
                // depend on $samples[0] existing or on the first sample
                // listing all features.
                if (!isset($this->idf[$index])) {
                    $this->idf[$index] = 0;
                }

                if ($count > 0) {
                    ++$this->idf[$index];
                }
            }
        }
    }
}
55