Completed
Push — develop ( da6d94...cc50d2 )
by Arkadiusz
03:40
created

TfIdfTransformer   A

Complexity

Total Complexity 8

Size/Duplication

Total Lines 48
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 0

Importance

Changes 0
Metric Value
wmc 8
lcom 1
cbo 0
dl 0
loc 48
rs 10
c 0
b 0
f 0

2 Methods

Rating   Name   Duplication   Size   Complexity  
A transform() 0 17 4
A countTokensFrequency() 0 12 4
1
<?php
2
3
declare (strict_types = 1);
4
5
namespace Phpml\FeatureExtraction;
6
7
/**
 * Re-weights feature vectors by inverse document frequency.
 *
 * Every feature value is multiplied by log10(N / df), where N is the number
 * of samples passed to transform() and df is the number of those samples in
 * which the feature is greater than zero.
 */
class TfIdfTransformer implements Transformer
{
    /**
     * Per-feature weights keyed by feature index. Holds raw document
     * frequencies while counting and the IDF weights after transform().
     *
     * @var array
     */
    private $idf;

    /**
     * Multiplies each feature of each sample by its IDF weight.
     *
     * The weights are recomputed from $samples on every call. A feature that
     * is not positive in any sample gets weight 0 instead of triggering a
     * division by zero. An empty sample set is returned unchanged.
     *
     * @param array $samples feature vectors, each an array of numeric values keyed by feature index
     *
     * @return array the samples with every feature scaled by its IDF weight
     */
    public function transform(array $samples): array
    {
        // Nothing to weight; also avoids inspecting a non-existent first sample.
        if (count($samples) === 0) {
            $this->idf = [];

            return $samples;
        }

        $this->countTokensFrequency($samples);

        $count = count($samples);
        foreach ($this->idf as $index => $frequency) {
            // A feature absent from every sample would divide by zero; its
            // values are all non-positive anyway, so a zero weight is safe.
            $this->idf[$index] = $frequency > 0 ? log($count / $frequency, 10) : 0.0;
        }

        // Write back through keys rather than references so no dangling
        // reference variables are left behind after the loops.
        foreach ($samples as $sampleKey => $sample) {
            foreach ($sample as $index => $feature) {
                $samples[$sampleKey][$index] = $feature * ($this->idf[$index] ?? 0.0);
            }
        }

        return $samples;
    }

    /**
     * Counts, for every feature index, the number of samples in which that
     * feature is greater than zero, and stores the counts in $this->idf.
     *
     * The indexes of the first sample seed the table with zero counts;
     * indexes that only appear in later samples are added on first sight.
     *
     * @param array $samples feature vectors keyed by feature index
     */
    private function countTokensFrequency(array $samples)
    {
        // reset() operates on the local copy and returns false when empty.
        $first = reset($samples);
        $this->idf = is_array($first) ? array_fill_keys(array_keys($first), 0) : [];

        foreach ($samples as $sample) {
            foreach ($sample as $index => $count) {
                if ($count > 0) {
                    $this->idf[$index] = ($this->idf[$index] ?? 0) + 1;
                }
            }
        }
    }
}
55