TfIdfTransformer   A
last analyzed

Complexity

Total Complexity 11

Size/Duplication

Total Lines 41
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 11
eloc 16
dl 0
loc 41
rs 10
c 0
b 0
f 0

4 Methods

Rating   Name   Duplication   Size   Complexity  
A fit() 0 7 2
A countTokensFrequency() 0 8 4
A __construct() 0 4 2
A transform() 0 5 3
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\FeatureExtraction;
6
7
use Phpml\Transformer;
8
9
/**
 * Rescales term-frequency feature vectors by inverse document frequency (IDF).
 *
 * fit() learns one weight per feature index as log10(N / df), where N is the
 * number of samples and df is the number of samples whose value at that index
 * is greater than zero. transform() multiplies every feature, in place, by the
 * weight learned for its index.
 */
class TfIdfTransformer implements Transformer
{
    /**
     * IDF weight per feature index. While fitting, it temporarily holds the
     * raw document-frequency counts before they are converted to weights.
     *
     * @var array
     */
    private $idf = [];

    /**
     * @param array $samples optional training samples; when non-empty the
     *                       transformer is fitted on them immediately
     */
    public function __construct(array $samples = [])
    {
        if (count($samples) > 0) {
            $this->fit($samples);
        }
    }

    /**
     * Learns the IDF weight of every feature index from the given samples.
     *
     * Any previously learned weights are discarded. An empty sample list
     * leaves the transformer with no weights.
     *
     * @param array      $samples list of feature vectors (index => term frequency)
     * @param array|null $targets not used by this transformer
     */
    public function fit(array $samples, ?array $targets = null): void
    {
        $this->countTokensFrequency($samples);

        $count = count($samples);
        foreach ($this->idf as &$value) {
            // A feature that is never positive has a document frequency of 0.
            // Dividing by it throws DivisionByZeroError on PHP 8 (and yields
            // INF/NAN on PHP 7), so such a feature gets a neutral weight of 0.0;
            // every training value at that index is non-positive anyway.
            $value = $value > 0 ? log((float) ($count / $value), 10.0) : 0.0;
        }
        // Break the reference so later writes to $value cannot touch $this->idf.
        unset($value);
    }

    /**
     * Multiplies each feature of each sample, in place, by its learned IDF weight.
     *
     * @param array      $samples feature vectors to rescale (modified by reference)
     * @param array|null $targets not used by this transformer
     */
    public function transform(array &$samples, ?array &$targets = null): void
    {
        foreach ($samples as &$sample) {
            foreach ($sample as $index => &$feature) {
                $feature *= $this->idf[$index];
            }
            // Release the reference to the last feature of this sample.
            unset($feature);
        }
        // Release the reference to the last sample of the caller's array.
        unset($sample);
    }

    /**
     * Counts, for every feature index, how many samples have a value > 0 there,
     * and stores the counts in $this->idf.
     *
     * The set of indices is taken from the first sample.
     */
    private function countTokensFrequency(array $samples): void
    {
        // Always start from a clean slate so a re-fit never mixes in old data.
        $this->idf = [];

        if ($samples === []) {
            return;
        }

        // reset() yields the first sample regardless of how the list is keyed;
        // $samples is a by-value copy, so moving its pointer is harmless.
        $this->idf = array_fill_keys(array_keys(reset($samples)), 0);

        foreach ($samples as $sample) {
            foreach ($sample as $index => $count) {
                if ($count > 0) {
                    ++$this->idf[$index];
                }
            }
        }
    }
}
55