Passed
Push — master ( 03c468...179943 )
by Darko
11:56
created

CategorizationPipeline   A

Complexity

Total Complexity 12

Size/Duplication

Total Lines 132
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 12
eloc 56
c 1
b 0
f 0
dl 0
loc 132
rs 10

5 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 7 1
A createDefault() 0 12 1
B categorize() 0 63 8
A getCategorizers() 0 3 1
A addCategorizer() 0 6 1
1
<?php
2
3
namespace App\Services\Categorization;
4
5
use App\Models\Category;
6
use App\Models\Settings;
7
use App\Models\UsenetGroup;
8
use App\Services\Categorization\Contracts\CategorizerInterface;
9
use Illuminate\Support\Collection;
10
11
/**
 * Pipeline-based categorization service.
 *
 * Orchestrates a set of categorizers to pick the best category for a release.
 * Categorizers are consulted in ascending order of getPriority(); each one
 * returns a result carrying a category id and a confidence score, and the
 * pipeline keeps whichever successful result reports that it should override
 * the current best.
 */
class CategorizationPipeline
{
    /**
     * Confidence at or above which a newly accepted best result ends the
     * pipeline early, so the remaining categorizers are not consulted.
     *
     * Read through `static::` so a subclass may override the threshold.
     */
    protected const EARLY_EXIT_CONFIDENCE = 0.95;

    /**
     * Registered categorizers, kept sorted by ascending priority.
     *
     * Keys are whatever the sort preserved from the source iterable, so they
     * are not guaranteed to be sequential.
     *
     * @var Collection<array-key, CategorizerInterface>
     */
    protected Collection $categorizers;

    /** Value of the `categorizeforeign` setting, passed on to every ReleaseContext. */
    protected bool $categorizeForeign;

    /** Value of the `catwebdl` setting, passed on to every ReleaseContext. */
    protected bool $catWebDL;

    /**
     * Build the pipeline and read the categorization flags from Settings.
     *
     * @param iterable<CategorizerInterface> $categorizers Initial categorizers, in any order.
     */
    public function __construct(iterable $categorizers = [])
    {
        // collect() accepts any iterable (arrays and Traversables alike), so no
        // additional type check is needed before wrapping the argument.
        $this->categorizers = $this->sortByPriority(collect($categorizers));

        $this->categorizeForeign = (bool) Settings::settingValue('categorizeforeign');
        $this->catWebDL = (bool) Settings::settingValue('catwebdl');
    }

    /**
     * Register a categorizer in the pipeline.
     *
     * The collection is re-sorted afterwards so the new categorizer takes its
     * place according to its priority.
     *
     * @return self This pipeline, for fluent chaining.
     */
    public function addCategorizer(CategorizerInterface $categorizer): self
    {
        $this->categorizers->push($categorizer);
        $this->categorizers = $this->sortByPriority($this->categorizers);

        return $this;
    }

    /**
     * Determine the category for a release.
     *
     * @param int|string $groupId The usenet group ID
     * @param string $releaseName The name of the release
     * @param string|null $poster The poster name; null is treated as an empty string
     * @param bool $debug Whether to include debug information
     * @return array<string, mixed> Always contains `categories_id`; when $debug is true it
     *                              also contains a `debug` array with the keys `final_category`,
     *                              `final_confidence`, `matched_by`, `release_name`, `group_name`,
     *                              `all_results` and `categorizer_details`.
     */
    public function categorize(
        int|string $groupId,
        string $releaseName,
        ?string $poster = '',
        bool $debug = false
    ): array {
        // An unknown group id yields an empty group name rather than an error.
        $groupName = UsenetGroup::whereId($groupId)->value('name') ?? '';

        $context = new ReleaseContext(
            releaseName: $releaseName,
            groupId: $groupId,
            groupName: $groupName,
            poster: $poster ?? '',
            categorizeForeign: $this->categorizeForeign,
            catWebDL: $this->catWebDL,
        );

        $bestResult = CategorizationResult::noMatch();
        $allResults = [];

        foreach ($this->categorizers as $categorizer) {
            // Let the categorizer opt out of releases it does not want to handle.
            if ($categorizer->shouldSkip($context)) {
                continue;
            }

            $result = $categorizer->categorize($context);

            if ($debug) {
                // Keyed by categorizer name: two categorizers sharing a name
                // would overwrite each other's entry here.
                $allResults[$categorizer->getName()] = [
                    'category_id' => $result->categoryId,
                    'confidence' => $result->confidence,
                    'matched_by' => $result->matchedBy,
                ];
            }

            // Only a successful result that claims precedence replaces the current best.
            if (! $result->isSuccessful() || ! $result->shouldOverride($bestResult)) {
                continue;
            }

            $bestResult = $result;

            // A sufficiently confident winner ends the search; in debug mode
            // this also means later categorizers never appear in all_results.
            if ($result->confidence >= static::EARLY_EXIT_CONFIDENCE) {
                break;
            }
        }

        $returnValue = ['categories_id' => $bestResult->categoryId];

        if ($debug) {
            $returnValue['debug'] = [
                'final_category' => $bestResult->categoryId,
                'final_confidence' => $bestResult->confidence,
                'matched_by' => $bestResult->matchedBy,
                'release_name' => $releaseName,
                'group_name' => $groupName,
                'all_results' => $allResults,
                'categorizer_details' => $bestResult->debug,
            ];
        }

        return $returnValue;
    }

    /**
     * Get all registered categorizers.
     *
     * Returns the pipeline's own collection instance, not a copy.
     *
     * @return Collection<array-key, CategorizerInterface>
     */
    public function getCategorizers(): Collection
    {
        return $this->categorizers;
    }

    /**
     * Create a default pipeline with all standard categorizers.
     *
     * The listing order below is not the execution order: the constructor
     * sorts the categorizers by their priority.
     */
    public static function createDefault(): self
    {
        return new self([
            new Categorizers\GroupNameCategorizer(),
            new Categorizers\XxxCategorizer(),
            new Categorizers\TvCategorizer(),
            new Categorizers\MovieCategorizer(),
            new Categorizers\BookCategorizer(),
            new Categorizers\MusicCategorizer(),
            new Categorizers\PcCategorizer(),
            new Categorizers\ConsoleCategorizer(),
            new Categorizers\MiscCategorizer(),
        ]);
    }

    /**
     * Sort categorizers by ascending priority, preserving their keys.
     *
     * Single source of truth for the ordering used by both the constructor
     * and addCategorizer().
     *
     * @param Collection<array-key, CategorizerInterface> $categorizers
     * @return Collection<array-key, CategorizerInterface>
     */
    private function sortByPriority(Collection $categorizers): Collection
    {
        return $categorizers->sortBy(
            static fn (CategorizerInterface $categorizer) => $categorizer->getPriority()
        );
    }
}
153
154