|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types=1); |
|
4
|
|
|
|
|
5
|
|
|
namespace Squareetlabs\LaravelToon\Services; |
|
6
|
|
|
|
|
7
|
|
|
use Illuminate\Support\Facades\Cache; |
|
8
|
|
|
|
|
9
|
|
|
/**
 * Estimates and analyses token counts for raw strings, JSON payloads and
 * TOON payloads, optionally caching the estimates in a Laravel cache store.
 *
 * Estimates are heuristics driven by the `laravel-toon.token_analysis.*`
 * configuration; they are not produced by a real tokenizer.
 */
class TokenAnalyzer
{
    private const CACHE_PREFIX = 'laravel-toon:tokens:';

    /**
     * Key holding the current cache "generation". The generation is part of
     * every estimate key, so rotating it invalidates all previous estimates
     * without needing wildcard deletion (which cache stores do not offer).
     */
    private const CACHE_GENERATION_KEY = self::CACHE_PREFIX.'generation';

    /** Fallback used when `chars_per_token` is missing, non-numeric or <= 0. */
    private const DEFAULT_CHARS_PER_TOKEN = 4;

    public function __construct(
        private readonly ToonService $toon = new ToonService(),
    ) {}

    /**
     * Estimate the number of tokens in the given content.
     *
     * When `laravel-toon.token_analysis.cache_results` is truthy, the result
     * is read from / written to the store named by `laravel-toon.cache.store`
     * for `laravel-toon.token_analysis.cache_ttl` seconds.
     *
     * The cache key depends only on the content, not on the estimation
     * settings; call clearCache() after changing those settings.
     */
    public function estimate(string $content): int
    {
        if (!config('laravel-toon.token_analysis.cache_results', true)) {
            return $this->calculateTokens($content);
        }

        $store = Cache::store(config('laravel-toon.cache.store', 'file'));
        $generation = (string) $store->get(self::CACHE_GENERATION_KEY, 'g0');
        $cacheKey = self::CACHE_PREFIX.$generation.':'.md5($content);

        $cached = $store->get($cacheKey);

        // Some stores hand integers back as numeric strings; under
        // strict_types that would be a TypeError for an int return type,
        // so validate and cast instead of returning the raw value.
        if (is_numeric($cached)) {
            return (int) $cached;
        }

        $tokens = $this->calculateTokens($content);

        $store->put($cacheKey, $tokens, config('laravel-toon.token_analysis.cache_ttl', 3600));

        return $tokens;
    }

    /**
     * Estimate the tokens of the JSON encoding of $data.
     *
     * @throws \JsonException when $data cannot be JSON-encoded
     */
    public function estimateJson(mixed $data): int
    {
        $json = json_encode($data, JSON_THROW_ON_ERROR);

        return $this->estimate($json);
    }

    /**
     * Estimate the tokens of the TOON encoding of $data, as produced by the
     * injected ToonService.
     */
    public function estimateToon(mixed $data): int
    {
        $toon = $this->toon->encode($data);

        return $this->estimate($toon);
    }

    /**
     * Compare the estimated token cost of $data encoded as JSON versus TOON.
     *
     * `tokens_saved` is negative when TOON is the larger encoding. Both
     * ratios fall back to integer 0 when the JSON estimate is 0.
     *
     * @return array{
     *     json_tokens: int,
     *     toon_tokens: int,
     *     tokens_saved: int,
     *     percent_saved: float,
     *     efficiency_ratio: float|int
     * }
     *
     * @throws \JsonException when $data cannot be JSON-encoded
     */
    public function compareJsonVsToon(mixed $data): array
    {
        $jsonTokens = $this->estimateJson($data);
        $toonTokens = $this->estimateToon($data);
        $tokensSaved = $jsonTokens - $toonTokens;
        $percentSaved = $jsonTokens > 0 ? (($tokensSaved / $jsonTokens) * 100) : 0;

        return [
            'json_tokens' => $jsonTokens,
            'toon_tokens' => $toonTokens,
            'tokens_saved' => $tokensSaved,
            'percent_saved' => round($percentSaved, 2),
            'efficiency_ratio' => $jsonTokens > 0 ? round($toonTokens / $jsonTokens, 3) : 0,
        ];
    }

    /**
     * Produce a small report about a piece of content.
     *
     * `length_chars` is the byte length (strlen), and `length_words` comes
     * from str_word_count(), which only recognises alphabetic words.
     *
     * @return array{
     *     content: string,
     *     length_chars: int,
     *     length_words: int,
     *     tokens_estimated: int,
     *     chars_per_token: float|int,
     *     analysis_method: mixed
     * }
     */
    public function analyze(string $content): array
    {
        $tokens = $this->estimate($content);
        $chars = strlen($content);
        $words = str_word_count($content);

        // The estimate can be 0 for non-empty content (e.g. "123" in
        // word_count mode), so the divisor must be checked as well.
        $charsPerToken = ($chars > 0 && $tokens > 0) ? round($chars / $tokens, 2) : 0;

        return [
            'content' => $content,
            'length_chars' => $chars,
            'length_words' => $words,
            'tokens_estimated' => $tokens,
            'chars_per_token' => $charsPerToken,
            'analysis_method' => config('laravel-toon.token_analysis.estimate_method', 'character_ratio'),
        ];
    }

    /**
     * Run analyze() on the JSON encoding of $data.
     *
     * @return array<string, mixed> same shape as analyze()
     *
     * @throws \JsonException when $data cannot be JSON-encoded
     */
    public function analyzeJson(mixed $data): array
    {
        $json = json_encode($data, JSON_THROW_ON_ERROR);

        return $this->analyze($json);
    }

    /**
     * Run analyze() on the TOON encoding of $data.
     *
     * @return array<string, mixed> same shape as analyze()
     */
    public function analyzeToon(mixed $data): array
    {
        $toon = $this->toon->encode($data);

        return $this->analyze($toon);
    }

    /**
     * Check the TOON encoding of $data against a token budget.
     *
     * `tokens_available` is clamped at 0, whereas `percent_available` is not
     * clamped and becomes negative when the budget is exceeded. A
     * non-positive $maxTokens reports 0% used.
     *
     * @return array{
     *     max_tokens: int,
     *     tokens_used: int,
     *     tokens_available: int,
     *     percent_used: float,
     *     percent_available: float,
     *     within_budget: bool
     * }
     */
    public function budgetTokens(int $maxTokens, mixed $data): array
    {
        $toonTokens = $this->estimateToon($data);
        $available = $maxTokens - $toonTokens;
        $percentUsed = $maxTokens > 0 ? (($toonTokens / $maxTokens) * 100) : 0;
        $percentAvailable = 100 - $percentUsed;

        return [
            'max_tokens' => $maxTokens,
            'tokens_used' => $toonTokens,
            'tokens_available' => max(0, $available),
            'percent_used' => round($percentUsed, 2),
            'percent_available' => round($percentAvailable, 2),
            'within_budget' => $toonTokens <= $maxTokens,
        ];
    }

    /**
     * Dispatch to the estimator selected by
     * `laravel-toon.token_analysis.estimate_method`; any value other than
     * 'word_count' uses the character-ratio estimator.
     */
    private function calculateTokens(string $content): int
    {
        $method = config('laravel-toon.token_analysis.estimate_method', 'character_ratio');

        return match ($method) {
            'word_count' => $this->estimateByWordCount($content),
            default => $this->estimateByCharacterRatio($content),
        };
    }

    /**
     * Estimate tokens as ceil(byte length / chars_per_token).
     */
    private function estimateByCharacterRatio(string $content): int
    {
        $charsPerToken = config('laravel-toon.token_analysis.chars_per_token', self::DEFAULT_CHARS_PER_TOKEN);

        // A missing, non-numeric or non-positive ratio would otherwise cause
        // a division by zero or a meaningless negative estimate.
        if (!is_numeric($charsPerToken) || $charsPerToken <= 0) {
            $charsPerToken = self::DEFAULT_CHARS_PER_TOKEN;
        }

        return (int) ceil(strlen($content) / $charsPerToken);
    }

    /**
     * Estimate tokens from the word count reported by str_word_count().
     */
    private function estimateByWordCount(string $content): int
    {
        $words = str_word_count($content);

        // Average: 1.3 tokens per word
        return (int) ceil($words * 1.3);
    }

    /**
     * Invalidate every cached estimate.
     *
     * Cache stores only delete exact keys, so rather than a wildcard forget
     * the generation token embedded in every estimate key is rotated.
     * Entries from older generations are no longer read and are left to
     * expire through their own TTL.
     */
    public function clearCache(): void
    {
        Cache::store(config('laravel-toon.cache.store', 'file'))
            ->forever(self::CACHE_GENERATION_KEY, 'g'.bin2hex(random_bytes(8)));
    }
}
|
149
|
|
|
|
|
150
|
|
|
|