Conditions | 5 |
Total Lines | 63 |
Code Lines | 43 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters or temporary variables are present, consider refactorings such as Introduce Parameter Object or Replace Temp with Query before extracting the method.
1 | """Language model based ensemble backend that combines results from multiple |
||
def _merge_source_batches(
    self,
    texts: list[str],
    batch_by_source: dict[str, SuggestionBatch],
    sources: list[tuple[str, float]],
    params: dict[str, Any],
) -> SuggestionBatch:
    """Combine per-source suggestion batches with a weighted average, then
    re-rank the averaged suggestions by asking an LLM to score their labels
    against each input text.

    :param texts: input documents, one per batch item
    :param batch_by_source: suggestion batch produced by each source project,
        keyed by project id
    :param sources: (project_id, weight) pairs selecting and weighting sources
    :param params: backend parameters; uses "model" (LLM model name) and
        "limit" (max suggestions kept from the averaged batch)
    :return: a SuggestionBatch with exactly one result per input text
    """
    model = params["model"]
    # Strip the trailing segment of the model name (e.g. a date/version
    # suffix) so tiktoken can resolve the base model's encoding.
    encoding = tiktoken.encoding_for_model(model.rsplit("-", 1)[0])

    batches = [batch_by_source[project_id] for project_id, _ in sources]
    weights = [weight for _, weight in sources]
    avg_suggestion_batch = SuggestionBatch.from_averaged(batches, weights).filter(
        limit=int(params["limit"])  # TODO Increase limit
    )

    # Resolve subject IDs to human-readable labels for the LLM prompt.
    labels_batch = [
        [
            self.project.subjects[s.subject_id].labels["en"]  # TODO: make language selectable
            for s in suggestionresult
        ]
        for suggestionresult in avg_suggestion_batch
    ]

    llm_batch_suggestions = []
    for text, labels in zip(texts, labels_batch):
        prompt = "Here are the keywords:\n" + "\n".join(labels) + "\n" * 3
        text = self._truncate_text(text, encoding)
        prompt += "Here is the text:\n" + text + "\n"

        response = self._call_llm(prompt, model)
        try:
            llm_result = json.loads(response)
        except (TypeError, json.decoder.JSONDecodeError):
            # The LLM returned no/invalid JSON: fall back to an empty result
            # for this document. Appending keeps llm_batch_suggestions
            # aligned with texts — the previous bare `continue` silently
            # produced a batch shorter than the number of input texts.
            # TODO: log the error once a logger is available here.
            llm_batch_suggestions.append([])
            continue
        suggestions = [
            SubjectSuggestion(
                subject_id=self.project.subjects.by_label(
                    label, "en"
                ),  # TODO: make language selectable
                score=score,
            )
            for label, score in llm_result.items()
        ]
        llm_batch_suggestions.append(suggestions)

    return SuggestionBatch.from_sequence(
        llm_batch_suggestions, self.project.subjects
    )