| Conditions | 5 |
| Total Lines | 63 |
| Code Lines | 43 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
| 1 | """Language model based ensemble backend that combines results from multiple |
||
| 74 | def _merge_source_batches( |
||
| 75 | self, |
||
| 76 | texts: list[str], |
||
| 77 | batch_by_source: dict[str, SuggestionBatch], |
||
| 78 | sources: list[tuple[str, float]], |
||
| 79 | params: dict[str, Any], |
||
| 80 | ) -> SuggestionBatch: |
||
| 81 | model = params["model"] |
||
| 82 | # llm_scores_weight = float(params["llm_weight"]) |
||
| 83 | encoding = tiktoken.encoding_for_model(model.rsplit("-", 1)[0]) |
||
| 84 | |||
| 85 | batches = [batch_by_source[project_id] for project_id, _ in sources] |
||
| 86 | weights = [weight for _, weight in sources] |
||
| 87 | avg_suggestion_batch = SuggestionBatch.from_averaged(batches, weights).filter( |
||
| 88 | limit=int(params["limit"]) # TODO Increase limit |
||
| 89 | ) |
||
| 90 | |||
| 91 | labels_batch = [] |
||
| 92 | for suggestionresult in avg_suggestion_batch: |
||
| 93 | # print(suggestionresult) |
||
| 94 | # for suggestion in suggestionresult: |
||
| 95 | # # print(suggestion) |
||
| 96 | # print(self.project.subjects[suggestion.subject_id].labels["en"]) |
||
| 97 | labels_batch.append( |
||
| 98 | [ |
||
| 99 | self.project.subjects[s.subject_id].labels[ |
||
| 100 | "en" |
||
| 101 | ] # TODO: make language selectable |
||
| 102 | for s in suggestionresult |
||
| 103 | ] |
||
| 104 | ) |
||
| 105 | # print(labels_batch) |
||
| 106 | |||
| 107 | llm_batch_suggestions = [] |
||
| 108 | for text, labels in zip(texts, labels_batch): |
||
| 109 | print(text) |
||
| 110 | print(labels) |
||
| 111 | |||
| 112 | prompt = "Here are the keywords:\n" + "\n".join(labels) + "\n" * 3 |
||
| 113 | text = self._truncate_text(text, encoding) |
||
| 114 | prompt += "Here is the text:\n" + text + "\n" |
||
| 115 | |||
| 116 | response = self._call_llm(prompt, model) |
||
| 117 | print(response) |
||
| 118 | try: |
||
| 119 | llm_result = json.loads(response) |
||
| 120 | except (TypeError, json.decoder.JSONDecodeError) as err: |
||
| 121 | print(err) |
||
| 122 | llm_result = None |
||
| 123 | continue # TODO: handle this error |
||
| 124 | suggestions = [] |
||
| 125 | for label, score in llm_result.items(): |
||
| 126 | print(label, score) |
||
| 127 | subj_id = self.project.subjects.by_label( |
||
| 128 | label, "en" |
||
| 129 | ) # TODO: make language selectable |
||
| 130 | # print(subj_id) |
||
| 131 | suggestions.append(SubjectSuggestion(subject_id=subj_id, score=score)) |
||
| 132 | |||
| 133 | llm_batch_suggestions.append(suggestions) |
||
| 134 | |||
| 135 | return SuggestionBatch.from_sequence( |
||
| 136 | llm_batch_suggestions, self.project.subjects |
||
| 137 | ) |
||
| 172 |