| Metric | Value |
| --- | --- |
| Conditions | 5 |
| Total Lines | 63 |
| Code Lines | 43 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 |
Small methods make your code easier to understand, especially when combined with a good name. Moreover, when a method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, that is usually a sign that you should extract the commented part into a new method, using the comment as a starting point for the new method's name.
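As a minimal sketch of that move (the `Order`/`Item` types and `process_order` function are invented for illustration and have nothing to do with the code reviewed below):

```python
from dataclasses import dataclass


@dataclass
class Item:
    name: str
    quantity: int


@dataclass
class Order:
    items: list[Item]


# Before: a comment announces what the next block does
def process_order_before(order: Order) -> None:
    # validate the order items
    for item in order.items:
        if item.quantity <= 0:
            raise ValueError(f"invalid quantity for {item.name}")
    # ... further processing ...


# After: the comment has become the name of an extracted method
def validate_order_items(order: Order) -> None:
    """Raise ValueError if any order item has a non-positive quantity."""
    for item in order.items:
        if item.quantity <= 0:
            raise ValueError(f"invalid quantity for {item.name}")


def process_order(order: Order) -> None:
    validate_order_items(order)
    # ... further processing ...
```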
Commonly applied refactorings include:

- Extract Method: move a coherent part of the method body into a new, well-named method.

If many parameters/temporary variables are present:

- Replace Temp with Query
- Introduce Parameter Object (sketched below)
- Preserve Whole Object
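Introduce Parameter Object bundles a group of parameters that always travel together into a type of its own. A minimal sketch (all names invented for illustration):

```python
from dataclasses import dataclass


# Before: four related parameters travel together through every call
def render_chart_before(title: str, width: int, height: int, dpi: int) -> str:
    return f"{title}: {width}x{height} @ {dpi} dpi"


# After: the group gets a name of its own, and adding a field no longer
# ripples through every signature
@dataclass(frozen=True)
class ChartSpec:
    title: str
    width: int
    height: int
    dpi: int


def render_chart(spec: ChartSpec) -> str:
    return f"{spec.title}: {spec.width}x{spec.height} @ {spec.dpi} dpi"


print(render_chart(ChartSpec("sales", 800, 600, 96)))
```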
1 | """Language model based ensemble backend that combines results from multiple |
||
74 | |||
75 | # Add LLM suggestions to the source batches |
||
76 | batch_by_source[self.project.project_id] = self._llm_suggest_batch( |
||
77 | texts, merged_source_batch, params |
||
78 | ) |
||
79 | new_sources = sources + [(self.project.project_id, float(params["llm_weight"]))] |
||
80 | return self._merge_source_batches(batch_by_source, new_sources, params) |
||
81 | |||
82 | def _llm_suggest_batch( |
||
83 | self, |
||
84 | texts: list[str], |
||
85 | suggestion_batch: SuggestionBatch, |
||
86 | params: dict[str, Any], |
||
87 | ) -> SuggestionBatch: |
||
88 | model = params["model"] |
||
89 | encoding = tiktoken.encoding_for_model(model.rsplit("-", 1)[0]) |
||
90 | labels_batch = self._get_labels_batch(suggestion_batch) |
||
91 | |||
92 | llm_batch_suggestions = [] |
||
93 | for text, labels in zip(texts, labels_batch): |
||
94 | prompt = "Here are the keywords:\n" + "\n".join(labels) + "\n" * 3 |
||
95 | text = self._truncate_text(text, encoding) |
||
96 | prompt += "Here is the text:\n" + text + "\n" |
||
97 | |||
98 | response = self._call_llm(prompt, model) |
||
99 | try: |
||
100 | llm_result = json.loads(response) |
||
101 | except (TypeError, json.decoder.JSONDecodeError) as err: |
||
102 | print(err) |
||
103 | llm_result = None |
||
104 | continue # TODO: handle this error |
||
105 | llm_suggestions = [ |
||
106 | SubjectSuggestion( |
||
107 | subject_id=self.project.subjects.by_label(llm_label, "en"), |
||
108 | score=score, |
||
109 | ) |
||
110 | for llm_label, score in llm_result.items() |
||
111 | ] |
||
112 | llm_batch_suggestions.append(llm_suggestions) |
||
113 | return SuggestionBatch.from_sequence( |
||
114 | llm_batch_suggestions, |
||
115 | self.project.subjects, |
||
116 | ) |
||
117 | |||
118 | def _get_labels_batch(self, suggestion_batch: SuggestionBatch) -> list[list[str]]: |
||
119 | return [ |
||
120 | [ |
||
121 | self.project.subjects[suggestion.subject_id].labels[ |
||
122 | "en" |
||
123 | ] # TODO: make language selectable |
||
124 | for suggestion in suggestion_result |
||
125 | ] |
||
126 | for suggestion_result in suggestion_batch |
||
127 | ] |
||
128 | |||
129 | def _truncate_text(self, text, encoding): |
||
130 | """truncate text so it contains at most MAX_PROMPT_TOKENS according to the |
||
131 | OpenAI tokenizer""" |
||
132 | |||
133 | MAX_PROMPT_TOKENS = 14000 |
||
134 | tokens = encoding.encode(text) |
||
135 | return encoding.decode(tokens[:MAX_PROMPT_TOKENS]) |
||
136 | |||
137 | def _call_llm(self, prompt: str, model: str): |
||
161 |
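The metrics above flag `_llm_suggest_batch` as a long method, and the refactorings listed earlier apply directly to it. Below is one possible Extract Method sketch, not the project's actual code: the helpers `_build_prompt`, `_parse_llm_response`, and `_to_suggestions` are names invented here for illustration, and the imports (`json`, `tiktoken`, `Any`) and the types `SubjectSuggestion`/`SuggestionBatch` are assumed from the elided top of the file.

```python
    # Hypothetical sketch: _build_prompt, _parse_llm_response and
    # _to_suggestions do not exist in the reviewed code above.

    def _build_prompt(self, text: str, labels: list[str], encoding) -> str:
        """Assemble the keywords-plus-text prompt, truncating the text to fit."""
        truncated = self._truncate_text(text, encoding)
        return (
            "Here are the keywords:\n" + "\n".join(labels) + "\n" * 3
            + "Here is the text:\n" + truncated + "\n"
        )

    def _parse_llm_response(self, response) -> dict:
        """Parse the model's JSON answer; fall back to an empty dict on failure."""
        try:
            return json.loads(response)
        except (TypeError, json.decoder.JSONDecodeError):
            return {}  # TODO: decide on an error policy (log, retry, raise)

    def _to_suggestions(self, llm_result: dict) -> list[SubjectSuggestion]:
        """Turn {label: score} pairs into SubjectSuggestion objects."""
        return [
            SubjectSuggestion(
                subject_id=self.project.subjects.by_label(label, "en"),
                score=score,
            )
            for label, score in llm_result.items()
        ]

    def _llm_suggest_batch(
        self,
        texts: list[str],
        suggestion_batch: SuggestionBatch,
        params: dict[str, Any],
    ) -> SuggestionBatch:
        model = params["model"]
        encoding = tiktoken.encoding_for_model(model.rsplit("-", 1)[0])
        labels_batch = self._get_labels_batch(suggestion_batch)

        llm_batch_suggestions = []
        for text, labels in zip(texts, labels_batch):
            prompt = self._build_prompt(text, labels, encoding)
            llm_result = self._parse_llm_response(self._call_llm(prompt, model))
            llm_batch_suggestions.append(self._to_suggestions(llm_result))
        return SuggestionBatch.from_sequence(llm_batch_suggestions, self.project.subjects)
```

One behavioral choice worth noting: returning an empty dict on a parse failure keeps `llm_batch_suggestions` aligned with `texts`, whereas the original `continue` silently dropped the entry for that text and left a dead `llm_result = None` assignment behind.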