1
|
|
|
import sys |
2
|
|
|
from abc import ABCMeta |
3
|
|
|
from collections import defaultdict |
4
|
|
|
from functools import reduce |
5
|
|
|
|
6
|
|
|
|
7
|
|
|
class FitnessValue(object):
    """Wrapper around a raw numeric fitness score.

    Supplies equality, ``abs()`` and two-decimal string formatting. Ordering
    comparisons (``<``, ``<=``, ``>``, ``>=``) are not defined on this class.
    """

    def __init__(self, value):
        """
        :param value: the raw number to wrap; stored as ``self.value``
        """
        self.value = value

    def __abs__(self):
        """Return the absolute value of the wrapped number."""
        return abs(self.value)

    def __eq__(self, other):
        """Two fitness values are equal when their wrapped numbers are equal."""
        return self.value == other.value

    def __ne__(self, other):
        """Logical negation of ``__eq__``."""
        # Must delegate to __eq__: delegating to __ne__ (as this method once
        # did) calls itself and never terminates.
        return not self.__eq__(other)

    def __str__(self):
        """Format the wrapped number with two decimal places."""
        return "{:.2f}".format(self.value)

    def __repr__(self):
        """Same two-decimal rendering as ``__str__``."""
        return "{:.2f}".format(self.value)
20
|
|
View Code Duplication |
class NaturalFitnessValue(FitnessValue):
    """Fitness value ordered the same way as its wrapped number.

    An instance wrapping a larger ``value`` compares greater than one wrapping
    a smaller ``value``.
    """

    def __init__(self, value):
        """
        :param value: the raw number to wrap
        """
        super(NaturalFitnessValue, self).__init__(value)

    def __lt__(self, other):
        """True when this wrapped number is smaller than the other's."""
        mine, theirs = self.value, other.value
        return mine < theirs

    def __le__(self, other):
        """True when this wrapped number is smaller than or equal to the other's."""
        mine, theirs = self.value, other.value
        return mine <= theirs

    def __gt__(self, other):
        """True when this wrapped number is larger than the other's."""
        mine, theirs = self.value, other.value
        return mine > theirs

    def __ge__(self, other):
        """True when this wrapped number is larger than or equal to the other's."""
        mine, theirs = self.value, other.value
        return mine >= theirs
31
|
|
View Code Duplication |
class ReversedFitnessValue(FitnessValue):
    """Fitness value ordered opposite to its wrapped number.

    An instance wrapping a smaller ``value`` compares greater than one wrapping
    a larger ``value``; every ordering operator is the mirror image of the
    plain numeric comparison.
    """

    def __init__(self, value):
        """
        :param value: the raw number to wrap
        """
        super(ReversedFitnessValue, self).__init__(value)

    def __lt__(self, other):
        """True when this wrapped number is larger than the other's."""
        mine, theirs = self.value, other.value
        return mine > theirs

    def __le__(self, other):
        """True when this wrapped number is larger than or equal to the other's."""
        mine, theirs = self.value, other.value
        return mine >= theirs

    def __gt__(self, other):
        """True when this wrapped number is smaller than the other's."""
        mine, theirs = self.value, other.value
        return mine < theirs

    def __ge__(self, other):
        """True when this wrapped number is smaller than or equal to the other's."""
        mine, theirs = self.value, other.value
        return mine <= theirs
42
|
|
|
|
43
|
|
|
|
44
|
|
|
# Metric key -> ordering name. Only 'perplexity' is 'reversed'; any other key
# falls back to 'natural' (and, being a defaultdict, is inserted on lookup).
_ORDERING_HASH = defaultdict(lambda: 'natural', {'perplexity': 'reversed'})

# Metric key -> starting "best" value used before any real value was seen:
# infinity for 'perplexity', 0 for every other key.
_INITIAL_BEST = defaultdict(lambda: 0, {'perplexity': float('inf')})

# Ordering name -> class used to wrap a raw score under that ordering.
_FITNESS_VALUE_CONSTRUCTORS_HASH = {
    'natural': NaturalFitnessValue,
    'reversed': ReversedFitnessValue,
}
47
|
|
|
|
48
|
|
|
|
49
|
|
|
class FitnessFunctionBuilder(object):
    """Fluent builder of ``FitnessFunction`` objects.

    Typical use: ``builder.start(columns, ordering).coefficient(name, w)...build()``.
    ``start`` resets all accumulated state, so one builder instance can be
    reused for several functions.
    """

    def __init__(self):
        self._column_definitions = []
        self._extractors = []
        self._coeff_values = []
        self._order = ''
        self._names = []

    def _create_extractor(self, column_definition):
        """Return a callable that picks the value of ``column_definition`` out of a values vector.

        The column position is resolved once, here, against the column
        definitions currently set on the builder. Resolving it lazily inside
        the lambda would make every previously built function depend on
        whatever column definitions the (shared) builder holds at call time.

        :param column_definition: a member of the current column definitions
        :return: function mapping an indexable vector to the element at that column's position
        :raises ValueError: if ``column_definition`` is not among the current column definitions
        """
        position = self._column_definitions.index(column_definition)
        return lambda x: x[position]

    def start(self, column_definitions, ordering='natural'):
        """Begin a new function definition, discarding any previous state.

        :param list column_definitions: column names, in the order values appear in the vectors to evaluate
        :param str ordering: ordering name handed to the built ``FitnessFunction``
        :return: this builder, to allow chaining
        """
        self._column_definitions = column_definitions
        self._extractors = []
        self._coeff_values = []
        self._order = ordering
        self._names = []
        return self

    def coefficient(self, name, value):
        """Add the term ``value * <column name>`` to the function being built.

        :param name: column to read; must be one of the column definitions given to ``start``
        :param value: coefficient multiplying that column's value
        :return: this builder, to allow chaining
        """
        assert name in self._column_definitions
        self._extractors.append(self._create_extractor(name))
        self._coeff_values.append(value)
        self._names.append(name)
        return self

    def build(self):
        """Create the ``FitnessFunction`` described by the calls made since ``start``.

        The function receives copies of the accumulated lists, so later
        ``coefficient`` calls on the builder do not alter it.
        """
        return FitnessFunction(list(self._extractors), list(self._coeff_values), list(self._names),
                               ordering=self._order)


# Module-wide builder instance shared by the classes below.
function_builder = FitnessFunctionBuilder()
79
|
|
|
|
80
|
|
|
|
81
|
|
|
class FitnessFunction(object):
    """Linear combination of selected entries of a values vector.

    ``compute`` extracts one value per term, multiplies each by its
    coefficient, sums the products and wraps the sum in the fitness-value
    class registered for this function's ordering.
    """

    def __init__(self, extractors, coefficients, names, ordering='natural'):
        """
        :param list extractors: callables, each taking the values vector and returning one value
        :param list coefficients: one multiplier per extractor; their absolute values must sum to 1
        :param list names: one label per term, used only by ``__str__``
        :param str ordering: either 'natural' or 'reversed'
        """
        self._extr = extractors
        self._coeff = coefficients
        self._names = names
        self._order = ordering
        assert ordering in ('natural', 'reversed')
        # The subtraction belongs inside abs(): otherwise any absolute sum
        # below 1 (e.g. a single coefficient of 0.5) passes the check.
        assert abs(sum(map(abs, self._coeff)) - 1) < 1e-6

    @classmethod
    def single_metric(cls, metric_definition):
        """Build a function that returns the given metric with coefficient 1.

        The ordering is looked up in ``_ORDERING_HASH`` under ``metric_definition``.
        Note that this restarts the shared module-level ``function_builder``.
        """
        return function_builder.start([metric_definition], ordering=_ORDERING_HASH[metric_definition]).coefficient(metric_definition, 1).build()

    @property
    def ordering(self):
        """The ordering name given at construction."""
        return self._order

    def compute(self, individual):
        """
        Evaluate the function on one values vector.

        :param list individual: list of values for metrics [prpl, top-tokens-coh-10, ..]
        :return: the weighted sum wrapped in the fitness-value class for this ordering
        :raises IndexError: if an extractor indexes past the end of ``individual``
        """
        try:
            raw_values = [extract(individual) for extract in self._extr]
        except IndexError as e:
            raise IndexError("Error: {}. Current builder column definitions: [{}], Input: [{}]".format(
                e,
                ', '.join(str(_) for _ in sorted(function_builder._column_definitions)),
                ', '.join(str(_) for _ in sorted(individual))))
        # Missing (None) values are replaced before weighting, see _wrap
        weighted = [coefficient * self._wrap(value) for coefficient, value in zip(self._coeff, raw_values)]
        total = reduce(lambda i, j: i + j, weighted)
        return _FITNESS_VALUE_CONSTRUCTORS_HASH[self._order](total)

    def _wrap(self, value):
        """Substitute a placeholder for a missing value.

        ``None`` becomes 0 under 'natural' ordering and infinity under
        'reversed' ordering; any other value is returned unchanged.
        """
        if value is None:
            return {'natural': 0, 'reversed': float('inf')}[self._order]
        return value

    def __str__(self):
        """Render as ``name*coefficient`` terms joined by `` + ``."""
        return ' + '.join(['{}*{}'.format(name, coefficient) for name, coefficient in zip(self._names, self._coeff)])
124
|
|
|
|
125
|
|
|
|
126
|
|
|
class FitnessCalculator(object):
    """Evaluates a fitness function on values vectors and tracks the best value seen per column.

    Only columns registered through ``highlightable_columns`` are tracked in
    ``best``; "better" is decided by the ordering registered for the column's
    key in ``_ORDERING_HASH``.
    """

    def __new__(cls, *args, **kwargs):
        # State is created here so that every instance has it even when
        # __init__ receives arguments it does not act upon.
        x = super(FitnessCalculator, cls).__new__(cls)
        x._func = None
        x._column_defs = []
        # A dict (not a list): _update_best relies on membership tests by
        # column name and on in-place updates.
        x._best = {}
        x._highlightable_columns = []
        return x

    def __init__(self, single_metric=None, column_definitions=None):
        """
        :param str single_metric: name of the single column used as fitness (coefficient 1)
        :param list column_definitions: names of the columns of the vectors to evaluate

        When either argument has a different type nothing is configured and
        ``function`` stays ``None``.
        """
        if isinstance(single_metric, str) and isinstance(column_definitions, list):
            self._func = function_builder.start(column_definitions, ordering=_ORDERING_HASH[single_metric]).coefficient(single_metric, 1).build()
            assert isinstance(self._func, FitnessFunction)
            self._column_defs = column_definitions

    @property
    def highlightable_columns(self):
        """Column definitions whose best values are being tracked."""
        return self._highlightable_columns

    @highlightable_columns.setter
    def highlightable_columns(self, column_definitions):
        """Set the tracked columns and reset ``best`` to each column's initial value."""
        self._highlightable_columns = column_definitions
        self._best = {x: _INITIAL_BEST[FitnessCalculator._get_column_key(x)] for x in column_definitions}

    @property
    def best(self):
        """Mapping of tracked column definition to the best raw value seen so far."""
        return self._best

    @property
    def function(self):
        """The configured ``FitnessFunction``, or ``None`` if none was configured."""
        return self._func

    def pass_vector(self, values_vector):
        """Update the tracked best values from ``values_vector`` and return it unchanged."""
        self._update_best(values_vector)
        return values_vector

    def compute_fitness(self, values_vector):
        """Update the tracked best values, then return the fitness of ``values_vector``."""
        self._update_best(values_vector)
        return self._func.compute(values_vector)

    def _update_best(self, values_vector):
        """Replace the stored best of every tracked column whose new value compares greater.

        Values are paired with the column definitions by position; columns
        that are not tracked are ignored.
        """
        for column_def, value in zip(self._column_defs, values_vector):
            if column_def not in self._best:
                continue
            column_key = FitnessCalculator._get_column_key(column_def)
            candidate = FitnessCalculator._fitness(column_key, value)
            current = FitnessCalculator._fitness(column_key, self._best[column_def])
            if candidate > current:
                self._best[column_def] = value

    def __call__(self, *args, **kwargs):
        """Shorthand for ``compute_fitness(args[0])``."""
        return self.compute_fitness(args[0])

    @staticmethod
    def _fitness(column_key, value):
        """Wrap ``value`` in the fitness-value class matching the column's ordering.

        A ``None`` value is replaced by the column's initial best value.
        """
        constructor = _FITNESS_VALUE_CONSTRUCTORS_HASH[_ORDERING_HASH[column_key]]
        if value is None:
            value = _INITIAL_BEST[column_key]
        return constructor(value)

    @staticmethod
    def _get_column_key(column_definition):
        """Reduce a column definition to its key by dropping parameter tokens.

        The definition is split on '-' and the tokens kept by ``_get_token``
        are re-joined, e.g. 'top-tokens-coherence-10' -> 'top-tokens-coherence'.
        """
        return '-'.join([_f for _f in map(FitnessCalculator._get_token, column_definition.split('-')) if _f])

    @staticmethod
    def _get_token(definition_element):
        """Return the token if it is part of the column key, else ``None``.

        Dropped tokens: anything parseable as a float, anything starting with
        '@', and anything of length 0 or 1.
        """
        try:
            float(definition_element)
        except ValueError:
            # len() is checked first so an empty token (from e.g. 'a--b') is
            # dropped instead of being indexed.
            if len(definition_element) <= 1 or definition_element.startswith('@'):
                return None
            return definition_element
        return None
208
|
|
|
|
209
|
|
|
# def _query_vector(self, values_vector, column_key): |
210
|
|
|
# """Call this method to get a list of tuples; 1st elements are the vactor values, 2nd elements are the corresponding column definitions""" |
211
|
|
|
# return map(lambda x: (values_vector[x[0]], x[1]), [_ for _ in enumerate(self._column_defs) if _[1].startswith(column_key)]) |
212
|
|
|
|