|
1
|
|
|
import sys |
|
2
|
|
|
from abc import ABCMeta |
|
3
|
|
|
from collections import defaultdict |
|
4
|
|
|
from functools import reduce |
|
5
|
|
|
|
|
6
|
|
|
|
|
7
|
|
|
class FitnessValue(object):
    """Wrapper around a raw numeric fitness score.

    Supplies equality, absolute value and a two-decimal textual rendering.
    This class itself defines no ordering comparisons (<, <=, >, >=).
    """

    def __init__(self, value):
        """Store the raw score.

        :param value: the underlying numeric score
        """
        self.value = value

    def __abs__(self):
        """Return the absolute value of the wrapped score."""
        return abs(self.value)

    def __eq__(self, other):
        """Return True when both objects wrap equal scores.

        :param FitnessValue other: object exposing a ``value`` attribute
        """
        return self.value == other.value

    def __ne__(self, other):
        """Return the negation of ``__eq__``.

        :param FitnessValue other: object exposing a ``value`` attribute
        """
        # Must delegate to __eq__: delegating to __ne__ itself recurses
        # until the interpreter raises a recursion error.
        return not self.__eq__(other)

    def __str__(self):
        """Render the score with two decimal digits."""
        return "{:.2f}".format(self.value)

    def __repr__(self):
        """Same rendering as ``__str__``."""
        return "{:.2f}".format(self.value)
|
20
|
|
View Code Duplication |
class NaturalFitnessValue(FitnessValue):
    """Fitness value whose ordering follows the wrapped number directly.

    A larger wrapped score compares as greater.
    """

    def __init__(self, value):
        """Delegate storage of ``value`` to the parent initializer."""
        super(NaturalFitnessValue, self).__init__(value)

    def __lt__(self, other):
        """True when this score is strictly smaller than ``other``'s."""
        mine = self.value
        theirs = other.value
        return mine < theirs

    def __le__(self, other):
        """True when this score is smaller than or equal to ``other``'s."""
        mine = self.value
        theirs = other.value
        return mine <= theirs

    def __gt__(self, other):
        """True when this score is strictly larger than ``other``'s."""
        mine = self.value
        theirs = other.value
        return mine > theirs

    def __ge__(self, other):
        """True when this score is larger than or equal to ``other``'s."""
        mine = self.value
        theirs = other.value
        return mine >= theirs
|
31
|
|
View Code Duplication |
class ReversedFitnessValue(FitnessValue):
    """Fitness value whose ordering is the inverse of the wrapped number.

    A smaller wrapped score compares as greater, so every comparison
    operator applies the opposite numeric comparison.
    """

    def __init__(self, value):
        """Delegate storage of ``value`` to the parent initializer."""
        super(ReversedFitnessValue, self).__init__(value)

    def __lt__(self, other):
        """'Less than' holds when the wrapped score is numerically larger."""
        mine = self.value
        theirs = other.value
        return mine > theirs

    def __le__(self, other):
        """'Less or equal' holds when the wrapped score is numerically >=."""
        mine = self.value
        theirs = other.value
        return mine >= theirs

    def __gt__(self, other):
        """'Greater than' holds when the wrapped score is numerically smaller."""
        mine = self.value
        theirs = other.value
        return mine < theirs

    def __ge__(self, other):
        """'Greater or equal' holds when the wrapped score is numerically <=."""
        mine = self.value
        theirs = other.value
        return mine <= theirs
|
42
|
|
|
|
|
43
|
|
|
|
|
44
|
|
|
# Ordering kind per metric key: 'natural' for any key not listed explicitly,
# 'reversed' for perplexity.
_ORDERING_HASH = defaultdict(lambda: 'natural', {'perplexity': 'reversed'})

# Starting "best" value per metric key: 0 for any key not listed explicitly,
# +infinity for perplexity.
_INITIAL_BEST = defaultdict(lambda: 0, {'perplexity': float('inf')})

# Maps an ordering kind to the FitnessValue subclass implementing it.
_FITNESS_VALUE_CONSTRUCTORS_HASH = {
    'natural': NaturalFitnessValue,
    'reversed': ReversedFitnessValue,
}
|
47
|
|
|
|
|
48
|
|
|
|
|
49
|
|
|
class FitnessFunctionBuilder(object):
    """Fluent builder of FitnessFunction objects.

    Usage: ``builder.start(column_definitions, ordering).coefficient(name, w)...build()``.
    Calling ``start`` resets every piece of state accumulated so far.
    """

    def __init__(self):
        self._column_definitions = []
        self._extractors = []
        self._coeff_values = []
        self._order = ''
        self._names = []

    def _create_extractor(self, column_definition):
        """Return a callable that picks the value of one column out of a vector.

        :param column_definition: an element of the current column definitions
        :return: function mapping a values vector to the value at that column's position
        :raises ValueError: if ``column_definition`` is not among the current definitions
        """
        # Resolve the position eagerly. Looking it up inside the lambda would
        # bind the extractor to whatever column definitions the builder holds
        # at call time, so a later start() on a shared builder would break or
        # re-target functions that were already built.
        position = self._column_definitions.index(column_definition)
        return lambda x: x[position]

    def start(self, column_definitions, ordering='natural'):
        """Begin a new function definition, discarding any previous state.

        :param list column_definitions: names of the columns, in vector order
        :param str ordering: ordering kind forwarded to the built function
        :return: this builder, to allow call chaining
        """
        self._column_definitions = column_definitions
        self._extractors = []
        self._coeff_values = []
        self._order = ordering
        self._names = []
        return self

    def coefficient(self, name, value):
        """Add the term ``value * <column name>`` to the function being built.

        :param name: column definition; must be one of those given to ``start``
        :param value: weight applied to that column's value
        :return: this builder, to allow call chaining
        """
        assert name in self._column_definitions, \
            "'{}' is not among the column definitions given to start()".format(name)
        self._extractors.append(self._create_extractor(name))
        self._coeff_values.append(value)
        self._names.append(name)
        return self

    def build(self):
        """Create the FitnessFunction described by the calls made since ``start``.

        :return: the new FitnessFunction
        """
        # Hand out copies so that further coefficient() calls on this builder
        # cannot mutate a function that was already built.
        return FitnessFunction(list(self._extractors), list(self._coeff_values), list(self._names),
                               ordering=self._order)


# Module-level builder instance shared by the code in this file.
function_builder = FitnessFunctionBuilder()
|
79
|
|
|
|
|
80
|
|
|
|
|
81
|
|
|
class FitnessFunction(object):
    """Weighted sum of selected metric values, wrapped in an ordered fitness value."""

    def __init__(self, extractors, coefficients, names, ordering='natural'):
        """
        :param list extractors: callables, each picking one metric value out of a values vector
        :param list coefficients: weights, parallel to ``extractors``; absolute values must sum to 1
        :param list names: metric names, parallel to ``coefficients`` (used by ``__str__``)
        :param str ordering: either 'natural' or 'reversed'
        :raises AssertionError: on an unknown ordering or if the weights are not normalised
        """
        self._extr = extractors
        self._coeff = coefficients
        self._names = names
        self._order = ordering
        assert ordering in ('natural', 'reversed')
        # abs() has to wrap the difference; wrapping only the sum lets any
        # total below 1 (e.g. 0.1) pass the check.
        assert abs(sum(map(abs, self._coeff)) - 1) < 1e-6

    @classmethod
    def single_metric(cls, metric_definition):
        """Build a function made of the single term ``1 * metric_definition``.

        The ordering is looked up in the module-level ``_ORDERING_HASH``.

        :param str metric_definition: name of the only metric/column
        :return: the function produced by the module-level ``function_builder``
        """
        return function_builder.start(
            [metric_definition], ordering=_ORDERING_HASH[metric_definition]
        ).coefficient(metric_definition, 1).build()

    @property
    def ordering(self):
        """The ordering kind given at construction."""
        return self._order

    def compute(self, individual):
        """Evaluate the function on a vector of metric values.

        :param list individual: list of values for metrics [prpl, top-tokens-coh-10, ..]
        :return: instance of the fitness-value class registered for this ordering,
            wrapping the weighted sum
        :raises IndexError: if an extractor indexes past the end of ``individual``
        """
        try:
            extracted = [extract(individual) for extract in self._extr]
        except IndexError as e:
            # The input is reported in its given order: it is a positional
            # vector, and sorting it would also fail when it contains None.
            raise IndexError("Error: {}. Current builder column definitions: [{}], Input: [{}]".format(
                e,
                ', '.join(str(_) for _ in sorted(function_builder._column_definitions)),
                ', '.join(str(_) for _ in individual)))
        weighted = [coeff * self._wrap(value) for coeff, value in zip(self._coeff, extracted)]
        return _FITNESS_VALUE_CONSTRUCTORS_HASH[self._order](sum(weighted))

    def _wrap(self, value):
        """Substitute a missing (None) metric value.

        None becomes 0 under 'natural' ordering and +infinity under 'reversed';
        any other value is returned unchanged.
        """
        if value is None:
            return {'natural': 0, 'reversed': float('inf')}[self._order]
        return value

    def __str__(self):
        """Render as ``name*coefficient`` terms joined by ' + '."""
        return ' + '.join(['{}*{}'.format(x[0], x[1]) for x in zip(self._names, self._coeff)])
|
124
|
|
|
|
|
125
|
|
|
|
|
126
|
|
|
class FitnessCalculator(object):
    """Computes fitness for vectors of metric values and tracks best values per column.

    Column definitions are strings such as 'perplexity', 'kernel-coherence-0.8'
    or 'top-tokens-coherence-10'.
    """

    def __new__(cls, *args, **kwargs):
        x = super(FitnessCalculator, cls).__new__(cls)
        x._func = None
        x._column_defs = []
        # Must be a dict: _update_best calls .update() on it even when no
        # highlightable columns were ever registered.
        x._best = {}
        x._highlightable_columns = []
        return x

    def __init__(self, single_metric=None, column_definitions=None):
        """Optionally configure a single-metric fitness function.

        The function is only created when ``single_metric`` is a string and
        ``column_definitions`` is a list; otherwise the instance stays unconfigured.

        :param str single_metric: the column whose value is used as fitness (weight 1)
        :param list column_definitions: names of the columns, in values-vector order
        """
        if isinstance(single_metric, str) and isinstance(column_definitions, list):
            self._func = function_builder.start(
                column_definitions, ordering=_ORDERING_HASH[single_metric]
            ).coefficient(single_metric, 1).build()
            assert isinstance(self._func, FitnessFunction)
            self._column_defs = column_definitions

    @property
    def highlightable_columns(self):
        """Column definitions for which a best value is tracked."""
        return self._highlightable_columns

    @highlightable_columns.setter
    def highlightable_columns(self, column_definitions):
        # Assigning resets the tracked best values to each column's initial best.
        self._highlightable_columns = column_definitions
        self._best = {x: _INITIAL_BEST[FitnessCalculator._get_column_key(x)] for x in column_definitions}

    @property
    def best(self):
        """Mapping of highlightable column definition to the best value seen so far."""
        return self._best

    @property
    def function(self):
        """The configured fitness function, or None if none was configured."""
        return self._func

    def pass_vector(self, values_vector):
        """Record the vector in the best-value tracking and return it unchanged."""
        self._update_best(values_vector)
        return values_vector

    def compute_fitness(self, values_vector):
        """Record the vector in the best-value tracking and return its fitness.

        :param list values_vector: metric values, parallel to the column definitions
        :return: result of the configured function's ``compute``
        """
        self._update_best(values_vector)
        return self._func.compute(values_vector)

    def _update_best(self, values_vector):
        """Replace the tracked best of each highlightable column that the vector improves on."""
        for column_def, value in zip(self._column_defs, values_vector):
            if column_def not in self._best:
                continue
            column_key = FitnessCalculator._get_column_key(column_def)
            candidate = FitnessCalculator._fitness(column_key, value)
            current = FitnessCalculator._fitness(column_key, self._best[column_def])
            if candidate > current:
                self._best[column_def] = value

    def __call__(self, *args, **kwargs):
        """Shorthand for ``compute_fitness(args[0])``."""
        return self.compute_fitness(args[0])

    @staticmethod
    def _fitness(column_key, value):
        """Wrap ``value`` in the fitness-value class matching the column's ordering.

        A None value is replaced by the column's initial best.
        """
        if value is None:
            return _FITNESS_VALUE_CONSTRUCTORS_HASH[_ORDERING_HASH[column_key]](_INITIAL_BEST[column_key])
        return _FITNESS_VALUE_CONSTRUCTORS_HASH[_ORDERING_HASH[column_key]](value)

    @staticmethod
    def _get_column_key(column_definition):
        """Reduce a column definition to its key by dropping parameter tokens.

        The definition is split on '-' and the tokens kept by ``_get_token``
        are re-joined, e.g. 'kernel-coherence-0.8' -> 'kernel-coherence'.
        """
        return '-'.join([_f for _f in map(FitnessCalculator._get_token, column_definition.split('-')) if _f])

    @staticmethod
    def _get_token(definition_element):
        """Return the token if it belongs to a column key, otherwise None.

        Dropped tokens: anything ``float()`` can parse, tokens starting with
        '@', single-character tokens and empty tokens.
        """
        try:
            float(definition_element)
        except ValueError:
            # The emptiness check comes first: indexing [0] on an empty token
            # (as produced by a doubled '-') would raise IndexError.
            if not definition_element or definition_element[0] == '@' or len(definition_element) == 1:
                return None
            return definition_element
        return None
|
208
|
|
|
|
|
209
|
|
|
# def _query_vector(self, values_vector, column_key): |
|
210
|
|
|
# """Call this method to get a list of tuples; 1st elements are the vactor values, 2nd elements are the corresponding column definitions""" |
|
211
|
|
|
# return map(lambda x: (values_vector[x[0]], x[1]), [_ for _ in enumerate(self._column_defs) if _[1].startswith(column_key)]) |
|
212
|
|
|
|