1
|
|
|
# -*- coding: utf-8 -*- |
2
|
|
|
|
3
|
|
|
# Copyright 2018 by Christopher C. Little. |
4
|
|
|
# This file is part of Abydos. |
5
|
|
|
# |
6
|
|
|
# Abydos is free software: you can redistribute it and/or modify |
7
|
|
|
# it under the terms of the GNU General Public License as published by |
8
|
|
|
# the Free Software Foundation, either version 3 of the License, or |
9
|
|
|
# (at your option) any later version. |
10
|
|
|
# |
11
|
|
|
# Abydos is distributed in the hope that it will be useful, |
12
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
13
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14
|
|
|
# GNU General Public License for more details. |
15
|
|
|
# |
16
|
|
|
# You should have received a copy of the GNU General Public License |
17
|
|
|
# along with Abydos. If not, see <http://www.gnu.org/licenses/>. |
18
|
|
|
|
19
|
1 |
|
"""abydos.fingerprint._synoname. |
20
|
|
|
|
21
|
|
|
Synoname toolcode |
22
|
|
|
""" |
23
|
|
|
|
24
|
1 |
|
from __future__ import ( |
25
|
|
|
absolute_import, |
26
|
|
|
division, |
27
|
|
|
print_function, |
28
|
|
|
unicode_literals, |
29
|
|
|
) |
30
|
|
|
|
31
|
1 |
|
from ._fingerprint import _Fingerprint |
32
|
|
|
|
33
|
1 |
|
__all__ = ['SynonameToolcode', 'synoname_toolcode'] |
34
|
|
|
|
35
|
|
|
|
36
|
1 |
|
class SynonameToolcode(_Fingerprint):
    """Synoname Toolcode.

    Cf. :cite:`Getty:1991,Gross:1991`.
    """

    # Each row is (roman, match, extra, method):
    #   roman  -- True if the entry is a Roman numeral
    #   match  -- the text searched for in the full name
    #   extra  -- optional additional text searched for (may be empty)
    #   method -- bitmask of _method_dict values selecting where in the
    #             full name `match` is looked for
    # A row's position in this tuple is the 3-digit number written into the
    # toolcode, so the order of the rows must not change.
    _synoname_special_table = (
        # Roman, match, extra, method
        (False, 'NONE', '', 0),
        (False, 'aine', '', 3),
        (False, 'also erroneously', '', 4),
        (False, 'also identified with the', '', 2),
        (False, 'also identified with', '', 2),
        (False, 'archbishop', '', 7),
        (False, 'atelier', '', 7),
        (False, 'baron', '', 7),
        (False, 'cadet', '', 3),
        (False, 'cardinal', '', 7),
        (False, 'circle of', '', 5),
        (False, 'circle', '', 5),
        (False, 'class of', '', 5),
        (False, 'conde de', '', 7),
        (False, 'countess', '', 7),
        (False, 'count', '', 7),
        (False, "d'", " d'", 15),
        (False, 'dai', '', 15),
        (False, "dall'", " dall'", 15),
        (False, 'dalla', '', 15),
        (False, 'dalle', '', 15),
        (False, 'dal', '', 15),
        (False, 'da', '', 15),
        (False, 'degli', '', 15),
        (False, 'della', '', 15),
        (False, 'del', '', 15),
        (False, 'den', '', 15),
        (False, 'der altere', '', 3),
        (False, 'der jungere', '', 3),
        (False, 'der', '', 15),
        (False, 'de la', '', 15),
        (False, 'des', '', 15),
        (False, "de'", " de'", 15),
        (False, 'de', '', 15),
        (False, 'di ser', '', 7),
        (False, 'di', '', 15),
        (False, 'dos', '', 15),
        (False, 'du', '', 15),
        (False, 'duke of', '', 7),
        (False, 'earl of', '', 7),
        (False, 'el', '', 15),
        (False, 'fils', '', 3),
        (False, 'florentine follower of', '', 5),
        (False, 'follower of', '', 5),
        (False, 'fra', '', 7),
        (False, 'freiherr von', '', 7),
        (False, 'giovane', '', 7),
        (False, 'group', '', 5),
        (True, 'iii', '', 3),
        (True, 'ii', '', 3),
        (False, 'il giovane', '', 7),
        (False, 'il vecchio', '', 7),
        (False, 'il', '', 15),
        (False, "in't", '', 7),
        (False, 'in het', '', 7),
        (True, 'iv', '', 3),
        (True, 'ix', '', 3),
        (True, 'i', '', 3),
        (False, 'jr.', '', 3),
        (False, 'jr', '', 3),
        (False, 'juniore', '', 3),
        (False, 'junior', '', 3),
        (False, 'king of', '', 7),
        (False, "l'", " l'", 15),
        (False, "l'aine", '', 3),
        (False, 'la', '', 15),
        (False, 'le jeune', '', 3),
        (False, 'le', '', 15),
        (False, 'lo', '', 15),
        (False, 'maestro', '', 7),
        (False, 'maitre', '', 7),
        (False, 'marchioness', '', 7),
        (False, 'markgrafin von', '', 7),
        (False, 'marquess', '', 7),
        (False, 'marquis', '', 7),
        (False, 'master of the', '', 7),
        (False, 'master of', '', 7),
        (False, 'master known as the', '', 7),
        (False, 'master with the', '', 7),
        (False, 'master with', '', 7),
        (False, 'masters', '', 7),
        (False, 'master', '', 7),
        (False, 'meister', '', 7),
        (False, 'met de', '', 7),
        (False, 'met', '', 7),
        (False, 'mlle.', '', 7),
        (False, 'mlle', '', 7),
        (False, 'monogrammist', '', 7),
        (False, 'monsu', '', 7),
        (False, 'nee', '', 2),
        (False, 'of', '', 3),
        (False, 'oncle', '', 3),
        (False, 'op den', '', 15),
        (False, 'op de', '', 15),
        (False, 'or', '', 2),
        (False, 'over den', '', 15),
        (False, 'over de', '', 15),
        (False, 'over', '', 7),
        (False, 'p.re', '', 7),
        (False, 'p.r.a.', '', 1),
        (False, 'padre', '', 7),
        (False, 'painter', '', 7),
        (False, 'pere', '', 3),
        (False, 'possibly identified with', '', 6),
        (False, 'possibly', '', 6),
        (False, 'pseudo', '', 15),
        (False, 'r.a.', '', 1),
        (False, 'reichsgraf von', '', 7),
        (False, 'ritter von', '', 7),
        (False, 'sainte-', ' sainte-', 8),
        (False, 'sainte', '', 7),
        (False, 'saint-', ' saint-', 8),
        (False, 'saint', '', 7),
        (False, 'santa', '', 15),
        (False, "sant'", " sant'", 15),
        (False, 'san', '', 15),
        (False, 'ser', '', 7),
        (False, 'seniore', '', 3),
        (False, 'senior', '', 3),
        (False, 'sir', '', 5),
        (False, 'sr.', '', 3),
        (False, 'sr', '', 3),
        (False, 'ss.', ' ss.', 14),
        (False, 'ss', '', 6),
        (False, 'st-', ' st-', 8),
        (False, 'st.', ' st.', 15),
        (False, 'ste-', ' ste-', 8),
        (False, 'ste.', ' ste.', 15),
        (False, 'studio', '', 7),
        (False, 'sub-group', '', 5),
        (False, 'sultan of', '', 7),
        (False, 'ten', '', 15),
        (False, 'ter', '', 15),
        (False, 'the elder', '', 3),
        (False, 'the younger', '', 3),
        (False, 'the', '', 7),
        (False, 'tot', '', 15),
        (False, 'unidentified', '', 1),
        (False, 'van den', '', 15),
        (False, 'van der', '', 15),
        (False, 'van de', '', 15),
        (False, 'vanden', '', 15),
        (False, 'vander', '', 15),
        (False, 'van', '', 15),
        (False, 'vecchia', '', 7),
        (False, 'vecchio', '', 7),
        (True, 'viii', '', 3),
        (True, 'vii', '', 3),
        (True, 'vi', '', 3),
        (True, 'v', '', 3),
        (False, 'vom', '', 7),
        (False, 'von', '', 15),
        (False, 'workshop', '', 7),
        (True, 'xiii', '', 3),
        (True, 'xii', '', 3),
        (True, 'xiv', '', 3),
        (True, 'xix', '', 3),
        (True, 'xi', '', 3),
        (True, 'xviii', '', 3),
        (True, 'xvii', '', 3),
        (True, 'xvi', '', 3),
        (True, 'xv', '', 3),
        (True, 'xx', '', 3),
        (True, 'x', '', 3),
        (False, 'y', '', 7),
    )

    # Bit flags combined in the `method` column of the special table.
    _method_dict = {
        'end': 1,
        'middle': 2,
        'beginning': 4,
        'beginning_no_space': 8,
    }

    # Fill field 0 (qualifier)
    _qual_3 = {
        'adaptation after',
        'after',
        'assistant of',
        'assistants of',
        'circle of',
        'follower of',
        'imitator of',
        'in the style of',
        'manner of',
        'pupil of',
        'school of',
        'studio of',
        'style of',
        'workshop of',
    }
    _qual_2 = {'copy after', 'copy after?', 'copy of'}
    _qual_1 = {
        'ascribed to',
        'attributed to or copy after',
        'attributed to',
        'possibly',
    }

    # Fill field 2 (generation)
    _gen_1 = (
        'the elder',
        ' sr.',
        ' sr',
        'senior',
        'der altere',
        'il vecchio',
        "l'aine",
        'p.re',
        'padre',
        'seniore',
        'vecchia',
        'vecchio',
    )
    _gen_2 = (
        ' jr.',
        ' jr',
        'der jungere',
        'il giovane',
        'giovane',
        'juniore',
        'junior',
        'le jeune',
        'the younger',
    )

    @staticmethod
    def _roman_check(numeral, fname, lname):
        """Move a Roman numeral from the first name to the last name.

        The numeral is moved only if it occurs in ``fname`` and is either
        the tail of ``fname`` or is directly followed by a space or comma.

        Parameters
        ----------
        numeral : str
            Roman numeral
        fname : str
            First name
        lname : str
            Last name

        Returns
        -------
        tuple
            First and last names (in that order), both stripped of
            surrounding whitespace, with the Roman numeral moved if
            applicable

        """
        loc = fname.find(numeral)
        # The "found" test must guard BOTH alternatives: indexing fname with
        # loc == -1 would either raise IndexError or inspect an unrelated
        # character and move a numeral that is not in fname at all.
        if loc != -1 and (
            len(fname[loc:]) == len(numeral)
            or fname[loc + len(numeral)] in {' ', ','}
        ):
            lname = ' '.join((lname, numeral))
            fname = ' '.join(
                (
                    fname[:loc].strip(),
                    fname[loc + len(numeral) :].lstrip(' ,'),
                )
            )
        return fname.strip(), lname.strip()

    def fingerprint(self, lname, fname='', qual='', normalize=0):
        """Build the Synoname toolcode.

        Parameters
        ----------
        lname : str
            Last name
        fname : str
            First name (can be blank)
        qual : str
            Qualifier
        normalize : int
            Normalization mode (0, 1, or 2). Any non-zero value moves
            whatever follows the first comma of the last name to the front
            of the first name; 2 additionally moves elder/younger terms and
            Roman numerals from the first name to the last name.

        Returns
        -------
        tuple
            The transformed last name, the transformed first name, and the
            synoname toolcode

        Examples
        --------
        >>> st = SynonameToolcode()
        >>> st.fingerprint('hat')
        ('hat', '', '0000000003$$h')
        >>> st.fingerprint('niall')
        ('niall', '', '0000000005$$n')
        >>> st.fingerprint('colin')
        ('colin', '', '0000000005$$c')
        >>> st.fingerprint('atcg')
        ('atcg', '', '0000000004$$a')
        >>> st.fingerprint('entreatment')
        ('entreatment', '', '0000000011$$e')

        >>> st.fingerprint('Ste.-Marie', 'Count John II', normalize=2)
        ('ste.-marie ii', 'count john', '0200491310$015b049a127c$smcji')
        >>> st.fingerprint('Michelangelo IV', '', 'Workshop of')
        ('michelangelo iv', '', '3000550015$055b$mi')

        """
        lname = lname.lower()
        fname = fname.lower()
        qual = qual.lower()

        # Start with the basic code
        toolcode = ['0', '0', '0', '000', '00', '00', '$', '', '$', '']

        # The full name is built once, before any normalization of the
        # separate name parts below.
        full_name = ' '.join((lname, fname))

        # Fill field 0 (qualifier)
        if qual in self._qual_3:
            toolcode[0] = '3'
        elif qual in self._qual_2:
            toolcode[0] = '2'
        elif qual in self._qual_1:
            toolcode[0] = '1'

        # Fill field 1 (punctuation)
        if '.' in full_name:
            toolcode[1] = '2'
        elif any(
            punct in full_name for punct in ',-/:;"&\'()!{|}?$%*+<=>[\\]^_`~'
        ):
            toolcode[1] = '1'

        # Fill field 2 (generation); elder terms take priority over younger
        # ones. The matched term is saved for possible movement later.
        elderyounger = ''
        for gen_code, gen_terms in (('1', self._gen_1), ('2', self._gen_2)):
            elderyounger = next(
                (gen for gen in gen_terms if gen in full_name), ''
            )
            if elderyounger:
                toolcode[2] = gen_code
                break

        # do comma flip
        if normalize:
            comma = lname.find(',')
            if comma != -1:
                # lstrip copes with a last name that has nothing but spaces
                # or commas after the first comma (formerly an IndexError).
                lname_end = lname[comma + 1 :].lstrip(' ,')
                if lname_end:
                    fname = lname_end + ' ' + fname
                lname = lname[:comma].strip()

        # do elder/younger move
        if normalize == 2 and elderyounger:
            elderyounger_loc = fname.find(elderyounger)
            if elderyounger_loc != -1:
                lname = ' '.join((lname, elderyounger.strip()))
                fname = ' '.join(
                    (
                        fname[:elderyounger_loc].strip(),
                        fname[elderyounger_loc + len(elderyounger) :],
                    )
                ).strip()

        # Fill fields 4 & 5 (lengths of the first and last names)
        toolcode[4] = '{:02d}'.format(len(fname))
        toolcode[5] = '{:02d}'.format(len(lname))

        # strip punctuation
        for char in ',/:;"&()!{|}?$%*+<=>[\\]^_`~':
            full_name = full_name.replace(char, '')
        # Hyphens become spaces, except within 'b-g'. Each replacement is
        # one character for one character, so the positions enumerated from
        # the pre-loop string remain valid.
        for pos, char in enumerate(full_name):
            if char == '-' and full_name[pos - 1 : pos + 2] != 'b-g':
                full_name = full_name[:pos] + ' ' + full_name[pos + 1 :]

        # Fill field 9 (search range): distinct word-initial letters, in
        # order of first appearance, up to 15 of them
        for letter in [_[0] for _ in full_name.split()]:
            if letter not in toolcode[9]:
                toolcode[9] += letter
            if len(toolcode[9]) == 15:
                break

        at_end = self._method_dict['end']
        in_middle = self._method_dict['middle']
        at_beginning = self._method_dict['beginning']
        at_beginning_no_space = self._method_dict['beginning_no_space']
        roman_abbrs = ('i.', 'v.', 'x.')

        # Fill fields 7 (specials) and 3 (roman numerals)
        for num, special in enumerate(self._synoname_special_table):
            roman, match, extra, method = special
            code = '{:03d}'.format(num)

            if method & at_end:
                match_context = ' ' + match
                loc = full_name.find(match_context)
                if (len(full_name) > len(match_context)) and (
                    loc == len(full_name) - len(match_context)
                ):
                    # A Roman numeral is ignored if the first name contains
                    # one of the abbreviations 'i.', 'v.', or 'x.'
                    if not (
                        roman and any(abbr in fname for abbr in roman_abbrs)
                    ):
                        full_name = full_name[:loc]
                        toolcode[7] += code + 'a'
                        if roman:
                            if toolcode[3] == '000':
                                toolcode[3] = code
                            if normalize == 2:
                                fname, lname = self._roman_check(
                                    match, fname, lname
                                )

            if method & in_middle:
                match_context = ' ' + match + ' '
                loc = 0
                while loc != -1:
                    loc = full_name.find(match_context, loc + 1)
                    if loc > 0:
                        if not (
                            roman
                            and any(abbr in fname for abbr in roman_abbrs)
                        ):
                            # Drop the match and one adjoining space
                            full_name = (
                                full_name[:loc]
                                + full_name[loc + len(match) + 1 :]
                            )
                            toolcode[7] += code + 'b'
                            if roman:
                                if toolcode[3] == '000':
                                    toolcode[3] = code
                                if normalize == 2:
                                    fname, lname = self._roman_check(
                                        match, fname, lname
                                    )

            if method & at_beginning:
                match_context = match + ' '
                loc = full_name.find(match_context)
                if loc == 0:
                    full_name = full_name[len(match) + 1 :]
                    toolcode[7] += code + 'c'

            if method & at_beginning_no_space:
                loc = full_name.find(match)
                if loc == 0:
                    toolcode[7] += code + 'd'
                    if full_name[: len(match)] not in toolcode[9]:
                        toolcode[9] += full_name[: len(match)]

            if extra:
                loc = full_name.find(extra)
                if loc != -1:
                    toolcode[7] += code + 'X'
                    # Since extras are unique, we only look for each of them
                    # once, and they include otherwise impossible characters
                    # for this field, so no membership test against field 9
                    # is needed before appending.
                    # NOTE(review): the slice length is len(match), not
                    # len(extra) -- kept as in the original; confirm intent.
                    toolcode[9] += full_name[loc : loc + len(match)]

        return lname, fname, ''.join(toolcode)
493
|
|
|
|
494
|
|
|
|
495
|
1 |
|
def synoname_toolcode(lname, fname='', qual='', normalize=0):
    """Build the Synoname toolcode.

    Functional convenience wrapper: creates a :py:class:`SynonameToolcode`
    and delegates to :py:meth:`SynonameToolcode.fingerprint`.

    Parameters
    ----------
    lname : str
        Last name
    fname : str
        First name (can be blank)
    qual : str
        Qualifier
    normalize : int
        Normalization mode (0, 1, or 2)

    Returns
    -------
    tuple
        The transformed names and the synoname toolcode

    Examples
    --------
    >>> synoname_toolcode('hat')
    ('hat', '', '0000000003$$h')
    >>> synoname_toolcode('niall')
    ('niall', '', '0000000005$$n')
    >>> synoname_toolcode('colin')
    ('colin', '', '0000000005$$c')
    >>> synoname_toolcode('atcg')
    ('atcg', '', '0000000004$$a')
    >>> synoname_toolcode('entreatment')
    ('entreatment', '', '0000000011$$e')

    >>> synoname_toolcode('Ste.-Marie', 'Count John II', normalize=2)
    ('ste.-marie ii', 'count john', '0200491310$015b049a127c$smcji')
    >>> synoname_toolcode('Michelangelo IV', '', 'Workshop of')
    ('michelangelo iv', '', '3000550015$055b$mi')

    """
    toolcoder = SynonameToolcode()
    result = toolcoder.fingerprint(lname, fname, qual, normalize)
    return result
536
|
|
|
|
537
|
|
|
|
538
|
|
|
# When executed as a script, run the doctests embedded in this module.
if __name__ == '__main__':
    from doctest import testmod

    testmod()
542
|
|
|
|