Passed
Push — main (4cd557...ac26d8)
by torrua
01:36
created

loglan_db.model_db.base_word   B

Complexity

Total Complexity 51

Size/Duplication

Total Lines 551
Duplicated Lines 0 %

Test Coverage

Coverage 99.42%

Importance

Changes 0
Metric Value
eloc 239
dl 0
loc 551
ccs 170
cts 171
cp 0.9942
rs 7.92
c 0
b 0
f 0
wmc 51

26 Methods

Rating   Name   Duplication   Size   Complexity  
A BaseWord.affixes() 0 10 1
A BaseWordSource.as_string() 0 8 1
A BaseWord._get_sources_c_prim() 0 25 3
B BaseWord.query_derivatives() 0 25 7
A BaseWord.query_afx() 0 10 1
A BaseWord.query_cpx() 0 10 1
A BaseWord.get_sources_cpx() 0 30 3
A BaseWord.query_parents() 0 10 1
A BaseWord.by_name() 0 15 2
A BaseWord._prepare_sources_cpd() 0 9 1
A BaseWord.__is_parented() 0 11 1
A BaseWord.add_author() 0 12 2
A BaseWord.parents() 0 10 1
A BaseWord.add_child() 0 15 2
A BaseWord._prepare_sources_cpx() 0 12 2
A BaseWord.get_sources_cpd() 0 27 3
A BaseWord.complexes() 0 10 1
A BaseWord.by_event() 0 19 3
A BaseWord.get_sources_prim() 0 17 3
A BaseWord.query_keys() 0 10 1
A BaseWord.by_key() 0 28 4
A BaseWord.add_authors() 0 11 2
A BaseWord.keys() 0 12 1
A BaseWord.words_from_source_cpx() 0 14 1
A BaseWord.add_children() 0 14 2
A BaseWord.words_from_source_cpd() 0 13 1

How to fix: Complexity

Complex classes like loglan_db.model_db.base_word often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
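In this module, the methods whose names contain "sources" (get_sources_prim, _get_sources_c_prim, get_sources_cpx, _prepare_sources_cpx, words_from_source_cpx, get_sources_cpd, _prepare_sources_cpd, words_from_source_cpd) form such a group: together they account for 17 of the 51 complexity points in the table above. Below is a minimal sketch of the Extract Class approach, assuming a hypothetical helper named WordSourceExtractor (the class name and the facade property are illustrative, not part of loglan_db):

from typing import List


class WordSourceExtractor:
    """Candidate home for the get_sources_* methods and their helpers."""

    def __init__(self, word):
        # `word` is a BaseWord instance; the extractor only reads
        # word.name, word.origin, word.origin_x and word.type
        self.word = word

    def prepare_sources_cpd(self) -> List[str]:
        # the body of BaseWord._prepare_sources_cpd, with `self`
        # replaced by `self.word`
        sources = self.word.origin.replace("(", "").replace(")", "") \
            .replace("/", "").replace("-", "")
        return [s.strip() for s in sources.split("+")]

    # get_sources_prim, get_sources_cpx, get_sources_cpd and the other
    # helpers would move here in the same way


# BaseWord would keep a one-line facade so existing callers stay intact:
#
#     @property
#     def sources(self) -> WordSourceExtractor:
#         return WordSourceExtractor(self)
#
# so that word.get_sources_cpd(as_str=True) becomes
# word.sources.get_sources_cpd(as_str=True).

This would keep the column definitions and query builders in BaseWord while the origin-parsing logic moves into its own component, which is exactly the kind of cohesive group described above. The full module source follows.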

# -*- coding: utf-8 -*-
# pylint: disable=C0103, C0303
"""
This module contains a basic Word Model and addons
"""

from __future__ import annotations

import os
import re
from typing import List, Optional, Union

from flask_sqlalchemy import SQLAlchemy, BaseQuery
from sqlalchemy import or_

from loglan_db import app_lod, db
from loglan_db.model_db import t_name_word_spells, t_name_words, \
    t_name_types, t_name_events, t_name_word_sources
from loglan_db.model_db.base_author import BaseAuthor
from loglan_db.model_db.base_connect_tables import \
    t_connect_authors, t_connect_words, t_connect_keys
from loglan_db.model_db.base_event import BaseEvent
from loglan_db.model_db.base_key import BaseKey
from loglan_db.model_db.base_type import BaseType
from loglan_db.model_init import InitBase, DBBase
from loglan_db.model_db.base_definition import BaseDefinition

if os.environ.get("IS_PDOC", "False") == "True":
    db = SQLAlchemy(app_lod())
    # TODO Fix pdoc

__pdoc__ = {
    'BaseEvent.appeared_words':
        """*Relationship query for getting a list of words appeared during this event*

    **query** : Optional[List[BaseWord]]""",

    'BaseEvent.deprecated_words':
        """*Relationship query for getting a list of words deprecated during this event*

    **query** : Optional[List[BaseWord]]""",

    'BaseAuthor.contribution':
        """*Relationship query for getting a list of words coined by this author*

    **query** : Optional[List[BaseWord]]""",

    'BaseType.words': 'words',
    'BaseDefinition.source_word': 'source_word',
    'BaseKey.definitions':
        """*Relationship query for getting a list of definitions related to this key*

    **query** : Optional[List[BaseDefinition]]""",

    'BaseAuthor.created': False, 'BaseAuthor.updated': False,
    'BaseEvent.created': False, 'BaseEvent.updated': False,
    'BaseKey.created': False, 'BaseKey.updated': False,
    'BaseSetting.created': False, 'BaseSetting.updated': False,
    'BaseSyllable.created': False, 'BaseSyllable.updated': False,
    'BaseType.created': False, 'BaseType.updated': False,
    'BaseDefinition.created': False, 'BaseDefinition.updated': False,
    'BaseWord.created': False, 'BaseWord.updated': False,
}


class BaseWord(db.Model, InitBase, DBBase):
    """BaseWord model"""
    __tablename__ = t_name_words

    id = db.Column(db.Integer, primary_key=True)
    """Word's internal ID number: Integer"""

    id_old = db.Column(db.Integer, nullable=False)  # Compatibility with the previous database
    name = db.Column(db.String(64), nullable=False)
    origin = db.Column(db.String(128))
    origin_x = db.Column(db.String(64))
    match = db.Column(db.String(8))
    rank = db.Column(db.String(8))
    year = db.Column(db.Date)
    notes = db.Column(db.JSON)
    TID_old = db.Column(db.Integer)  # references

    type_id = db.Column("type", db.ForeignKey(f'{t_name_types}.id'), nullable=False)
    type: BaseType = db.relationship(
        BaseType.__name__, backref="words", enable_typechecks=False)

    event_start_id = db.Column(
        "event_start", db.ForeignKey(f'{t_name_events}.id'), nullable=False)
    event_start: BaseEvent = db.relationship(
        BaseEvent.__name__, foreign_keys=[event_start_id],
        backref="appeared_words", enable_typechecks=False)

    event_end_id = db.Column("event_end", db.ForeignKey(f'{t_name_events}.id'))
    event_end: BaseEvent = db.relationship(
        BaseEvent.__name__, foreign_keys=[event_end_id],
        backref="deprecated_words", enable_typechecks=False)

    authors: BaseQuery = db.relationship(
        BaseAuthor.__name__, secondary=t_connect_authors,
        backref="contribution", lazy='dynamic', enable_typechecks=False)

    definitions: BaseQuery = db.relationship(
        BaseDefinition.__name__, backref="source_word",
        lazy='dynamic', enable_typechecks=False)

    # word's derivatives
    __derivatives = db.relationship(
        'BaseWord', secondary=t_connect_words,
        primaryjoin=(t_connect_words.c.parent_id == id),
        secondaryjoin=(t_connect_words.c.child_id == id),
        backref=db.backref('_parents', lazy='dynamic', enable_typechecks=False),
        lazy='dynamic', enable_typechecks=False)

    def __is_parented(self, child: BaseWord) -> bool:
        """
        Check if this word has already been added as a parent for this 'child'

        Args:
            child: BaseWord: BaseWord object to check

        Returns:
            bool
        """
        return self.__derivatives.filter(t_connect_words.c.child_id == child.id).count() > 0

    def add_child(self, child: BaseWord) -> str:
        """Add derivative for the source word
        Get words from Used In and add relationship in database

        Args:
          child: BaseWord: Object to add

        Returns:
            String with the name of the added child (BaseWord.name)

        """
        # TODO add check if type of child is allowed to add to this word
        if not self.__is_parented(child):
            self.__derivatives.append(child)
        return child.name

    def add_children(self, children: List[BaseWord]):
        """Add derivatives for the source word
        Get words from Used In and add relationship in database

        Args:
          children: List[BaseWord]: Objects to add

        Returns:
          None

        """
        # TODO add check if type of child is allowed to add to this word
        new_children = list(set(children) - set(self.__derivatives))
        _ = self.__derivatives.extend(new_children) if new_children else None

    def add_author(self, author: BaseAuthor) -> str:
        """Connect Author object with BaseWord object

        Args:
          author: BaseAuthor: Object to add

        Returns:
            String with the abbreviation of the added author (BaseAuthor.abbreviation)

        """
        if not self.authors.filter(BaseAuthor.abbreviation == author.abbreviation).count() > 0:
            self.authors.append(author)
        return author.abbreviation

    def add_authors(self, authors: List[BaseAuthor]):
        """Connect Author objects with BaseWord object

        Args:
          authors: List[BaseAuthor]: Objects to add

        Returns:
          None

        """
        new_authors = list(set(authors) - set(self.authors))
        _ = self.authors.extend(new_authors) if new_authors else None

    def query_derivatives(self, word_type: str = None,
                          word_type_x: str = None, word_group: str = None) -> BaseQuery:
        """Query to get all derivatives of the word, depending on its parameters

        Args:
          word_type: str:  (Default value = None)
          word_type_x: str:  (Default value = None)
          word_group: str:  (Default value = None)

        Returns:
            BaseQuery
        """
        result = self.__derivatives.filter(self.id == t_connect_words.c.parent_id)

        if word_type or word_type_x or word_group:
            result = result.join(BaseType)

        if word_type:
            result = result.filter(BaseType.type == word_type)
        if word_type_x:
            result = result.filter(BaseType.type_x == word_type_x)
        if word_group:
            result = result.filter(BaseType.group == word_group)

        return result.order_by(BaseWord.name.asc())

    def query_parents(self) -> BaseQuery:
        """Query to get all parents of the Complexes, Little words or Affixes

        Returns:
            BaseQuery
        """
        return self._parents  # if self.type in self.__parentable else []

    def query_cpx(self) -> BaseQuery:
        """Query to get all the complexes that exist for this word

        Returns:
            BaseQuery
        """
        return self.query_derivatives(word_group="Cpx")

    def query_afx(self) -> BaseQuery:
        """Query to get all the affixes that exist for this word
        Only primitives have affixes

        Returns:
            BaseQuery
        """
        return self.query_derivatives(word_type="Afx")

    def query_keys(self) -> BaseQuery:
        """Query for the BaseKeys linked with this BaseWord

        Returns:
            BaseQuery
        """
        return BaseKey.query.join(
            t_connect_keys, BaseDefinition, BaseWord).filter(BaseWord.id == self.id)

    @property
    def parents(self) -> List[BaseWord]:
        """Get all parents of the Complexes, Little words or Affixes

        Returns:
            List[BaseWord]
        """
        return self.query_parents().all()

    @property
    def complexes(self) -> List[BaseWord]:
        """Get all word's complexes if they exist

        Returns:
            List[BaseWord]
        """
        return self.query_cpx().all()

    @property
    def affixes(self) -> List[BaseWord]:
        """Get all word's affixes if they exist

        Returns:
            List[BaseWord]
        """
        return self.query_afx().all()

    @property
    def keys(self) -> List[BaseKey]:
        """Get all BaseKey objects related to this BaseWord
        Keep in mind that duplicate keys for different definitions
        will not be added to the final result

        Returns:
            List[BaseKey]
        """
        return self.query_keys().all()

    def get_sources_prim(self):
        """
        Get the sources of a primitive word

        Returns:
            A list of BaseWordSource objects for C-Prims,
            a formatted string for other Prims,
            or None if the word is not a primitive
        """
        # existing_prim_types = ["C", "D", "I", "L", "N", "O", "S", ]

        if not self.type.group == "Prim":
            return None

        prim_type = self.type.type[:1]

        if prim_type == "C":
            return self._get_sources_c_prim()

        return f"{self.name}: {self.origin}{' < ' + self.origin_x if self.origin_x else ''}"

    def _get_sources_c_prim(self) -> Optional[List[BaseWordSource]]:
        """
        Parse self.origin of a C-Prim into BaseWordSource objects

        Returns:
            Optional[List[BaseWordSource]]
        """
        if self.type.type != "C-Prim":
            return None

        pattern_source = r"\d+\/\d+\w"
        sources = str(self.origin).split(" | ")
        word_sources = []

        for source in sources:
            compatibility = re.search(pattern_source, source)[0]
            c_l = compatibility[:-1].split("/")
            transcription = (re.search(rf"(?!{pattern_source}) .+", source)[0]).strip()
            word_source = BaseWordSource(**{
                "coincidence": int(c_l[0]),
                "length": int(c_l[1]),
                "language": compatibility[-1:],
                "transcription": transcription, })
            word_sources.append(word_source)

        return word_sources

    def get_sources_cpx(self, as_str: bool = False) -> List[Union[str, BaseWord]]:
        """Extract source words from self.origin field accordingly

        Args:
            as_str (bool): return BaseWord objects if False else as simple str
            (Default value = False)
        Example:
            'foldjacea' > ['forli', 'djano', 'cenja']
        Returns:
            List of words from which the self.name was created

        """

        # these prims have switched djifoas like 'flo' for 'folma'
        switch_prims = [
            'canli', 'farfu', 'folma', 'forli', 'kutla', 'marka',
            'mordu', 'sanca', 'sordi', 'suksi', 'surna']

        if not self.type.group == "Cpx":
            return []

        sources = self._prepare_sources_cpx()

        result = self.words_from_source_cpx(sources)

        if not as_str:
            return result

        result_as_str = []
        _ = [result_as_str.append(r) for r in sources if r not in result_as_str]
        return result_as_str

    @staticmethod
    def words_from_source_cpx(sources: List[str]) -> List[BaseWord]:
        """
        Get BaseWord objects by name for the specified complex sources
        (LW and Cpd types are excluded)

        Args:
            sources: List[str]: source word names

        Returns:
            List[BaseWord]
        """
        exclude_type_ids = [t.id for t in BaseType.by(["LW", "Cpd"]).all()]
        return BaseWord.query \
            .filter(BaseWord.name.in_(sources)) \
            .filter(BaseWord.type_id.notin_(exclude_type_ids)).all()

    def _prepare_sources_cpx(self) -> List[str]:
        """
        Split self.origin of a complex into its source word names
        # TODO

        Returns:
            List[str]
        """
        sources = self.origin.replace("(", "").replace(")", "").replace("/", "")
        sources = sources.split("+")
        sources = [
            s if not s.endswith(("r", "h")) else s[:-1]
            for s in sources if s not in ["y", "r", "n"]]
        return sources

    def get_sources_cpd(self, as_str: bool = False) -> List[Union[str, BaseWord]]:
        """Extract source words from self.origin field accordingly

        Args:
          as_str: bool: return BaseWord objects if False else as simple str
          (Default value = False)

        Returns:
          List of words from which the self.name was created

        """

        if not self.type.type == "Cpd":
            return []

        sources = self._prepare_sources_cpd()

        result = self.words_from_source_cpd(sources)

        if not as_str:
            return result

        result_as_str = []

        _ = [result_as_str.append(r) for r in sources if r not in result_as_str and r]

        return result_as_str

    def _prepare_sources_cpd(self) -> List[str]:
        """
        Split self.origin of a compound into its source word names

        Returns:
            List[str]
        """
        sources = self.origin.replace("(", "").replace(")", "").replace("/", "").replace("-", "")
        sources = [s.strip() for s in sources.split("+")]
        return sources

    @staticmethod
    def words_from_source_cpd(sources: List[str]) -> List[BaseWord]:
        """
        Get BaseWord objects by name for the specified compound sources
        (LW and Cpd types only)

        Args:
            sources: List[str]: source word names

        Returns:
            List[BaseWord]
        """
        type_ids = [t.id for t in BaseType.by(["LW", "Cpd"]).all()]
        return BaseWord.query.filter(BaseWord.name.in_(sources)) \
            .filter(BaseWord.type_id.in_(type_ids)).all()
452
453 1
    @classmethod
454 1
    def by_event(cls, event_id: Union[BaseEvent, int] = None) -> BaseQuery:
455
        """Query filtered by specified Event (latest by default)
456
457
        Args:
458
          event_id: Union[BaseEvent, int]: Event object or Event.id (int) (Default value = None)
459
460
        Returns:
461
          BaseQuery
462
463
        """
464 1
        if not event_id:
465 1
            event_id = BaseEvent.latest().id
466
467 1
        event_id = BaseEvent.id if isinstance(event_id, BaseEvent) else int(event_id)
468
469 1
        return cls.query.filter(cls.event_start_id <= event_id) \
470
            .filter(or_(cls.event_end_id > event_id, cls.event_end_id.is_(None))) \
471
            .order_by(cls.name)
472
473 1
    @classmethod
474 1
    def by_name(cls, name: str, case_sensitive: bool = False) -> BaseQuery:
475
        """Word.Query filtered by specified name
476
477
        Args:
478
          name: str:
479
          case_sensitive: bool:  (Default value = False)
480
481
        Returns:
482
          BaseQuery
483
484
        """
485 1
        if case_sensitive:
486 1
            return cls.query.filter(cls.name == name)
487 1
        return cls.query.filter(cls.name.in_([name, name.lower(), name.upper()]))
488
489 1
    @classmethod
490 1
    def by_key(
491
            cls, key: Union[BaseKey, str],
492
            language: str = None,
493
            case_sensitive: bool = False) -> BaseQuery:
494
        """Word.Query filtered by specified key
495
496
        Args:
497
          key: Union[BaseKey, str]:
498
          language: str: Language of key (Default value = None)
499
          case_sensitive: bool:  (Default value = False)
500
501
        Returns:
502
          BaseQuery
503
504
        """
505
506 1
        key = BaseKey.word if isinstance(key, BaseKey) else str(key)
507 1
        request = cls.query.join(BaseDefinition, t_connect_keys, BaseKey)
508
509 1
        if case_sensitive:
510 1
            request = request.filter(BaseKey.word == key)
511
        else:
512 1
            request = request.filter(BaseKey.word.in_([key, key.lower(), key.upper()]))
513
514 1
        if language:
515 1
            request = request.filter(BaseKey.language == language)
516 1
        return request
517


class BaseWordSource(InitBase):
    """Word Source from BaseWord.origin for Prims"""
    __tablename__ = t_name_word_sources

    LANGUAGES = {
        "E": "English",
        "C": "Chinese",
        "H": "Hindi",
        "R": "Russian",
        "S": "Spanish",
        "F": "French",
        "J": "Japanese",
        "G": "German", }

    coincidence: int = None
    length: int = None
    language: str = None
    transcription: str = None

    @property
    def as_string(self) -> str:
        """
        Format WordSource as string, for example, '3/5R mesto'

        Returns:
            str
        """
        return f"{self.coincidence}/{self.length}{self.language} {self.transcription}"


class BaseWordSpell(InitBase):
    """BaseWordSpell model"""
    __tablename__ = t_name_word_spells