DaRealFreak /
TitleSearch
| 1 | #!/usr/local/bin/python |
||
| 2 | # coding: utf-8 |
||
| 3 | """ |
||
| 4 | Configuration for the different languages; mostly relevant for Asian languages, since the |
||
| 5 | character sets they use differ greatly from one another. |
||
| 6 | Unicode ranges mostly taken from here: http://www.rikai.com/library/kanjitables/kanji_codes.unicode.shtml |
||
|
0 ignored issues
–
show
|
|||
| 7 | """ |
||
| 8 | |||
| 9 | import numpy as np |
||
| 10 | |||
| 11 | from titlesearch.language import LanguageTemplate |
||
| 12 | |||
| 13 | |||
class English(LanguageTemplate):
    """Language definition for English titles.

    English declares no Unicode ranges of its own: both range arrays are
    empty, ``requires_unicode_characters`` is off and
    ``forbids_unicode_characters`` is on. How these flags are evaluated is
    decided by ``LanguageTemplate``.
    """

    # No dedicated Unicode blocks, so the lower/upper bound arrays stay empty.
    unicode_character_lowers = np.array([])
    unicode_character_uppers = np.array([])

    requires_unicode_characters = False
    forbids_unicode_characters = True
||
| 20 | |||
| 21 | |||
class Korean(LanguageTemplate):
    """Language definition for Korean titles.

    Unicode ranges (first-last code point), in the same order as the
    entries of the two arrays below:

    - Hangul Syllables (AC00-D7A3), which corresponds to (가-힣)
    - Hangul Jamo (1100-11FF)
    - Hangul Compatibility Jamo (3130-318F)
    - Hangul Jamo Extended-A (A960-A97F)
    - Hangul Jamo Extended-B (D7B0-D7FF)
    """

    # The two arrays are parallel: entry i of ``lowers`` and entry i of
    # ``uppers`` are the first and last code point of the same range.
    unicode_character_lowers = np.array([
        0xAC00,  # Hangul Syllables
        0x1100,  # Hangul Jamo
        0x3130,  # Hangul Compatibility Jamo
        0xA960,  # Hangul Jamo Extended-A
        0xD7B0,  # Hangul Jamo Extended-B
    ])
    unicode_character_uppers = np.array([
        0xD7A3,  # Hangul Syllables
        0x11FF,  # Hangul Jamo
        0x318F,  # Hangul Compatibility Jamo
        0xA97F,  # Hangul Jamo Extended-A
        0xD7FF,  # Hangul Jamo Extended-B
    ])

    requires_unicode_characters = True
    forbids_unicode_characters = False
||
| 35 | |||
| 36 | |||
class Japanese(LanguageTemplate):
    """Language definition for Japanese titles.

    Unicode ranges (first-last code point), in the same order as the
    entries of the two arrays below:

    - Japanese punctuation (3000-303F)
    - Hiragana (3040-309F)
    - Katakana (30A0-30FF)
    - Full-width Roman and half-width Katakana (FF00-FFEF)
    - Kanji (4E00-9FAF)
    - Rare Kanji (3400-4DBF)
    """

    # The two arrays are parallel: entry i of ``lowers`` and entry i of
    # ``uppers`` are the first and last code point of the same range.
    unicode_character_lowers = np.array([
        0x3000,  # punctuation
        0x3040,  # Hiragana
        0x30a0,  # Katakana
        0xff00,  # full-width Roman / half-width Katakana
        0x4e00,  # Kanji
        0x3400,  # rare Kanji
    ])
    unicode_character_uppers = np.array([
        0x303f,  # punctuation
        0x309f,  # Hiragana
        0x30ff,  # Katakana
        0xffef,  # full-width Roman / half-width Katakana
        0x9FAF,  # Kanji
        0x4dbf,  # rare Kanji
    ])

    requires_unicode_characters = True
    forbids_unicode_characters = False
||
| 51 |
This check looks for lines that are too long. You can specify the maximum line length.