|
1
|
|
|
#!/usr/local/bin/python |
|
|
|
|
|
|
2
|
|
|
# coding: utf-8 |
|
3
|
|
|
|
|
4
|
|
|
import re |
|
5
|
|
|
|
|
6
|
|
|
import bs4 |
|
7
|
|
|
import jellyfish |
|
8
|
|
|
import requests |
|
9
|
|
|
from bs4 import BeautifulSoup as Soup |
|
|
|
|
|
|
10
|
|
|
|
|
11
|
|
|
from titlesearch.language.language_settings import * |
|
|
|
|
|
|
12
|
|
|
|
|
13
|
|
|
|
|
14
|
|
|
class MyAnimeList(object):
    """Module for extracting alternative language titles for titles from myanimelist.net"""

    # endpoint queried by get_similar_titles with the search term passed as ``q``
    SEARCH_URL = 'https://myanimelist.net/search/all'
    # language classes whose lower-cased ``__name__`` become the keys of the grouped result
    KNOWN_LANGUAGES = [English, Japanese]
    # NOTE(review): not referenced inside this class; presumably consumed by callers - verify
    ADDED_KEYWORDS = [' (Novel)']
    # label text found on a detail page -> key in the grouped result
    MAPPING = {
        'English:': 'english',
        'Synonyms:': 'english',
        'Japanese:': 'japanese'
    }

    @staticmethod
    def get_similar_titles(title: str) -> list:
        """Main function for extracting alternate titles

        Queries the search page and collects every anime/manga hit together with
        its string similarity to the requested title.

        :type title: str
        :return: list of dicts with the keys ``title``, ``link`` and ``similarity``,
            ordered by descending similarity
        """
        payload = {
            'q': title
        }

        # NOTE(review): newer jellyfish releases expose this metric as jaro_similarity and
        # only keep jaro_distance as a deprecated alias (dropped later on); prefer the new
        # name when available so both old and new installs work - confirm against the
        # pinned jellyfish version
        similarity = getattr(jellyfish, 'jaro_similarity', None) or jellyfish.jaro_distance

        results = []

        response = requests.get(url=MyAnimeList.SEARCH_URL, params=payload)
        # html5lib parser since html.parser will fail at the content-left div already
        soup = Soup(response.text, 'html5lib')
        for search_result in soup.select('div.content-left div.list.di-t.w100 a.hoverinfo_trigger'):
            result_title = search_result.text.strip()
            # anchors without a href can never be an anime/manga hit, skip them instead of raising
            href = search_result.get('href', '')
            # only anime and manga entries are of interest, the search also lists other categories
            if result_title and re.search('/anime/|/manga/', href):
                results.append({
                    'title': result_title,
                    'link': href,
                    'similarity': similarity(result_title.lower(), title.lower())
                })

        results.sort(key=lambda item: item['similarity'], reverse=True)
        return results

    @staticmethod
    def get_alternative_titles(title: str = '', link: str = '') -> dict:
        """Get alternative titles for the given title. Preferring link over title argument

        If only a title is given the best match of get_similar_titles is used as link.
        Without any search result only the passed title is grouped.

        :type title: str
        :type link: str
        :return: dict mapping the lower-cased language names to lists of titles
        """
        if title and not link:
            matches = MyAnimeList.get_similar_titles(title)
            if not matches:
                return MyAnimeList.group_titles(title, None)
            # results are sorted by similarity, so the first entry is the best match
            link = matches[0]['link']

        response = requests.get(url=link)
        soup = Soup(response.text, 'html5lib')

        name_tag = soup.find('span', attrs={'itemprop': 'name'})
        if name_tag:
            release_title = name_tag.text
        else:
            # fall back to the passed title if the page has no name span
            release_title = title if title else ''

        return MyAnimeList.group_titles(release_title=release_title, soup=soup)

    @staticmethod
    def group_titles(release_title: str, soup) -> dict:
        """Extract and group the titles of the bs4 Tag to their respective language

        The release title is always the first english entry. Every ``div.spaceit_pad``
        with a ``span.dark_text`` label contained in MAPPING adds its text to the mapped
        language, synonyms are split at ``', '`` into single titles.

        :type release_title: str
        :type soup: bs4.element.Tag|None
        :return: dict mapping the lower-cased language names to lists of titles
        """
        grouped_titles = {}
        for language in MyAnimeList.KNOWN_LANGUAGES:
            grouped_titles[language.__name__.lower()] = []

        grouped_titles['english'] = [release_title]

        if not soup:
            return grouped_titles

        for search_result in soup.find_all('div', attrs={'class': 'spaceit_pad'}):
            category = search_result.find('span', attrs={'class': 'dark_text'})
            if not category:
                continue

            # strip once and use the stripped label for the comparison and the lookup,
            # an unstripped label would raise a KeyError on MAPPING
            label = category.text.strip()
            if label not in MyAnimeList.MAPPING:
                continue

            # only the direct text nodes hold the value, the label lives in the child span
            value = "".join(
                [t for t in search_result.contents if isinstance(t, bs4.element.NavigableString)]).strip()

            if label == 'Synonyms:':
                titles = value.split(', ')
            else:
                titles = [value]
            grouped_titles[MyAnimeList.MAPPING[label]].extend(titles)

        return grouped_titles
|
108
|
|
|
|
The coding style of this project requires that you add a docstring to this code element. Below you will find an example for methods:
If you would like to know more about docstrings, we recommend reading PEP 257: Docstring Conventions.