DaRealFreak /
TitleSearch
| 1 | #!/usr/local/bin/python |
||
|
0 ignored issues
–
show
|
|||
| 2 | # coding: utf-8 |
||
| 3 | |||
| 4 | import re |
||
| 5 | |||
| 6 | import jellyfish |
||
| 7 | import requests |
||
| 8 | from bs4 import BeautifulSoup as Soup |
||
| 9 | |||
| 10 | from titlesearch.language.detection import matches_language |
||
| 11 | from titlesearch.language.language_settings import * |
||
|
0 ignored issues
–
show
|
|||
| 12 | |||
| 13 | |||
class VisualNovelDatabase(object):
    """Scraper for looking up titles and their alternative language titles on https://vndb.org

    All methods are static; the class only serves as a namespace for the
    vndb.org specific URLs, the supported languages and the lookup helpers.
    """

    ROOT_URL = 'https://vndb.org/'
    SEARCH_URL = 'https://vndb.org/v/all'
    # language classes the titles get grouped by; presumably provided by the
    # star import of titlesearch.language.language_settings
    KNOWN_LANGUAGES = [English, Japanese, Korean]
    # seconds to wait for a response; without a timeout requests can block forever
    REQUEST_TIMEOUT = 30

    @staticmethod
    def get_similar_titles(title: str) -> list:
        """Search vndb.org for the given title and return the found entries

        :param title: title to search for
        :type title: str
        :return: list of dicts with the keys ``title``, ``link`` and ``similarity``,
            sorted by descending similarity to the searched title
        :rtype: list
        """
        # function-scope import so this block does not depend on the module header
        from urllib.parse import urljoin

        response = requests.get(url=VisualNovelDatabase.SEARCH_URL, params={'q': title},
                                timeout=VisualNovelDatabase.REQUEST_TIMEOUT)
        # use html5lib here to generate a tbody tag from the table (not generated with html.parser)
        soup = Soup(response.text, 'html5lib')

        # if the match is above a certain percentage we won't get to the search result page but to
        # the detail page of the search result, so there is exactly one result.
        # The og:url meta tag is apparently not set on the search result page.
        current_url = soup.select_one('meta[property="og:url"]')
        # escape the root URL so its "." is matched literally, raw string for the \d escape
        detail_page_pattern = re.escape(VisualNovelDatabase.ROOT_URL) + r'v\d+'
        if current_url is not None and re.match(detail_page_pattern, current_url.get('content', '')):
            return [{
                'title': title,
                'link': current_url['content'],
                'similarity': 1.00
            }]

        # newer jellyfish releases call the function jaro_similarity, older ones jaro_distance
        jaro = getattr(jellyfish, 'jaro_similarity', None) or jellyfish.jaro_distance
        searched_title = title.lower()

        results = []
        for anchor in soup.select('tbody td a'):
            result_title = anchor.get('title')
            href = anchor.get('href')
            if not result_title or not href:
                # anchors without title or target can't be turned into a search result
                continue

            results.append({
                'title': result_title,
                # urljoin avoids the double slash a plain concatenation produces for "/v123" hrefs
                'link': urljoin(VisualNovelDatabase.ROOT_URL, href),
                'similarity': jaro(result_title.lower(), searched_title)
            })

        results.sort(key=lambda item: item['similarity'], reverse=True)
        return results

    @staticmethod
    def get_alternative_titles(title: str = '', link: str = '') -> dict:
        """Get alternative titles for the given title. Preferring link over title argument

        If only the title is given, the link of the most similar search result is used.

        :param title: title to search for, only used if no link is passed
        :type title: str
        :param link: URL of the detail page to extract the titles from
        :type link: str
        :return: dict mapping the lower-cased language names to lists of titles
        :rtype: dict
        """
        if title and not link:
            similar_titles = VisualNovelDatabase.get_similar_titles(title)
            if not similar_titles:
                # nothing found, only the passed title itself can be grouped
                return VisualNovelDatabase.group_titles(title, [])
            # results are sorted by similarity, so the first one is the best match
            link = similar_titles[0]['link']

        response = requests.get(url=link, timeout=VisualNovelDatabase.REQUEST_TIMEOUT)
        result_data = VisualNovelDatabase.parse_results(response.text)

        alternative_titles = []
        if 'Aliases' in result_data:
            alternative_titles.extend(result_data['Aliases'].split(', '))
        if 'Original title' in result_data:
            alternative_titles.append(result_data['Original title'])

        # fall back to the passed title if the page has no "Title" row
        return VisualNovelDatabase.group_titles(release_title=result_data.get('Title', title),
                                                alternative_titles=alternative_titles)

    @staticmethod
    def parse_results(html_content: str) -> dict:
        """Parse the result table of the HTML content into a dictionary

        The first non-empty cell of each table row is used as key, the second one as value.
        Rows with fewer than two non-empty cells are skipped.

        :param html_content: HTML of a detail page
        :type html_content: str
        :return: parsed rows, empty if the page contains no ``div.vndetails table``
        :rtype: dict
        """
        soup = Soup(html_content, 'html.parser')

        details_table = soup.select_one('div.vndetails table')
        if details_table is None:
            return {}

        result_data = {}
        for row in details_table.find_all('tr'):
            cells = [cell.text.strip() for cell in row.find_all('td')]
            cells = [text for text in cells if text]
            if len(cells) > 1:
                result_data[cells[0]] = cells[1]

        return result_data

    @staticmethod
    def group_titles(release_title: str, alternative_titles: list) -> dict:
        """Iterate through the supported languages and group the titles according to the detected languages

        The release title is always placed in the ``english`` group. Each alternative title is
        added to every language it matches, without duplicates inside a group.

        :param release_title: main title of the release
        :type release_title: str
        :param alternative_titles: further titles to sort into the language groups
        :type alternative_titles: list
        :return: dict mapping the lower-cased language names to lists of titles
        :rtype: dict
        """
        grouped_titles = {
            language.__name__.lower(): [] for language in VisualNovelDatabase.KNOWN_LANGUAGES
        }
        grouped_titles['english'] = [release_title]

        for title in alternative_titles:
            for language in VisualNovelDatabase.KNOWN_LANGUAGES:
                language_titles = grouped_titles[language.__name__.lower()]
                if matches_language(title, language) and title not in language_titles:
                    language_titles.append(title)

        return grouped_titles
||
| 131 |
The coding style of this project requires that you add a docstring to this code element. Below is an example for methods:
If you would like to know more about docstrings, we recommend reading PEP-257: Docstring Conventions.