titlesearch.vndb.vndb.VisualNovelDatabase.parse_results() - Code Metrics - DaRealFreak/TitleSearch - Measure and Improve Code Quality continuously with Scrutinizer

VisualNovelDatabase.parse_results() B
last analyzed 2018-04-07 14:58 UTC

↳ Parent: titlesearch.vndb.vndb

Complexity

Conditions

Size

Total Lines	20
Code Lines	12

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	5
eloc	12
nop	1
dl	0
loc	20
rs	8.5454
c	0
b	0
f	0

#!/usr/local/bin/python
class SomeClass:
    """Minimal example class showing where a method docstring is placed."""

    def some_method(self):
        """Example stub: the method has no logic and evaluates to ``None``."""
        return None
# coding: utf-8

import re

import jellyfish
import requests
from bs4 import BeautifulSoup as Soup

from titlesearch.language.detection import matches_language
from titlesearch.language.language_settings import *



class VisualNovelDatabase(object):
    """Extract alternative-language titles for visual novels from https://vndb.org.

    The class is a pure namespace: every method is static and the only state
    consists of the URL constants and the list of supported languages.
    """

    # base URL; also used as the prefix for the links found in the search result
    ROOT_URL = 'https://vndb.org/'
    # search endpoint, queried with the ``q`` parameter
    SEARCH_URL = 'https://vndb.org/v/all'
    # language classes whose lower-cased ``__name__`` becomes a key of the grouped result
    KNOWN_LANGUAGES = [English, Japanese, Korean]

    @staticmethod
    def get_similar_titles(title: str) -> list:
        """Search vndb.org for ``title`` and rank the hits by Jaro similarity.

        :type title: str
        :param title: title to search for
        :return: list of dicts with the keys ``title``, ``link`` and ``similarity``,
                 sorted by descending similarity; a single entry with similarity 1.00
                 if the site answered with a detail page instead of a result list
        """
        response = requests.get(url=VisualNovelDatabase.SEARCH_URL, params={'q': title})
        # use html5lib here to generate a tbody tag from the table
        # (not generated with html.parser)
        soup = Soup(response.text, 'html5lib')

        # if the match is above a certain percentage we won't get to the search result
        # page but to the detail page of the search result, so there is only one result;
        # the og:url meta tag is apparently not set on the search result page
        current_url = soup.select_one('meta[property="og:url"]')
        detail_url = current_url.get('content', '') if current_url is not None else ''
        # raw string for the \d escape, re.escape() so the dots of the host match literally
        detail_pattern = re.escape(VisualNovelDatabase.ROOT_URL) + r'v\d+'
        if re.match(detail_pattern, detail_url):
            return [{
                'title': title,
                'link': detail_url,
                'similarity': 1.00
            }]

        # newer jellyfish releases expose the metric as jaro_similarity; fall back to the
        # jaro_distance name otherwise -- TODO confirm against the pinned jellyfish version
        jaro = getattr(jellyfish, 'jaro_similarity', None) or jellyfish.jaro_distance
        needle = title.lower()

        results = []
        for search_result in soup.select('tbody td a'):
            # anchors without a title or href cannot be turned into a result entry
            if not (search_result.has_attr('title') and search_result.has_attr('href')):
                continue
            results.append({
                'title': search_result['title'],
                # ROOT_URL already ends with a slash, so drop a leading one from the href
                'link': VisualNovelDatabase.ROOT_URL + search_result['href'].lstrip('/'),
                'similarity': jaro(search_result['title'].lower(), needle)
            })

        results.sort(key=lambda item: item['similarity'], reverse=True)
        return results

    @staticmethod
    def get_alternative_titles(title: str = '', link: str = '') -> dict:
        """Get alternative titles for the given title. Preferring link over title argument

        If only ``title`` is given, the best search hit for it is used as the detail page.

        :type title: str
        :param title: title to look up; only used for the search when ``link`` is empty
        :type link: str
        :param link: URL of the detail page to parse
        :return: titles grouped by language, see :meth:`group_titles`; if the search
                 yields nothing only ``title`` itself is grouped
        """
        if title and not link:
            candidates = VisualNovelDatabase.get_similar_titles(title)
            if not candidates:
                return VisualNovelDatabase.group_titles(title, [])
            # the candidates are sorted by similarity, so the first one is the best match
            link = candidates[0]['link']

        response = requests.get(url=link)
        result_data = VisualNovelDatabase.parse_results(response.text)

        alternative_titles = []
        if 'Aliases' in result_data:
            alternative_titles.extend(result_data['Aliases'].split(', '))
        if 'Original title' in result_data:
            alternative_titles.append(result_data['Original title'])

        # fall back to the requested title if the page has no "Title" row
        return VisualNovelDatabase.group_titles(release_title=result_data.get('Title', title),
                                                alternative_titles=alternative_titles)

    @staticmethod
    def parse_results(html_content: str) -> dict:
        """Parse the result table of the HTML content into a dictionary

        Every table row with at least two non-empty cells contributes one entry: the
        text of the first non-empty cell is the key, the text of the second the value.

        :type html_content: str
        :param html_content: HTML of a detail page
        :return: mapping of row label to row value; empty if the page contains no
                 ``div.vndetails table`` element
        """
        soup = Soup(html_content, 'html.parser')

        details_table = soup.select_one('div.vndetails table')
        if details_table is None:
            # not a detail page (or the layout changed): nothing to extract
            return {}

        result_data = {}
        for row in details_table.find_all('tr'):
            cells = [cell.text.strip() for cell in row.find_all('td')]
            cells = [text for text in cells if text]
            if len(cells) >= 2:
                result_data[cells[0]] = cells[1]

        return result_data

    @staticmethod
    def group_titles(release_title: str, alternative_titles: list) -> dict:
        """Group the titles according to the detected languages

        The release title is always stored under ``english``. Each alternative title
        is appended to every language it matches, without duplicates per language.

        :type release_title: str
        :param release_title: main title of the entry
        :type alternative_titles: list
        :param alternative_titles: further titles to sort into the language groups
        :return: dict mapping the lower-cased name of every known language to a list
                 of titles
        """
        grouped_titles = {language.__name__.lower(): []
                          for language in VisualNovelDatabase.KNOWN_LANGUAGES}
        grouped_titles['english'] = [release_title]

        for title in alternative_titles:
            for language in VisualNovelDatabase.KNOWN_LANGUAGES:
                bucket = grouped_titles[language.__name__.lower()]
                if matches_language(title, language) and title not in bucket:
                    bucket.append(title)

        return grouped_titles


1		#!/usr/local/bin/python
		0 ignored issues – show Coding Style introduced 2018-04-05 10:05 UTC by Report Bug Copy Issue Report This module should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below is an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend reading PEP 257: Docstring Conventions. Loading history...
2		# coding: utf-8
3
4		import re
5
6		import jellyfish
7		import requests
8		from bs4 import BeautifulSoup as Soup
9
10		from titlesearch.language.detection import matches_language
11		from titlesearch.language.language_settings import *
		0 ignored issues – show Coding Style introduced 2018-04-05 10:05 UTC by Report Bug Copy Issue Report The usage of wildcard imports like `titlesearch.language.language_settings` should generally be avoided. Loading history... Unused Code introduced 2018-04-05 10:05 UTC by Report Bug Copy Issue Report `np` was imported with wildcard, but is not used. Loading history... Unused Code introduced 2018-04-05 10:05 UTC by Report Bug Copy Issue Report `LanguageTemplate` was imported with wildcard, but is not used. Loading history...
12
13
14		class VisualNovelDatabase(object):
		0 ignored issues – show Unused Code introduced 2018-04-05 10:05 UTC by Report Bug Copy Issue Report The variable `__class__` seems to be unused. Loading history...
15		"""Module for extracting alternative language titles for titles from https://vndb.org"""
16
17		ROOT_URL = 'https://vndb.org/'
18		SEARCH_URL = 'https://vndb.org/v/all'
19		KNOWN_LANGUAGES = [English, Japanese, Korean]
		0 ignored issues – show Comprehensibility Best Practice introduced 2018-04-05 10:05 UTC by Report Bug Copy Issue Report The variable `Korean` does not seem to be defined. Loading history... Comprehensibility Best Practice introduced 2018-04-05 10:05 UTC by Report Bug Copy Issue Report The variable `English` does not seem to be defined. Loading history... Comprehensibility Best Practice introduced 2018-04-05 10:05 UTC by Report Bug Copy Issue Report The variable `Japanese` does not seem to be defined. Loading history...
20
21		@staticmethod
22		def get_similar_titles(title: str) -> list:
23		"""Main function for extracting alternate titles
24
25		:type title: str
26		:return:
27		"""
28		payload = {
29		'q': title
30		}
31
32		results = []
33
34		link = requests.get(url=VisualNovelDatabase.SEARCH_URL, params=payload)
35		# use html5lib here to generate a tbody tag from the table(not generated with html.parser)
36		soup = Soup(link.text, 'html5lib')
37
38		# if the match is above a certain percentage we won't get to the search result page but to the
		0 ignored issues – show Coding Style introduced 2018-04-05 10:05 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (102/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
39		# detail page of the search result so we won't get more than 1 result
40		current_url = soup.select_one('meta[property="og:url"]')
41		# not set apparently in the search result page
42		if current_url and re.match('{0:s}v\d+'.format(VisualNovelDatabase.ROOT_URL), current_url['content']):
		0 ignored issues – show Bug introduced 2018-04-05 10:05 UTC by Report Bug Copy Issue Report A suspicious escape sequence `\d` was found. Did you maybe forget to add an `r` prefix? Escape sequences in Python string literals are generally interpreted according to rules similar to standard C. Only in strings prefixed with `r` or `R` (raw strings) are backslashes left untouched, which is what a regular-expression pattern needs. The escape sequence that was used indicates that you might have intended to write a regular expression. Learn more about the available escape sequences in the Python documentation. Loading history... Coding Style introduced 2018-04-05 10:05 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (110/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
43		return [{
44		'title': title,
45		'link': current_url['content'],
46		'similarity': 1.00
47		}]
48
49		title_links = soup.select('tbody td a')
50		for search_result in title_links:
51		results.append({
52		'title': search_result['title'],
53		'link': VisualNovelDatabase.ROOT_URL + search_result['href'],
54		'similarity': jellyfish.jaro_distance(search_result['title'].lower(), title.lower())
55		})
56
57		results.sort(key=lambda item: item['similarity'], reverse=True)
58		return results
59
60		@staticmethod
61		def get_alternative_titles(title: str = '', link: str = '') -> dict:
62		"""Get alternative titles for the given title. Preferring link over title argument
63
64		:type title: str
65		:type link: str
66		:return:
67		"""
68		if title and not link:
69		link = VisualNovelDatabase.get_similar_titles(title)
70		if link:
71		link = link[0]['link']
72		else:
73		return VisualNovelDatabase.group_titles(title, [])
74
75		link = requests.get(url=link)
76
77		result_data = VisualNovelDatabase.parse_results(link.text)
78
79		alternative_titles = []
80		if 'Aliases' in result_data:
81		for alternative_title in result_data['Aliases'].split(', '):
82		alternative_titles.append(alternative_title)
83		if 'Original title' in result_data:
84		alternative_titles.append(result_data['Original title'])
85
86		return VisualNovelDatabase.group_titles(release_title=result_data['Title'],
87		alternative_titles=alternative_titles)
88
89		@staticmethod
90		def parse_results(html_content: str) -> dict:
91		"""Parse the result table of the HTML content into a dictionary
92
93		:type html_content: str
94		:return:
95		"""
96		soup = Soup(html_content, 'html.parser')
97		result_data = {}
98
99		#
100		table_body = soup.select_one('div.vndetails table')
101		rows = table_body.find_all('tr')
102		for row in rows:
103		cols = row.find_all('td')
104		cols = [ele.text.strip() for ele in cols if ele.text.strip()]
105		if cols[1:]:
106		result_data[cols[0]] = cols[1]
107
108		return result_data
109
110	View Code Duplication	@staticmethod
		0 ignored issues – show Duplication introduced 2018-04-05 12:08 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
111		def group_titles(release_title: str, alternative_titles: list) -> dict:
112		"""Iterate through the supported languages and group the titles according to the detected languages
		0 ignored issues – show Coding Style introduced 2018-04-05 12:08 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (107/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
113
114		:type release_title: str
115		:type alternative_titles: list
116		:return:
117		"""
118		grouped_titles = {}
119		for language in VisualNovelDatabase.KNOWN_LANGUAGES:
120		grouped_titles[language.__name__.lower()] = []
121
122		grouped_titles['english'] = [release_title]
123
124		for title in alternative_titles:
125		for language in VisualNovelDatabase.KNOWN_LANGUAGES:
126		if matches_language(title, language) and title not in grouped_titles[language.__name__.lower()]:
		0 ignored issues – show Coding Style introduced 2018-04-05 10:05 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (112/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
127		grouped_titles[language.__name__.lower()].append(title)
128		continue
129
130		return grouped_titles
131

DaRealFreak / TitleSearch

VisualNovelDatabase.parse_results() B last analyzed 2018-04-07 14:58 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

VisualNovelDatabase.parse_results() B
last analyzed 2018-04-07 14:58 UTC