test__collect_ngrams() - Code Metrics - Inspection of "Various fixes + logging + refactoring." - ContinuumIO/topik - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#79)

unknown

created 2016-04-21 18:55 UTC

test__collect_ngrams() A

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
dl	0
loc	6
rs	9.2
cc	4

"""Tests for the n-gram helpers in ``topik.tokenizers.ngrams``."""
from topik.tokenizers.ngrams import _collect_ngrams, \
    _collocation_document, ngrams
from nose.tools import nottest
# Shared fixture: a list of (document id, raw text) pairs used by the tests
# in this module.
# NOTE(review): the id "doc3" is used for both the third and fourth entries;
# presumably the last one was meant to be "doc4" -- confirm before changing.
sample_data = [

        ("doc1", str(u"Frank the Swank-Tank walked his sassy unicorn, Brony,"
                     u" to prancercise class daily.  Prancercise was "
                     u"a tremendously popular pastime of sassy "
                     u"unicorns and retirees alike.")),
        ("doc2", str(u"Frank the Swank-Tank was also known as Big Daddy Workout Queen "
                     u"as he loved to cross-dress while prancercising."
                     u"Dressing up as a sassy unicorn to match Brony was a key "
                     u"source of enjoyment for both him and the onlooking retirees.")),
        ("doc3", str(u"Big Daddy Workout Queen knew that the best way to get "
                     u"more people to embrace prancercise was to "
                     u"wear flashy outfits with tassels and bells.")),
        ("doc3", str(u"Prancercise is a form of both art and fitness, "
                     u"originally invented by sassy unicorns. It has "
                     u"recently been popularized by such retired "
                     u"celebrities as Frank The Swank-Tank (aka Big Daddy Workout Queen)"))]


@nottest
def generator():
    """Yield each ``(doc_id, text)`` pair of ``sample_data`` in order.

    Lets the tests feed the corpus to ``ngrams`` as a one-shot iterator
    instead of a list. Marked ``nottest`` so it is not treated as a test.
    """
    for item in sample_data:
        yield item


def test__collect_ngrams():
    """Check the patterns ``_collect_ngrams`` builds for ``sample_data``.

    Called with ``min_freqs=[2, 2, 2]``; the ``.pattern`` of the first three
    results is compared against the expected alternations.
    """
    result_ngrams = _collect_ngrams(sample_data, min_freqs=[2, 2, 2])
    assert result_ngrams[0].pattern == (
        u'(big daddy|daddy workout|frank swank|swank tank|workout queen|sassy unicorn|sassy unicorns)')
    assert result_ngrams[1].pattern == (
        u'(big daddy workout|daddy workout queen|frank swank tank)')
    assert result_ngrams[2].pattern == u'(big daddy workout queen)'



def test__collocation_document():
    """Check ``_collocation_document`` output for the first two sample texts.

    The n-grams come from ``_collect_ngrams(sample_data, min_freqs=[2, 2, 2])``;
    each document's text is then passed through ``_collocation_document`` and
    compared with the expected token list.
    """
    these_ngrams = _collect_ngrams(sample_data, min_freqs=[2, 2, 2])

    assert _collocation_document(sample_data[0][1], these_ngrams) == [
        u'frank_swank', u'tank', u'walked', u'sassy_unicorn', u'brony',
        u'prancercise', u'class', u'daily', u'prancercise', u'tremendously',
        u'popular', u'pastime', u'sassy_unicorns', u'retirees', u'alike',
    ]

    assert _collocation_document(sample_data[1][1], these_ngrams) == [
        u'frank_swank', u'tank', u'known', u'big_daddy', u'workout_queen',
        u'loved', u'cross', u'dress', u'prancercising', u'dressing',
        u'sassy_unicorn', u'match', u'brony', u'key', u'source', u'enjoyment',
        u'onlooking', u'retirees',
    ]


def test_ngrams_list():
    """Check ``ngrams`` when the corpus is passed as a list.

    The first two items drawn from the result with ``next`` must be the
    expected ``(doc_id, tokens)`` tuples for ``doc1`` and ``doc2``.
    """
    freq_bounds = [(2, 100), (2, 100), (2, 100)]
    tokenized_corpora = ngrams(sample_data, freq_bounds=freq_bounds)
    assert len(freq_bounds) == 3

    assert next(tokenized_corpora) == (
        'doc1', [
            u'frank_swank', u'tank', u'walked', u'sassy_unicorn', u'brony',
            u'prancercise', u'class', u'daily', u'prancercise', u'tremendously',
            u'popular', u'pastime', u'sassy_unicorns', u'retirees', u'alike',
        ])
    assert next(tokenized_corpora) == (
        'doc2', [
            u'frank_swank', u'tank', u'known', u'big_daddy', u'workout_queen',
            u'loved', u'cross', u'dress', u'prancercising',
            u'dressing', u'sassy_unicorn', u'match', u'brony', u'key', u'source',
            u'enjoyment', u'onlooking', u'retirees',
        ])


def test_ngrams_generator():
    """Check ``ngrams`` when the corpus is passed as a generator.

    Same expectations as the list-based test, but the input is the iterator
    returned by ``generator()`` rather than ``sample_data`` itself.
    """
    freq_bounds = [(2, 100), (2, 100), (2, 100)]
    corpus_gen = generator()
    tokenized_corpora = ngrams(corpus_gen, freq_bounds=freq_bounds)
    assert len(freq_bounds) == 3

    assert next(tokenized_corpora) == (
        'doc1', [
            u'frank_swank', u'tank', u'walked', u'sassy_unicorn', u'brony',
            u'prancercise', u'class', u'daily', u'prancercise', u'tremendously',
            u'popular', u'pastime', u'sassy_unicorns', u'retirees', u'alike',
        ])
    assert next(tokenized_corpora) == (
        'doc2', [
            u'frank_swank', u'tank', u'known', u'big_daddy', u'workout_queen',
            u'loved', u'cross', u'dress', u'prancercising',
            u'dressing', u'sassy_unicorn', u'match', u'brony', u'key', u'source',
            u'enjoyment', u'onlooking', u'retirees',
        ])


1			from topik.tokenizers.ngrams import _collect_ngrams, \
			0 ignored issues – show Coding Style introduced 2015-11-23 14:51 UTC by Report Bug Copy Issue Report This module should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history...
2			_collocation_document, ngrams
3			from nose.tools import nottest
			0 ignored issues – show Configuration introduced 2016-04-21 18:16 UTC by Report Bug Copy Issue Report The import `nose.tools` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
4			sample_data = [
			0 ignored issues – show Coding Style Naming introduced 2015-11-23 14:51 UTC by Report Bug Copy Issue Report The name `sample_data` does not conform to the constant naming conventions (`(([A-Z_][A-Z0-9_]*)|(__.*__))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
5			("doc1", str(u"Frank the Swank-Tank walked his sassy unicorn, Brony,"
6			u" to prancercise class daily. Prancercise was "
7			u"a tremendously popular pastime of sassy "
8			u"unicorns and retirees alike.")),
9			("doc2", str(u"Frank the Swank-Tank was also known as Big Daddy Workout Queen "
10			u"as he loved to cross-dress while prancercising."
11			u"Dressing up as a sassy unicorn to match Brony was a key "
12			u"source of enjoyment for both him and the onlooking retirees.")),
13			("doc3", str(u"Big Daddy Workout Queen knew that the best way to get "
14			u"more people to embrace prancercise was to "
15			u"wear flashy outfits with tassels and bells.")),
16			("doc3", str(u"Prancercise is a form of both art and fitness, "
17			u"originally invented by sassy unicorns. It has "
18			u"recently been popularized by such retired "
19			u"celebrities as Frank The Swank-Tank (aka Big Daddy Workout Queen)"))]
20
21
22			@nottest
23			def generator():
			0 ignored issues – show Coding Style introduced 2016-04-21 18:16 UTC by Report Bug Copy Issue Report This function should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history...
24			for item in sample_data:
25			yield item
26
27
28			def test__collect_ngrams():
			0 ignored issues – show Coding Style introduced 2016-04-21 18:16 UTC by Report Bug Copy Issue Report This function should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history...
29			result_ngrams = _collect_ngrams(sample_data, min_freqs=[2, 2, 2])
30			assert(result_ngrams[0].pattern ==
31			u'(big daddy|daddy workout|frank swank|swank tank|workout queen|sassy unicorn|sassy unicorns)')
32			assert(result_ngrams[1].pattern == u'(big daddy workout|daddy workout queen|frank swank tank)')
			0 ignored issues – show Unused Code Coding Style introduced 2016-04-21 18:16 UTC by Report Bug Copy Issue Report There is an unnecessary parenthesis after `assert`. Loading history...
33			assert(result_ngrams[2].pattern == u'(big daddy workout queen)')
			0 ignored issues – show Unused Code Coding Style introduced 2016-04-21 18:16 UTC by Report Bug Copy Issue Report There is an unnecessary parenthesis after `assert`. Loading history...
34
35
36			def test__collocation_document():
			0 ignored issues – show Coding Style introduced 2015-11-23 14:51 UTC by Report Bug Copy Issue Report This function should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history...
37			these_ngrams = _collect_ngrams(sample_data, min_freqs=[2, 2, 2])
38			assert(_collocation_document(sample_data[0][1],these_ngrams) == [
			0 ignored issues – show Coding Style introduced 2016-04-21 18:16 UTC by Report Bug Copy Issue Report Exactly one space required after comma assert(_collocation_document(sample_data[0][1],these_ngrams) == [ ^ Loading history...
39			u'frank_swank', u'tank', u'walked', u'sassy_unicorn', u'brony',
40			u'prancercise', u'class', u'daily', u'prancercise', u'tremendously',
41			u'popular', u'pastime', u'sassy_unicorns', u'retirees', u'alike'
42			])
43
44			assert(_collocation_document(sample_data[1][1],these_ngrams) == [
			0 ignored issues – show Coding Style introduced 2016-04-21 18:16 UTC by Report Bug Copy Issue Report Exactly one space required after comma assert(_collocation_document(sample_data[1][1],these_ngrams) == [ ^ Loading history...
45			u'frank_swank', u'tank', u'known', u'big_daddy', u'workout_queen',
46			u'loved', u'cross', u'dress', u'prancercising', u'dressing',
47			u'sassy_unicorn', u'match', u'brony', u'key', u'source', u'enjoyment',
48			u'onlooking', u'retirees'])
49
50
51			def test_ngrams_list():
			0 ignored issues – show Coding Style introduced 2015-11-23 14:51 UTC by Report Bug Copy Issue Report This function should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history...
52			freq_bounds = [(2, 100), (2, 100), (2, 100)]
53			tokenized_corpora = ngrams(sample_data, freq_bounds=freq_bounds)
54			assert(len(freq_bounds) == 3)
			0 ignored issues – show Unused Code Coding Style introduced 2016-04-21 18:16 UTC by Report Bug Copy Issue Report There is an unnecessary parenthesis after `assert`. Loading history...
55			assert(next(tokenized_corpora) == (
56			'doc1', [
57			u'frank_swank', u'tank', u'walked', u'sassy_unicorn', u'brony',
58			u'prancercise', u'class', u'daily', u'prancercise', u'tremendously',
59			u'popular', u'pastime', u'sassy_unicorns', u'retirees', u'alike'
60			]))
61			assert(next(tokenized_corpora) == (
62			'doc2', [
63			u'frank_swank', u'tank', u'known', u'big_daddy', u'workout_queen',
64			u'loved', u'cross', u'dress', u'prancercising',
65			u'dressing', u'sassy_unicorn', u'match', u'brony', u'key', u'source',
66			u'enjoyment', u'onlooking', u'retirees']))
67
68
69			def test_ngrams_generator():
			0 ignored issues – show Coding Style introduced 2016-04-21 18:16 UTC by Report Bug Copy Issue Report This function should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history...
70			freq_bounds = [(2, 100), (2, 100), (2, 100)]
71			corpus_gen = generator()
72			tokenized_corpora = ngrams(corpus_gen, freq_bounds=freq_bounds)
73			assert(len(freq_bounds) == 3)
			0 ignored issues – show Unused Code Coding Style introduced 2015-11-23 14:51 UTC by Report Bug Copy Issue Report There is an unnecessary parenthesis after `assert`. Loading history...
74			assert(next(tokenized_corpora) == (
75			'doc1', [
76			u'frank_swank', u'tank', u'walked', u'sassy_unicorn', u'brony',
77			u'prancercise', u'class', u'daily', u'prancercise', u'tremendously',
78			u'popular', u'pastime', u'sassy_unicorns', u'retirees', u'alike'
79			]))
80			assert(next(tokenized_corpora) == (
81			'doc2', [
82			u'frank_swank', u'tank', u'known', u'big_daddy', u'workout_queen',
83			u'loved', u'cross', u'dress', u'prancercising',
84			u'dressing', u'sassy_unicorn', u'match', u'brony', u'key', u'source',
85			u'enjoyment', u'onlooking', u'retirees']))
			0 ignored issues – show Coding Style introduced 2016-04-21 18:16 UTC by Report Bug Copy Issue Report Final newline missing Loading history...

ContinuumIO / topik

Pull Request — master (#79)

test__collect_ngrams() A

Complexity

Size

Duplication

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files