Passed
Pull Request — master (#3)
by Shlomi
02:10
created

ethically.we.utils.round_to_extreme()   A

Complexity

Conditions 2

Size

Total Lines 6
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 6
nop 2
dl 0
loc 6
rs 10
c 0
b 0
f 0
1
import math
2
3
import numpy as np
4
import pandas as pd
5
6
7
def round_to_extreme(value, digits=2):
    """Round ``value`` away from zero to ``digits`` decimal places.

    The magnitude of ``value`` is rounded up (ceiling) at the requested
    precision and the original sign is restored afterwards, so the result
    is never closer to zero than the input.

    Args:
        value: Number to round.
        digits: Number of decimal places to keep (default 2).

    Returns:
        The rounded number.
    """
    scale = 10**digits
    # Work on the absolute value so that "up" always means "away from zero".
    magnitude = math.ceil(abs(value) * scale) / scale
    return -magnitude if value < 0 else magnitude
13
14
15
def normalize(v):
    """Normalize a 1-D vector to unit length.

    Args:
        v: Vector exposing ``ndim`` (e.g. a NumPy array); must be 1-D.

    Returns:
        ``v`` divided by its norm as computed by ``np.linalg.norm``.
        A vector whose norm is zero is returned unchanged (the same
        object), which avoids a division by zero.

    Raises:
        ValueError: If ``v`` is not one-dimensional.
    """
    if v.ndim != 1:
        raise ValueError('v should be 1-D, {}-D was given'.format(v.ndim))

    length = np.linalg.norm(v)
    # A zero vector has no direction; hand it back as-is.
    return v if length == 0 else v / length
24
25
26
def cosine_similarity(v, u):
    """Calculate the cosine similarity between two vectors.

    Computed as the dot product ``v @ u`` divided by the product of the
    two norms returned by ``np.linalg.norm``.

    Args:
        v: First vector.
        u: Second vector.

    Returns:
        The cosine of the angle between ``v`` and ``u``.
    """
    # No guard for zero-norm inputs: the division is performed as-is.
    norms_product = np.linalg.norm(v) * np.linalg.norm(u)
    return v @ u / norms_product
32
33
34
def project_vector(v, u):
    """Project the vector v onto the direction u.

    Args:
        v: Vector to project.
        u: Direction vector; it is passed through ``normalize`` first, so
            its length does not affect the result.

    Returns:
        The component of ``v`` that lies along ``u``.
    """
    unit_u = normalize(u)
    # Scalar length of v along u, multiplied back by the unit direction.
    return (v @ unit_u) * unit_u
38
39
40
def reject_vector(v, u):
    """Return the rejection of the vector v from the direction u.

    This is ``v`` minus its projection onto ``u`` (see ``project_vector``).

    Args:
        v: Vector to reject.
        u: Direction vector.

    Returns:
        ``v`` with its component along ``u`` subtracted.
    """
    along_u = project_vector(v, u)
    return v - along_u
43
44
45
def project_reject_vector(v, u):
    """Project the vector v onto direction u and reject it from u.

    Args:
        v: Vector to decompose.
        u: Direction vector.

    Returns:
        A tuple ``(projected_vector, rejected_vector)`` where the first
        item is the result of ``project_vector(v, u)`` and the second is
        ``v`` minus that projection.
    """
    projected_vector = project_vector(v, u)
    # Reuse the projection instead of computing it a second time.
    rejected_vector = v - projected_vector
    return projected_vector, rejected_vector
50
51
52
def update_word_vector(model, word, new_vector):
    """Overwrite the stored vector of ``word`` in ``model`` in place.

    Args:
        model: Object exposing ``vocab`` (mapping a word to an entry with
            an ``index`` attribute), ``syn0`` and ``syn0norm``.
            NOTE(review): presumably a gensim word-embedding model --
            confirm against callers.
        word: Key looked up in ``model.vocab``.
        new_vector: Replacement vector written to ``model.syn0``.

    Returns:
        None. ``model`` is mutated.
    """
    row = model.vocab[word].index
    model.syn0[row] = new_vector
    # Keep the normalized matrix in sync when the model holds one.
    if model.syn0norm is not None:
        model.syn0norm[row] = normalize(new_vector)
56
57
58
def generate_one_word_forms(word):
    """Return the lower-case, upper-case and title-case forms of ``word``.

    Args:
        word: String to transform.

    Returns:
        A three-item list: ``[word.lower(), word.upper(), word.title()]``.
    """
    lowered = word.lower()
    uppered = word.upper()
    titled = word.title()
    return [lowered, uppered, titled]
60
61
62
def generate_words_forms(words):
    """Return the case forms of every word in ``words`` as one flat list.

    Each word is expanded with ``generate_one_word_forms`` and the results
    are concatenated in input order.

    Args:
        words: Iterable of strings.

    Returns:
        A flat list with the forms of each word, in order.
    """
    # A flat comprehension avoids the repeated list copying that
    # ``sum(list_of_lists, [])`` performs.
    return [form
            for word in words
            for form in generate_one_word_forms(word)]
64
65
66
def take_two_sides_extreme_sorted(df, n_extreme,
                                  part_column=None,
                                  head_value='',
                                  tail_value=''):
    """Keep the first and last ``n_extreme`` rows of ``df``.

    NOTE(review): the name suggests ``df`` is expected to be sorted
    already; this function does not sort it.

    Args:
        df: Source data frame; it is not modified.
        n_extreme: Number of rows to take from each end.
        part_column: Optional name of a column to set on the result,
            marking which end each row came from.
        head_value: Value written to ``part_column`` for the head rows.
        tail_value: Value written to ``part_column`` for the tail rows.

    Returns:
        A new data frame with the head rows followed by the tail rows,
        duplicate rows dropped and the index reset.
    """
    # Explicit copies: assigning a column to a bare slice of ``df`` can
    # trigger pandas' SettingWithCopyWarning.
    head_df = df.head(n_extreme).copy()
    tail_df = df.tail(n_extreme).copy()

    if part_column is not None:
        head_df[part_column] = head_value
        tail_df[part_column] = tail_value

    return (pd.concat([head_df, tail_df])
            .drop_duplicates()
            .reset_index(drop=True))
80