load_csv() - Code Metrics - hugovk/word-tools - Measure and Improve Code Quality continuously with Scrutinizer

load_csv() B
last analyzed 2017-10-02 04:55 UTC

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	5
c	1
b	0
f	0
dl	0
loc	11
rs	8.5454

#!/usr/bin/env python
# encoding=utf8
"""
Get top word charts from a CSV file of logged tweets:
    word_charts.py -n 100
Get top in a given year:
    word_charts.py -y 2014 -n 100
Get top in a given year that aren't in the previous year:
    word_charts.py -y 2014 -n 100 --diff
Get top for a search term:
    word_charts.py  -s favorite
    word_charts.py  -s favourite
"""
from __future__ import print_function
from most_frequent_words import most_frequent_words_and_counts, commafy
import argparse
import csv


def filter_year(tweets, desired_year):
    """
    Return the tweets whose "created_at" value ends in desired_year.

    The year is taken as the last four characters of each tweet's
    "created_at" field, converted to an int. The number of matches is
    printed before the list is returned.

    param tweets: an iterable of dict-like rows with a "created_at" key.
    param desired_year: the year to keep, compared against an int.
    """
    matching = [
        tweet for tweet in tweets
        if desired_year == int(tweet["created_at"][-4:])
    ]
    print("Total in " + str(desired_year) + ":\t" + commafy(len(matching)))
    return matching


def filter_search_term(tweets, desired_search_term):
    """
    Return the tweets whose "search_term" contains desired_search_term.

    Matching uses the `in` operator against each tweet's "search_term"
    value. The number of matches is printed before the list is returned.

    param tweets: an iterable of dict-like rows with a "search_term" key.
    param desired_search_term: the term to look for.
    """
    matching = [
        tweet for tweet in tweets
        if desired_search_term in tweet["search_term"]
    ]
    print("Total " + desired_search_term + ":\t" + commafy(len(matching)))
    return matching


def print_chart(top):
    """
    Print a chart, either as a numbered plain-text list or as an HTML <ol>.

    The output format is chosen by the module-level `args.html` flag, which
    is set when the file runs as a script.

    param top: a list of (word, count) tuples.
    """
    as_html = args.html

    if as_html:
        print("<ol>")

    for rank, (word, count) in enumerate(top, 1):
        pretty_count = commafy(count)
        if as_html:
            # The <ol> element supplies the numbering in HTML mode.
            print("<li>{0} ({1})</li>".format(word, pretty_count))
        else:
            print(str(rank) + ". " + word + " (" + pretty_count + ")")

    if as_html:
        print("</ol>")


def print_top(tweets, number=10, year=None, search_term=None):
    """
    Print a titled chart of the most frequent words and return it.

    Tweets are first narrowed by search_term (if given), then by year (if
    given); each filter prints its own running total.

    param tweets: an iterable of dict-like rows with a 'word' key.
    param number: how many entries to request for the chart.
    param year: if truthy, only use tweets from this year.
    param search_term: if truthy, only use tweets for this search term.
    return: whatever most_frequent_words_and_counts() returns for the
        selected words; print_chart() treats it as (word, count) tuples.
    """
    if search_term:
        tweets = filter_search_term(tweets, search_term)

    if year:
        tweets = filter_year(tweets, year)

    words = [tweet['word'] for tweet in tweets]

    print()
    year_suffix = " (" + str(year) + ")" if year else ""
    print("# Top " + str(number) + year_suffix)
    print()

    top = most_frequent_words_and_counts(words, number)
    print_chart(top)
    return top


# cmd.exe cannot do Unicode so encode first
def print_it(text):
    """
    Print text after encoding it as UTF-8.

    param text: a string with an encode() method.
    """
    encoded = text.encode('utf-8')
    print(encoded)


def load_csv(filename, excluded_words=('actually',)):
    """
    Load tweet rows from a CSV file, dropping duplicates and excluded words.

    Rows are read with csv.DictReader, so the first line of the file supplies
    the keys. A row is kept only if its 'id_str' has not been kept before and
    its 'word' is not one of excluded_words. Skipped rows do not mark their
    'id_str' as seen.

    param filename: path of the CSV file; it must have 'id_str' and 'word'
        columns.
    param excluded_words: words whose rows are skipped. Defaults to
        ('actually',), the value that used to be hard-coded here.
    return: a list of row dicts, in file order.
    """
    import sys  # local import: only needed to pick the right open() mode

    if sys.version_info[0] >= 3:
        # On Python 3 the csv module needs text, and newline='' lets it do
        # its own newline handling; a binary file makes DictReader raise
        # csv.Error.
        # NOTE(review): UTF-8 is assumed for the data; an earlier attempt in
        # this function used cp1252 -- confirm against the real file.
        fd = open(filename, mode='r', newline='', encoding='utf-8')
    else:
        # On Python 2 the csv module expects a binary-mode file.
        fd = open(filename, mode='rb')

    excluded = frozenset(excluded_words)
    rows = []
    seen = set()  # ids of rows already kept, to avoid duplicates
    with fd:
        for row in csv.DictReader(fd):
            tweet_id = row['id_str']
            if tweet_id in seen or row['word'] in excluded:
                continue
            seen.add(tweet_id)
            rows.append(row)
    return rows


if __name__ == "__main__":
    # Command-line interface. `args` is deliberately left at module level
    # because print_chart() reads args.html.
    parser = argparse.ArgumentParser(
        description="Get top word charts from a CSV file of logged tweets.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-c', '--csv', default='M:/bin/data/favibot.csv',
                        help='Input CSV file')
    parser.add_argument('-n', '--top-number', type=int, default=10,
                        help="Show top X")
    parser.add_argument('-y', '--year', type=int, default=None,
                        help="Only from this year")
    parser.add_argument('-s', '--search_term',
                        help="Only for this search term")
    parser.add_argument('--diff', action='store_true',
                        help="Compare a year to the previous year")
    parser.add_argument('--html', action='store_true',
                        help="Output with html markup")
    args = parser.parse_args()

    tweets = load_csv(args.csv)
    print("Total tweets:\t" + commafy(len(tweets)))

    if not (args.diff and args.year):
        # Plain chart; --diff is ignored unless a year is also given.
        print_top(tweets, number=args.top_number, year=args.year,
                  search_term=args.search_term)

    else:
        previous_year = args.year - 1

        # Print both years' charts, previous year first.
        previous_chart = print_top(tweets, number=args.top_number,
                                   year=previous_year,
                                   search_term=args.search_term)
        current_chart = print_top(tweets, number=args.top_number,
                                  year=args.year,
                                  search_term=args.search_term)

        # Words charting this year that did not chart the year before,
        # kept in this year's chart order.
        previous_words = set(entry[0] for entry in previous_chart)
        new_words = [entry[0] for entry in current_chart
                     if entry[0] not in previous_words]

        # Recover the (word, count) entries for those words.
        top = [entry
               for word in new_words
               for entry in current_chart
               if entry[0] == word]

        print()
        chart_label = " top " + str(args.top_number)
        print("# New entries in the " + str(args.year) + chart_label +
              " which weren't in the " + str(previous_year) + chart_label)
        print()
        print_chart(top)


# End of file


1			#!/usr/bin/env python
2			# encoding=utf8
3			"""
4			Get top word charts from a CSV file of logged tweets:
5			word_charts.py -n 100
6			Get top in a given year:
7			word_charts.py -y 2014 -n 100
8			Get top in a given year that aren't in the previous year.
9			word_charts.py -y 2014 -n 100 --diff
10			Get top for a search term:
11			word_charts.py -s favorite
12			word_charts.py -s favourite
13			"""
14			from __future__ import print_function
15			from most_frequent_words import most_frequent_words_and_counts, commafy
16			import argparse
17			import csv
18
19
20			def filter_year(tweets, desired_year):
21			found = []
22			for tweet in tweets:
23			year = int(tweet["created_at"][-4:])
24			if desired_year == year:
25			found.append(tweet)
26			print("Total in " + str(desired_year) + ":\t" + commafy(len(found)))
27			return found
28
29
30			def filter_search_term(tweets, desired_search_term):
31			found = []
32			for tweet in tweets:
33			if desired_search_term in tweet["search_term"]:
34			found.append(tweet)
35			print("Total " + desired_search_term + ":\t" + commafy(len(found)))
36			return found
37
38
39			def print_chart(top):
40			"""
41			param top: a list of (word, count) tuples.
42			"""
43			if args.html:
44			print("<ol>")
45
46			for i, (word, count) in enumerate(top):
47			if args.html:
48			print("<li>{0} ({1})</li>".format(word, commafy(count)))
49			else:
50			print(str(i+1) + ". " + word + " (" + commafy(count) + ")")
51
52			if args.html:
53			print("</ol>")
54
55
56			def print_top(tweets, number=10, year=None, search_term=None):
57			words = []
58
59			if search_term:
60			tweets = filter_search_term(tweets, search_term)
61
62			if year:
63			tweets = filter_year(tweets, year)
64
65			for tweet in tweets:
66			words.append(tweet['word'])
67
68			print()
69			title = "# Top " + str(number)
70			if year:
71			title += " (" + str(year) + ")"
72			print(title)
73			print()
74			top = most_frequent_words_and_counts(words, number)
75			print_chart(top)
76			return top
77
78
79			# cmd.exe cannot do Unicode so encode first
80			def print_it(text):
81			print(text.encode('utf-8'))
82
83
84			def load_csv(filename):
85			# with codecs.open(filename, mode='rb', encoding='cp1252') as fd:
86			with open(filename, mode='rb') as fd:
87			data = csv.DictReader(fd)
88			rows = []
89			seen = set() # avoid duplicates
90			for row in data:
91			if row['id_str'] not in seen and row['word'] not in ['actually']:
92			seen.add(row['id_str'])
93			rows.append(row)
94			return rows
95
96
97			if __name__ == "__main__":
98			parser = argparse.ArgumentParser(
99			description="Get top word charts from a CSV file of logged tweets.",
100			formatter_class=argparse.ArgumentDefaultsHelpFormatter)
101			parser.add_argument(
102			'-c', '--csv', default='M:/bin/data/favibot.csv',
103			help='Input CSV file')
104			parser.add_argument(
105			'-n', '--top-number', type=int, default=10,
106			help="Show top X")
107			parser.add_argument(
108			'-y', '--year', type=int, default=None,
109			help="Only from this year")
110			parser.add_argument(
111			'-s', '--search_term',
112			help="Only for this search term")
113			parser.add_argument(
114			'--diff', action='store_true',
115			help="Compare a year to the previous year")
116			parser.add_argument(
117			'--html', action='store_true',
118			help="Output with html markup")
119			args = parser.parse_args()
120
121			tweets = load_csv(args.csv)
122			print("Total tweets:\t" + commafy(len(tweets)))
123
124			if args.diff and args.year:
125			last_year = print_top(tweets, number=args.top_number, year=args.year-1,
126			search_term=args.search_term)
127			this_year = print_top(tweets, number=args.top_number, year=args.year,
128			search_term=args.search_term)
129			last_years_words = [e[0] for e in last_year]
130			this_years_words = [e[0] for e in this_year]
131
132			set1 = set(last_years_words)
133			diff = [x for x in this_years_words if x not in set1]
134
135			top = []
136			for word in diff:
137			for e in this_year:
138			if e[0] == word:
139			top.append(e)
140			continue
141			print()
142			top_x = " top " + str(args.top_number)
143			print("# New entries in the " + str(args.year) + top_x +
144			" which weren't in the " + str(args.year-1) + top_x)
145			print()
146			print_chart(top)
147
148			else:
149			print_top(tweets, number=args.top_number, year=args.year,
150			search_term=args.search_term)
151
152
153			# End of file
154

hugovk / word-tools

load_csv() B last analyzed 2017-10-02 04:55 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

load_csv() B
last analyzed 2017-10-02 04:55 UTC