1
|
|
|
#!/usr/bin/python |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
1 |
|
import time |
4
|
1 |
|
from typing import BinaryIO, Union, Iterable |
5
|
|
|
|
6
|
1 |
|
try: |
7
|
1 |
|
from titlesearch import get_similar_titles |
8
|
1 |
|
except ImportError: |
9
|
1 |
|
get_similar_titles = None |
10
|
|
|
|
11
|
1 |
|
from saucenao import SauceNao, FileHandler |
12
|
|
|
|
13
|
|
|
|
14
|
1 |
|
class Worker(SauceNao):
    """Worker class for checking a list of files.

    Extends :class:`SauceNao` with a loop over a list of files: every entry is
    looked up and the filtered results are either yielded to the caller or used
    to move the file into a category folder.
    """

    def __init__(self, files: Iterable[Union[BinaryIO, str]], *args, **kwargs):
        """Initialize the worker.

        :param files: file names and/or binary file objects to check; may be any
            iterable, including a generator
        :param args: positional arguments passed through to ``SauceNao.__init__``
        :param kwargs: keyword arguments passed through to ``SauceNao.__init__``
        """
        super().__init__(*args, **kwargs)
        self.complete_file_list = files

    def run(self):
        """Check all files with SauceNao and execute the specified tasks.

        Every entry of :attr:`files` is looked up. If ``move_to_categories`` is
        set the file is moved into its category folder, otherwise the results
        are yielded. After each lookup the loop sleeps as long as needed so that
        one iteration takes at least ``30 / search_limit_30s`` seconds.

        :return: generator of dicts with the keys ``filename`` and ``results``
        """
        for file_name in self.files:
            start_time = time.time()

            # typing.BinaryIO is an annotation-only class; real file objects do not
            # inherit from it, so an isinstance() check against it is always False.
            # Detect file-like objects by their read() method instead.
            if hasattr(file_name, 'read'):
                filtered_results = self.check_file_object(file_name)
            else:
                filtered_results = self.check_file(file_name)

            if not filtered_results:
                # "!s" instead of ":s": file objects do not support the "s" format spec
                self.logger.info('No results found for image: {0!s}'.format(file_name))
            elif self.move_to_categories:
                self.__move_to_categories(file_name=file_name, results=filtered_results)
            else:
                yield {
                    'filename': file_name,
                    'results': filtered_results
                }

            # throttle after every lookup, including the ones without results,
            # since the request has been sent either way
            minimum_duration = 30 / self.search_limit_30s
            duration = time.time() - start_time
            if duration < minimum_duration:
                self.logger.debug("sleeping '{:.2f}' seconds".format(minimum_duration - duration))
                time.sleep(minimum_duration - duration)

    @property
    def excludes(self):
        """Property for excludes.

        :return: lower-cased entries of the comma separated ``exclude_categories``
            setting, or an empty list if the setting is empty
        """
        if self.exclude_categories:
            return [category.lower() for category in self.exclude_categories.split(",")]
        else:
            return []

    @property
    def files(self):
        """Property for files.

        :return: the files to check; if ``start_file`` is set and part of the
            file list, only the entries from ``start_file`` onwards
        """
        if self.start_file:
            # change files from generator to list
            files = list(self.complete_file_list)
            try:
                return files[files.index(self.start_file):]
            except ValueError:
                # start_file is not part of the list, so check everything. Return the
                # materialized list: complete_file_list may be a generator which the
                # list() call above has already exhausted.
                return files
        return self.complete_file_list

    def __get_category(self, results: Iterable):
        """Retrieve the category of the checked image, which is either the
        author of the image (``use_author_as_category``) or its content category.

        :param results: filtered results of the lookup
        :return: the first category found or an empty string if there is none
        """
        if self.use_author_as_category:
            categories = self.get_title_value(results, SauceNao.CONTENT_AUTHOR_KEY)
        else:
            categories = self.get_content_value(results, SauceNao.CONTENT_CATEGORY_KEY)

        if not categories:
            return ''

        self.logger.debug('categories: {0:s}'.format(', '.join(categories)))

        # since many pictures are tagged as original and with a proper category
        # we remove the original category if we have more than 1 category
        if not self.use_author_as_category and len(categories) > 1 and 'original' in categories:
            # work on a copy so the list handed back by the lookup helper stays untouched
            categories = list(categories)
            categories.remove('original')

        # take the first category
        return categories[0]

    def __move_to_categories(self, file_name: str, results: Iterable):
        """Check the file for categories and move it to the corresponding folder.

        :param file_name: name of the checked file
        :param results: filtered results of the lookup
        :return: True if the file got moved, False if no category was found or
            the category is excluded
        """
        category = self.__get_category(results)
        if not category:
            self.logger.info("no categories found for file: {0:s}".format(file_name))
            return False

        if not self.use_author_as_category:
            category = self.__get_similar_title(category)

        # sub categories we don't want to move like original etc
        if category.lower() in self.excludes:
            self.logger.info("skipping excluded category: {0:s} ({1:s})".format(category, file_name))
            return False

        self.logger.info("moving {0:s} to category: {1:s}".format(file_name, category))
        FileHandler.move_to_category(file_name, category, base_directory=self.directory)
        return True

    def __get_similar_title(self, category: str):
        """Check for a similar title of the category using the TitleSearch project:
        https://github.com/DaRealFreak/TitleSearch

        :param category: category to look up
        :return: the title of the best match if its similarity (in percent)
            reaches ``title_minimum_similarity``, otherwise the unchanged category
        """
        # get_similar_titles is None if the optional titlesearch package is not installed
        if get_similar_titles:
            similar_titles = get_similar_titles(category)

            if similar_titles and similar_titles[0]['similarity'] * 100 >= self.title_minimum_similarity:
                self.logger.info(
                    "Similar title found: {0:s}, {1:s} ({2:.2f}%)".format(
                        category, similar_titles[0]['title'], similar_titles[0]['similarity'] * 100))
                return similar_titles[0]['title']

        return category
152
|
|
|
|