1
|
|
|
#! /usr/bin/env python2 |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
# Originally written by Barry Warsaw <[email protected]> |
4
|
|
|
# |
5
|
|
|
# Minimally patched to make it even more xgettext compatible |
6
|
|
|
# by Peter Funk <[email protected]> |
7
|
|
|
|
8
|
|
|
"""pygettext -- Python equivalent of xgettext(1) |
9
|
|
|
|
10
|
|
|
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the |
11
|
|
|
internationalization of C programs. Most of these tools are independent of |
12
|
|
|
the programming language and can be used from within Python programs. Martin |
13
|
|
|
von Loewis' work[1] helps considerably in this regard. |
14
|
|
|
|
15
|
|
|
There's one problem though; xgettext is the program that scans source code |
16
|
|
|
looking for message strings, but it groks only C (or C++). Python introduces |
17
|
|
|
a few wrinkles, such as dual quoting characters, triple quoted strings, and |
18
|
|
|
raw strings. xgettext understands none of this. |
19
|
|
|
|
20
|
|
|
Enter pygettext, which uses Python's standard tokenize module to scan Python |
21
|
|
|
source code, generating .pot files identical to what GNU xgettext[2] generates |
22
|
|
|
for C and C++ code. From there, the standard GNU tools can be used. |
23
|
|
|
|
24
|
|
|
A word about marking Python strings as candidates for translation. GNU |
25
|
|
|
xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and |
26
|
|
|
gettext_noop. But those can be a lot of text to include all over your code. |
27
|
|
|
C and C++ have a trick: they use the C preprocessor. Most internationalized C |
28
|
|
|
source includes a #define for gettext() to _() so that what has to be written |
29
|
|
|
in the source is much less. Thus these are both translatable strings: |
30
|
|
|
|
31
|
|
|
gettext("Translatable String") |
32
|
|
|
_("Translatable String") |
33
|
|
|
|
34
|
|
|
Python of course has no preprocessor so this doesn't work so well. Thus, |
35
|
|
|
pygettext searches only for _() by default, but see the -k/--keyword flag |
36
|
|
|
below for how to augment this. |
37
|
|
|
|
38
|
|
|
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html |
39
|
|
|
[2] http://www.gnu.org/software/gettext/gettext.html |
40
|
|
|
|
41
|
|
|
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext |
42
|
|
|
wherever possible. However some options are still missing or are not fully |
43
|
|
|
implemented. Also, xgettext's use of command line switches with option |
44
|
|
|
arguments is broken, and in these cases, pygettext just defines additional |
45
|
|
|
switches. |
46
|
|
|
|
47
|
|
|
Usage: pygettext [options] inputfile ... |
48
|
|
|
|
49
|
|
|
Options: |
50
|
|
|
|
51
|
|
|
-a |
52
|
|
|
--extract-all |
53
|
|
|
Extract all strings. |
54
|
|
|
|
55
|
|
|
-d name |
56
|
|
|
--default-domain=name |
57
|
|
|
Rename the default output file from messages.pot to name.pot. |
58
|
|
|
|
59
|
|
|
-E |
60
|
|
|
--escape |
61
|
|
|
Replace non-ASCII characters with octal escape sequences. |
62
|
|
|
|
63
|
|
|
-D |
64
|
|
|
--docstrings |
65
|
|
|
Extract module, class, method, and function docstrings. These do not |
66
|
|
|
need to be wrapped in _() markers, and in fact cannot be for Python to |
67
|
|
|
consider them docstrings. (See also the -X option). |
68
|
|
|
|
69
|
|
|
-h |
70
|
|
|
--help |
71
|
|
|
Print this help message and exit. |
72
|
|
|
|
73
|
|
|
-k word |
74
|
|
|
--keyword=word |
75
|
|
|
Keywords to look for in addition to the default set, which are: |
76
|
|
|
%(DEFAULTKEYWORDS)s |
77
|
|
|
|
78
|
|
|
You can have multiple -k flags on the command line. |
79
|
|
|
|
80
|
|
|
-K |
81
|
|
|
--no-default-keywords |
82
|
|
|
Disable the default set of keywords (see above). Any keywords |
83
|
|
|
explicitly added with the -k/--keyword option are still recognized. |
84
|
|
|
|
85
|
|
|
--no-location |
86
|
|
|
Do not write filename/lineno location comments. |
87
|
|
|
|
88
|
|
|
-n |
89
|
|
|
--add-location |
90
|
|
|
Write filename/lineno location comments indicating where each |
91
|
|
|
extracted string is found in the source. These lines appear before |
92
|
|
|
each msgid. The style of comments is controlled by the -S/--style |
93
|
|
|
option. This is the default. |
94
|
|
|
|
95
|
|
|
-o filename |
96
|
|
|
--output=filename |
97
|
|
|
Rename the default output file from messages.pot to filename. If |
98
|
|
|
filename is `-' then the output is sent to standard out. |
99
|
|
|
|
100
|
|
|
-p dir |
101
|
|
|
--output-dir=dir |
102
|
|
|
Output files will be placed in directory dir. |
103
|
|
|
|
104
|
|
|
-S stylename |
105
|
|
|
--style stylename |
106
|
|
|
Specify which style to use for location comments. Two styles are |
107
|
|
|
supported: |
108
|
|
|
|
109
|
|
|
Solaris # File: filename, line: line-number |
110
|
|
|
GNU #: filename:line |
111
|
|
|
|
112
|
|
|
The style name is case insensitive. GNU style is the default. |
113
|
|
|
|
114
|
|
|
-v |
115
|
|
|
--verbose |
116
|
|
|
Print the names of the files being processed. |
117
|
|
|
|
118
|
|
|
-V |
119
|
|
|
--version |
120
|
|
|
Print the version of pygettext and exit. |
121
|
|
|
|
122
|
|
|
-w columns |
123
|
|
|
--width=columns |
124
|
|
|
Set width of output to columns. |
125
|
|
|
|
126
|
|
|
-x filename |
127
|
|
|
--exclude-file=filename |
128
|
|
|
Specify a file that contains a list of strings that are not to be |
129
|
|
|
extracted from the input files. Each string to be excluded must |
130
|
|
|
appear on a line by itself in the file. |
131
|
|
|
|
132
|
|
|
-X filename |
133
|
|
|
--no-docstrings=filename |
134
|
|
|
Specify a file that contains a list of files (one per line) that |
135
|
|
|
should not have their docstrings extracted. This is only useful in |
136
|
|
|
conjunction with the -D option above. |
137
|
|
|
|
138
|
|
|
If `inputfile' is -, standard input is read. |
139
|
|
|
""" |
140
|
|
|
|
141
|
|
|
import os |
142
|
|
|
import sys |
143
|
|
|
import time |
144
|
|
|
import getopt |
145
|
|
|
import tokenize |
146
|
|
|
import operator |
147
|
|
|
|
148
|
|
|
# for selftesting: use fintl's gettext when that module can be imported,
# otherwise fall back to an identity function so that strings marked with
# _() are simply used untranslated.
try:
    import fintl
    _ = fintl.gettext
except ImportError:
    def _(s):
        return s

__version__ = '1.4'

# Keywords that mark a call as translatable when no -k/-K option is given.
default_keywords = ['_']
# Comma separated form of the above; interpolated into the usage text
# (the module docstring) through its %(DEFAULTKEYWORDS)s placeholder.
DEFAULTKEYWORDS = ', '.join(default_keywords)

# Shared separator for joining string fragments.
EMPTYSTRING = ''



# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.  The %(time)s and %(version)s placeholders are filled in when the
# file is written; the doubled backslashes keep a literal \n inside the
# quoted header lines of the generated file.
pot_header = _('''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=CHARSET\\n"
"Content-Transfer-Encoding: ENCODING\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')
184
|
|
|
|
185
|
|
|
|
186
|
|
|
def usage(code, msg=''):
    """Print the help text (and an optional message) to stderr, then exit.

    code -- exit status passed to sys.exit().
    msg  -- extra text printed after the help text when it is non-empty.

    The help text is the module docstring, passed through _() and
    %-formatted with the module globals.
    """
    help_text = _(__doc__) % globals()
    err = sys.stderr
    print >> err, help_text
    if msg:
        print >> err, msg
    sys.exit(code)
191
|
|
|
|
192
|
|
|
|
193
|
|
|
|
194
|
|
|
# Escape table indexed by character ordinal; filled in by make_escapes().
escapes = []

def make_escapes(pass_iso8859):
    """Build the per-character escape table used by escape().

    After the call the module-level list ``escapes`` holds exactly 256
    entries; ``escapes[n]`` is the text emitted for the character whose
    ordinal is ``n``.  Calling the function again rebuilds the table from
    scratch, so the most recent call always wins.

    pass_iso8859 -- when true, characters 160..254 are emitted unchanged in
        addition to printable ASCII (32..126), so that a msgid containing
        an iso-8859 letter is not turned into an octal escape.  When false,
        only 32..126 pass through.  Every other character becomes a
        three-digit octal escape.  Backslash, tab, CR, LF and the double
        quote always get their C-style two-character escapes.
    """
    if pass_iso8859:
        # Folding the upper half onto the lower half makes 160..254 look
        # like 32..126 to the range test below.
        mod = 128
    else:
        mod = 256
    table = []
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    # Replace the contents in place instead of appending: appending left the
    # entries of an earlier call at indexes 0..255, where they shadowed the
    # new ones.  Slice assignment also keeps the list object itself intact
    # for code that already holds a reference to it.
    escapes[:] = table
215
|
|
|
|
216
|
|
|
|
217
|
|
|
def escape(s):
    """Return s with every character replaced by its ``escapes`` entry.

    Each character is looked up by ordinal in the module-level ``escapes``
    table and the replacements are joined into a single string.
    """
    return EMPTYSTRING.join([escapes[ord(ch)] for ch in s])
223
|
|
|
|
224
|
|
|
|
225
|
|
|
def safe_eval(s):
    """Evaluate the source text of a string literal and return its value.

    The expression is evaluated with an empty ``__builtins__`` mapping and
    an empty local namespace, so no builtin names are reachable from it.
    """
    # unwrap quotes, safely
    restricted_globals = {'__builtins__': {}}
    no_locals = {}
    return eval(s, restricted_globals, no_locals)
228
|
|
|
|
229
|
|
|
|
230
|
|
|
def normalize(s):
    """Return s rendered as a quoted, C-style string for a .po file.

    A string without embedded newlines becomes one double-quoted, escaped
    string.  Anything else becomes an empty "" followed by one quoted line
    per source line, each but the last ending in a literal \\n.
    """
    pieces = s.split('\n')
    if len(pieces) == 1:
        # No newline at all: a single quoted string is enough.
        return '"' + escape(s) + '"'
    if not pieces[-1]:
        # s ended with a newline, which split() turned into a trailing empty
        # piece.  Drop that piece and put the newline back on the last real
        # line so that escape() renders it as \n.
        pieces.pop()
        pieces[-1] += '\n'
    escaped = [escape(piece) for piece in pieces]
    # Close each quoted line with an escaped newline and open the next one.
    separator = '\\n"\n"'
    return '""\n"' + separator.join(escaped) + '"'
245
|
|
|
|
246
|
|
|
|
247
|
|
|
|
248
|
|
|
class TokenEater:
    """State machine that collects translatable strings from a token stream.

    An instance is meant to be passed as the callback to
    tokenize.tokenize().  Every token is dispatched to the method currently
    stored in self.__state; the state methods recognise keyword calls such
    as _("...") and, when enabled in the options, module, class and function
    docstrings.  Collected messages are kept in self.__messages, a dictionary
    mapping each message to a dictionary of {(filename, lineno): isdocstring}.

    The options object must provide the attributes read here: docstrings,
    nodocstrings, keywords, toexclude, writelocations, locationstyle, width
    and the GNU / SOLARIS constants.
    """

    def __init__(self, options):
        self.__options = options
        self.__messages = {}
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1
        self.__freshmodule = 1
        self.__curfile = None
        # Nesting depth of (), [] and {} while scanning a class/def header.
        self.__enclosurecount = 0

    def __call__(self, ttype, tstring, stup, etup, line):
        """Token callback: forward the token to the current state method.

        Only the token type, its text and the starting row (stup[0]) are
        used; etup and line are accepted and ignored.
        """
        # dispatch
##        import token
##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
##              'tstring:', tstring
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        """Idle state: look for a docstring position or a keyword."""
        opts = self.__options
        # Do docstring extractions, if enabled
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                    return
                if ttype in (tokenize.COMMENT, tokenize.NL):
                    # still before the first statement of the module
                    return
                # Any other token means the module has no docstring.  Do not
                # return here: this token may itself be 'class', 'def' or a
                # keyword and must still be examined below.
                self.__freshmodule = 0
            # class docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__enclosurecount = 0
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        """Inside a class/def header: wait for the colon that ends it."""
        # Skip over bracket pairs so that a colon inside the header (a lambda
        # or dictionary default, a slice) is not mistaken for its end.
        if ttype == tokenize.OP:
            if tstring == ':' and self.__enclosurecount == 0:
                self.__state = self.__suitedocstring
            elif tstring in ('(', '[', '{'):
                self.__enclosurecount = self.__enclosurecount + 1
            elif tstring in (')', ']', '}'):
                self.__enclosurecount = self.__enclosurecount - 1

    def __suitedocstring(self, ttype, tstring, lineno):
        """After a class/def header: record the docstring if there is one."""
        # ignore any intervening noise
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT, tokenize.NL):
            # there was no class docstring.  NL (blank or comment-only line)
            # is skipped like the other noise tokens, matching the module
            # docstring handling in __waiting.
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        """After a keyword name: only an opening parenthesis starts a call."""
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        """Inside keyword(...): gather string tokens until the closing ')'."""
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        # TBD: should we warn if we have seen anything else?

    def __addentry(self, msg, lineno=None, isdocstring=0):
        """Record msg at (current file, lineno) unless it is excluded.

        lineno defaults to the line on which the current keyword call was
        opened.
        """
        if lineno is None:
            lineno = self.__lineno
        if msg not in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Start a new input file: remember its name, expect a module docstring."""
        self.__curfile = filename
        self.__freshmodule = 1

    def write(self, fp):
        """Write the header and all collected messages to the file object fp.

        Entries are ordered by their sorted list of (filename, lineno)
        locations.  Location comments are written according to the
        writelocations, locationstyle and width options.
        """
        options = self.__options
        timestamp = time.ctime(time.time())
        # The time stamp in the header doesn't have the same format as that
        # generated by xgettext...
        print >> fp, pot_header % {'time': timestamp, 'version': __version__}
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = v.keys()
            keys.sort()
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = reverse.keys()
        rkeys.sort()
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                isdocstring = 0
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.  This is to aid translators who may wish
                # to skip translating some unimportant docstrings.
                if reduce(operator.__add__, v.values()):
                    isdocstring = 1
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                v = v.keys()
                v.sort()
                if not options.writelocations:
                    pass
                # location comments are different b/w Solaris and GNU:
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print >>fp, _(
                            '# File: %(filename)s, line: %(lineno)d') % d
                elif options.locationstyle == options.GNU:
                    # fit as many locations on one line, as long as the
                    # resulting line length doesn't exceed 'options.width'
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)d') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print >> fp, locline
                            locline = "#:" + s
                    if len(locline) > 2:
                        print >> fp, locline
                if isdocstring:
                    print >> fp, '#, docstring'
                print >> fp, 'msgid', normalize(k)
                print >> fp, 'msgstr ""\n'
387
|
|
|
|
388
|
|
|
|
389
|
|
|
|
390
|
|
|
def main(): |
391
|
|
|
global default_keywords |
392
|
|
|
try: |
393
|
|
|
opts, args = getopt.getopt( |
394
|
|
|
sys.argv[1:], |
395
|
|
|
'ad:DEhk:Kno:p:S:Vvw:x:X:', |
396
|
|
|
['extract-all', 'default-domain=', 'escape', 'help', |
397
|
|
|
'keyword=', 'no-default-keywords', |
398
|
|
|
'add-location', 'no-location', 'output=', 'output-dir=', |
399
|
|
|
'style=', 'verbose', 'version', 'width=', 'exclude-file=', |
400
|
|
|
'docstrings', 'no-docstrings', |
401
|
|
|
]) |
402
|
|
|
except getopt.error, msg: |
403
|
|
|
usage(1, msg) |
404
|
|
|
|
405
|
|
|
# for holding option values |
406
|
|
|
class Options: |
407
|
|
|
# constants |
408
|
|
|
GNU = 1 |
409
|
|
|
SOLARIS = 2 |
410
|
|
|
# defaults |
411
|
|
|
extractall = 0 # FIXME: currently this option has no effect at all. |
412
|
|
|
escape = 0 |
413
|
|
|
keywords = [] |
414
|
|
|
outpath = '' |
415
|
|
|
outfile = 'messages.pot' |
416
|
|
|
writelocations = 1 |
417
|
|
|
locationstyle = GNU |
418
|
|
|
verbose = 0 |
419
|
|
|
width = 78 |
420
|
|
|
excludefilename = '' |
421
|
|
|
docstrings = 0 |
422
|
|
|
nodocstrings = {} |
423
|
|
|
|
424
|
|
|
options = Options() |
425
|
|
|
locations = {'gnu' : options.GNU, |
426
|
|
|
'solaris' : options.SOLARIS, |
427
|
|
|
} |
428
|
|
|
|
429
|
|
|
# parse options |
430
|
|
|
for opt, arg in opts: |
431
|
|
|
if opt in ('-h', '--help'): |
432
|
|
|
usage(0) |
433
|
|
|
elif opt in ('-a', '--extract-all'): |
434
|
|
|
options.extractall = 1 |
435
|
|
|
elif opt in ('-d', '--default-domain'): |
436
|
|
|
options.outfile = arg + '.pot' |
437
|
|
|
elif opt in ('-E', '--escape'): |
438
|
|
|
options.escape = 1 |
439
|
|
|
elif opt in ('-D', '--docstrings'): |
440
|
|
|
options.docstrings = 1 |
441
|
|
|
elif opt in ('-k', '--keyword'): |
442
|
|
|
options.keywords.append(arg) |
443
|
|
|
elif opt in ('-K', '--no-default-keywords'): |
444
|
|
|
default_keywords = [] |
445
|
|
|
elif opt in ('-n', '--add-location'): |
446
|
|
|
options.writelocations = 1 |
447
|
|
|
elif opt in ('--no-location',): |
448
|
|
|
options.writelocations = 0 |
449
|
|
|
elif opt in ('-S', '--style'): |
450
|
|
|
options.locationstyle = locations.get(arg.lower()) |
451
|
|
|
if options.locationstyle is None: |
452
|
|
|
usage(1, _('Invalid value for --style: %s') % arg) |
453
|
|
|
elif opt in ('-o', '--output'): |
454
|
|
|
options.outfile = arg |
455
|
|
|
elif opt in ('-p', '--output-dir'): |
456
|
|
|
options.outpath = arg |
457
|
|
|
elif opt in ('-v', '--verbose'): |
458
|
|
|
options.verbose = 1 |
459
|
|
|
elif opt in ('-V', '--version'): |
460
|
|
|
print _('pygettext.py (xgettext for Python) %s') % __version__ |
461
|
|
|
sys.exit(0) |
462
|
|
|
elif opt in ('-w', '--width'): |
463
|
|
|
try: |
464
|
|
|
options.width = int(arg) |
465
|
|
|
except ValueError: |
466
|
|
|
usage(1, _('--width argument must be an integer: %s') % arg) |
467
|
|
|
elif opt in ('-x', '--exclude-file'): |
468
|
|
|
options.excludefilename = arg |
469
|
|
|
elif opt in ('-X', '--no-docstrings'): |
470
|
|
|
fp = open(arg) |
471
|
|
|
try: |
472
|
|
|
while 1: |
473
|
|
|
line = fp.readline() |
474
|
|
|
if not line: |
475
|
|
|
break |
476
|
|
|
options.nodocstrings[line[:-1]] = 1 |
477
|
|
|
finally: |
478
|
|
|
fp.close() |
479
|
|
|
|
480
|
|
|
# calculate escapes |
481
|
|
|
make_escapes(options.escape) |
482
|
|
|
|
483
|
|
|
# calculate all keywords |
484
|
|
|
options.keywords.extend(default_keywords) |
485
|
|
|
|
486
|
|
|
# initialize list of strings to exclude |
487
|
|
|
if options.excludefilename: |
488
|
|
|
try: |
489
|
|
|
fp = open(options.excludefilename) |
490
|
|
|
options.toexclude = fp.readlines() |
491
|
|
|
fp.close() |
492
|
|
|
except IOError: |
493
|
|
|
print >> sys.stderr, _( |
494
|
|
|
"Can't read --exclude-file: %s") % options.excludefilename |
495
|
|
|
sys.exit(1) |
496
|
|
|
else: |
497
|
|
|
options.toexclude = [] |
498
|
|
|
|
499
|
|
|
# slurp through all the files |
500
|
|
|
eater = TokenEater(options) |
501
|
|
|
for filename in args: |
502
|
|
|
if filename == '-': |
503
|
|
|
if options.verbose: |
504
|
|
|
print _('Reading standard input') |
505
|
|
|
fp = sys.stdin |
506
|
|
|
closep = 0 |
507
|
|
|
else: |
508
|
|
|
if options.verbose: |
509
|
|
|
print _('Working on %s') % filename |
510
|
|
|
fp = open(filename) |
511
|
|
|
closep = 1 |
512
|
|
|
try: |
513
|
|
|
eater.set_filename(filename) |
514
|
|
|
try: |
515
|
|
|
tokenize.tokenize(fp.readline, eater) |
516
|
|
|
except tokenize.TokenError, e: |
517
|
|
|
print >> sys.stderr, '%s: %s, line %d, column %d' % ( |
518
|
|
|
e[0], filename, e[1][0], e[1][1]) |
519
|
|
|
finally: |
520
|
|
|
if closep: |
521
|
|
|
fp.close() |
522
|
|
|
|
523
|
|
|
# write the output |
524
|
|
|
if options.outfile == '-': |
525
|
|
|
fp = sys.stdout |
526
|
|
|
closep = 0 |
527
|
|
|
else: |
528
|
|
|
if options.outpath: |
529
|
|
|
options.outfile = os.path.join(options.outpath, options.outfile) |
530
|
|
|
fp = open(options.outfile, 'w') |
531
|
|
|
closep = 1 |
532
|
|
|
try: |
533
|
|
|
eater.write(fp) |
534
|
|
|
finally: |
535
|
|
|
if closep: |
536
|
|
|
fp.close() |
537
|
|
|
|
538
|
|
|
|
539
|
|
|
# Run the extractor only when this file is executed as a script.
if __name__ == '__main__':
    main()
    # some more test strings
    # NOTE(review): presumably extra input for running pygettext on its own
    # source (see the "for selftesting" fallback for _) -- confirm.
    _(u'a unicode string')
543
|
|
|
|