1
|
|
|
module MailAddress

  # --------------------------------------------------------------------------------------------------
  # This module is ported from Google Closure JavaScript Library
  #  -> https://github.com/google/closure-library/blob/master/closure/goog/format/emailaddress.js
  # --------------------------------------------------------------------------------------------------

  # Characters that open a bracketed/quoted token, and the closer for each.
  # The two strings are index-aligned: CLOSERS_[i] closes OPENERS_[i].
  OPENERS_ = '"<(['
  CLOSERS_ = '">)]'
  # SPECIAL_CHARS = '()<>@:\\\".[]'
  # Characters that separate one address from the next in a list.
  ADDRESS_SEPARATORS_ = ',;'
  # CHARS_REQUIRE_QUOTES_ = SPECIAL_CHARS + ADDRESS_SEPARATORS_
  # A backslash followed by a double quote.
  ESCAPED_DOUBLE_QUOTES_ = /\\\"/
  # Two consecutive backslashes.
  ESCAPED_BACKSLASHES_ = /\\\\/
  # NOTE: the next two names look swapped relative to what they match
  # (QUOTED_REGEX_STR_ has no surrounding quotes, UNQUOTED_REGEX_STR_ does).
  # They are kept as-is because other code may reference these constants.
  QUOTED_REGEX_STR_ = '[+a-zA-Z0-9_.!#$%&\'*\\/=?^`{|}~-]+'
  UNQUOTED_REGEX_STR_ = '"' + QUOTED_REGEX_STR_ + '"'
  LOCAL_PART_REGEXP_STR_ = '(?:' + QUOTED_REGEX_STR_ + '|' + UNQUOTED_REGEX_STR_ + ')'
  DOMAIN_PART_REGEXP_STR_ = '([a-zA-Z0-9-]+\\.)+[a-zA-Z0-9]{2,63}'
  # Whole-string match (\A ... \z) of local-part@domain.
  EMAIL_ADDRESS_ = Regexp.new('\\A' + LOCAL_PART_REGEXP_STR_ + '@' + DOMAIN_PART_REGEXP_STR_ + '\\z')

  # Splits a string holding one or more addresses into parsed address objects.
  #
  # Addresses are separated by any character in ADDRESS_SEPARATORS_, or by a
  # single space when the text accumulated so far already parses to a valid
  # address.
  #
  # @param str [String, nil] the raw address list
  # @return [Array] one parse_internal result per non-blank address; an empty
  #   array when +str+ is nil or contains no address text
  def self.parse_simple(str)
    return [] if str.nil?

    result = []
    email = ''

    # Remove non-UNIX-style newlines that would otherwise cause get_token to
    # choke. Remove multiple consecutive whitespace characters for the same
    # reason.
    str = collapse_whitespace(str)
    i = 0
    while i < str.length
      token = get_token(str, i)

      # A separator always ends the current address. A space ends it only when
      # what has been collected so far is already a valid address; in that case
      # keep the parsed object so it is not parsed a second time below.
      parsed = nil
      boundary = is_address_separator(token)
      if !boundary && token == ' '
        parsed = parse_internal(email)
        boundary = is_valid(parsed)
      end

      if boundary
        unless is_empty_or_whitespace(email)
          result.push(parsed || parse_internal(email))
        end
        email = ''
        i += 1 # separators and the space are always a single character
        next
      end

      email << token
      i += token.length
    end

    # Add the final token.
    result.push(parse_internal(email)) unless is_empty_or_whitespace(email)
    result
  end

  # Parses a single address such as 'Name <user@example.com>' or
  # 'user@example.com' into its display name and address parts.
  #
  # @param addr [String] text of exactly one address
  # @return [MailAddress::Address] built from (name, address, stripped addr)
  def self.parse_internal(addr)
    name = ''
    address = ''
    i = 0
    while i < addr.length
      token = get_token(addr, i)
      # Only a '<...>' token that actually contains its closer is an address.
      close_idx = token[0] == '<' ? token.index('>') : nil
      if close_idx
        address = token[1, close_idx - 1]
      elsif address == ''
        # Everything seen before the bracketed address belongs to the name.
        name << token
      end
      i += token.length
    end

    # Check if it's a simple email address of the form "user@example.com".
    if address == '' && name.index('@')
      address = name
      name = ''
    end

    name = collapse_whitespace(name)
    # Drop one layer of surrounding single quotes, then one of double quotes.
    name = name[1..-2] if name.start_with?('\'') && name.end_with?('\'')
    name = name[1..-2] if name.start_with?('"') && name.end_with?('"')

    # Replace escaped quotes and slashes.
    name = name.gsub(ESCAPED_DOUBLE_QUOTES_, '"')
    name = name.gsub(ESCAPED_BACKSLASHES_, '\\')

    # The upstream JavaScript collapses whitespace inside the address
    # (goog.string.collapseWhitespace); this port only strips both ends.
    address = address.strip

    MailAddress::Address.new(name, address, addr.strip)
  end

  # Returns the token starting at +pos+: either the single character there, or,
  # when that character is an opener with a matching closer later in the
  # string, the whole span from opener through closer.
  #
  # @param str [String] text being tokenized
  # @param pos [Integer] index of the token's first character (must be < str.length)
  # @return [String] the token
  def self.get_token(str, pos)
    ch = str[pos]
    opener_idx = OPENERS_.index(ch)
    return ch unless opener_idx

    # If an opener is an escaped quote we do not treat it as a real opener
    # and keep accumulating the token.
    return ch if is_escaped_dbl_quote(str, pos)

    closer_char = CLOSERS_[opener_idx]
    end_pos = str.index(closer_char, pos + 1)

    # If the closer is a quote we go forward skipping escaped quotes until we
    # hit the real closing one. String#index yields nil when nothing is found.
    while end_pos && is_escaped_dbl_quote(str, end_pos)
      end_pos = str.index(closer_char, end_pos + 1)
    end

    # With no closer, the opener stands alone as a one-character token.
    end_pos ? str[pos..end_pos] : ch
  end

  # Tells whether the character at +pos+ is a double quote preceded by an odd
  # number of consecutive backslashes.
  #
  # @param str [String]
  # @param pos [Integer]
  # @return [Boolean]
  def self.is_escaped_dbl_quote(str, pos)
    return false if str[pos] != '"'

    slash_count = 0
    idx = pos - 1
    while idx >= 0 && str[idx] == '\\'
      slash_count += 1
      idx -= 1
    end
    # An even run of backslashes escapes itself, leaving the quote unescaped.
    slash_count.odd?
  end

  # Replaces every run of whitespace with a single space and trims both ends.
  # \xc2\xa0 is the UTF-8 byte sequence of U+00A0 (no-break space).
  #
  # @param str [String]
  # @return [String]
  def self.collapse_whitespace(str)
    str.gsub(/[\s\xc2\xa0]+/, ' ').strip
  end

  # Tells whether +str+ is empty or consists only of whitespace
  # (including U+00A0, written as its UTF-8 bytes \xc2\xa0).
  #
  # @param str [String]
  # @return [Integer, nil] 0 (truthy) on match, nil otherwise
  def self.is_empty_or_whitespace(str)
    /\A[\s\xc2\xa0]*\z/ =~ str
  end

  # Tells whether +ch+ occurs in ADDRESS_SEPARATORS_.
  #
  # @param ch [String]
  # @return [Boolean]
  def self.is_address_separator(ch)
    ADDRESS_SEPARATORS_.include? ch
  end

  # Tells whether the +address+ attribute of the given object matches
  # EMAIL_ADDRESS_.
  #
  # @param address [#address] object responding to +address+ with a String
  # @return [Integer, nil] 0 (truthy) on match, nil otherwise
  def self.is_valid(address)
    EMAIL_ADDRESS_ =~ address.address
  end

end
138
|
|
|
|