|
1
|
|
|
module MailAddress
  # --------------------------------------------------------------------------------------------------
  # This module is ported from Google Closure JavaScript Library
  #  -> https://github.com/google/closure-library/blob/master/closure/goog/format/emailaddress.js
  # --------------------------------------------------------------------------------------------------

  # Characters that open a grouped token. CLOSERS_ holds the matching closing
  # character at the same index.
  OPENERS_ = '"<(['
  CLOSERS_ = '">)]'
  # SPECIAL_CHARS = '()<>@:\\\".[]'

  # Characters that separate one address from the next in a list.
  ADDRESS_SEPARATORS_ = ',;'
  # CHARS_REQUIRE_QUOTES_ = SPECIAL_CHARS + ADDRESS_SEPARATORS_

  # Match a backslash-escaped double quote / a backslash-escaped backslash.
  ESCAPED_DOUBLE_QUOTES_ = /\\\"/
  ESCAPED_BACKSLASHES_ = /\\\\/

  # NOTE: these two names are inverted relative to what they match:
  # QUOTED_REGEX_STR_ matches a bare run of permitted local-part characters,
  # while UNQUOTED_REGEX_STR_ matches that same run wrapped in double quotes.
  # The names are kept as they are so existing references keep working.
  QUOTED_REGEX_STR_ = '[+a-zA-Z0-9_.!#$%&\'*\\/=?^`{|}~-]+'
  UNQUOTED_REGEX_STR_ = '"' + QUOTED_REGEX_STR_ + '"'
  LOCAL_PART_REGEXP_STR_ = '(?:' + QUOTED_REGEX_STR_ + '|' + UNQUOTED_REGEX_STR_ + ')'
  DOMAIN_PART_REGEXP_STR_ = '([a-zA-Z0-9-]+\\.)+[a-zA-Z0-9]{2,63}'

  # Whole-string pattern: local part, '@', then the domain part.
  EMAIL_ADDRESS_ = Regexp.new('\\A' + LOCAL_PART_REGEXP_STR_ + '@' + DOMAIN_PART_REGEXP_STR_ + '\\z')

  # Splits a string holding one or more addresses into parsed address objects.
  #
  # Addresses are split on the characters in ADDRESS_SEPARATORS_. A single
  # space also ends an address when the text accumulated so far already parses
  # to a valid address.
  #
  # @param str [String, nil] the address list to parse
  # @return [Array] one object per address, as built by parse_internal;
  #   empty when str is nil, empty or only whitespace
  def self.parse_simple(str)
    return [] if str.nil?

    result = []
    # Unfrozen buffer: it is appended to in place below.
    email = ''.dup

    # Remove non-UNIX-style newlines that would otherwise cause get_token to
    # choke. Remove multiple consecutive whitespace characters for the same
    # reason.
    str = collapse_whitespace(str)
    i = 0
    while i < str.length
      token = get_token(str, i)
      if is_address_separator(token) || (token == ' ' && is_valid(parse_internal(email)))
        result.push(parse_internal(email)) unless is_empty_or_whitespace(email)
        email = ''.dup
        # Separators and the splitting space are always a single character.
        i += 1
        next
      end
      email << token
      i += token.length
    end

    # Add the final token.
    result.push(parse_internal(email)) unless is_empty_or_whitespace(email)
    result
  end

  # Parses the text of a single address into a MailAddress::Address.
  #
  # The contents of the first non-empty <...> token become the address; the
  # tokens seen before it are joined to form the display name.
  #
  # @param addr [String] text of one address, e.g. '"John Doe" <john@example.com>'
  # @return [MailAddress::Address] built from (name, address, stripped addr)
  def self.parse_internal(addr)
    name = ''.dup
    address = ''
    i = 0
    while i < addr.length
      token = get_token(addr, i)
      # Only a token that starts with '<' and contains '>' carries an address.
      close_idx = token[0] == '<' ? token.index('>') : nil
      if close_idx
        address = token[1, close_idx - 1]
      elsif address == ''
        name << token
      end
      i += token.length
    end

    # Check if it's a simple email address of the form "user@example.com".
    if address == '' && name.index('@')
      address = name
      name = ''
    end

    name = collapse_whitespace(name)
    # Drop one layer of surrounding single quotes, then of double quotes.
    name = name[1..-2] if name.start_with?('\'') && name.end_with?('\'')
    name = name[1..-2] if name.start_with?('"') && name.end_with?('"')

    # Replace escaped quotes and slashes.
    name = name.gsub(ESCAPED_DOUBLE_QUOTES_, '"')
    name = name.gsub(ESCAPED_BACKSLASHES_, '\\')

    # Non-mutating strip, so a frozen string is never modified in place.
    address = address.strip

    MailAddress::Address.new(name, address, addr.strip)
  end

  # Returns the token that starts at +pos+ in +str+.
  #
  # If the character at +pos+ is one of OPENERS_ (and is not an escaped double
  # quote) and its matching closer occurs later in the string, the token runs
  # from the opener through that closer. Otherwise the token is the single
  # character at +pos+.
  #
  # @param str [String] the string being scanned
  # @param pos [Integer] index of the token's first character
  # @return [String] the token
  def self.get_token(str, pos)
    ch = str[pos]
    opener_idx = OPENERS_.index(ch)
    return ch unless opener_idx

    # If an opener is an escaped quote we do not treat it as a real opener
    # and keep accumulating the token.
    return ch if is_escaped_dbl_quote(str, pos)

    closer_char = CLOSERS_[opener_idx]
    end_pos = str.index(closer_char, pos + 1)

    # If the closer is a quote we go forward skipping escaped quotes until we
    # hit the real closing one. String#index returns nil when nothing is found.
    while end_pos && is_escaped_dbl_quote(str, end_pos)
      end_pos = str.index(closer_char, end_pos + 1)
    end

    end_pos ? str[pos..end_pos] : ch
  end

  # Tells whether the character at +pos+ is a double quote preceded by an odd
  # number of consecutive backslashes.
  #
  # @param str [String] the string being scanned
  # @param pos [Integer] index of the character to test
  # @return [Boolean]
  def self.is_escaped_dbl_quote(str, pos)
    return false if str[pos] != '"'

    slash_count = 0
    (pos - 1).downto(0) do |idx|
      break unless str[idx] == '\\'

      slash_count += 1
    end
    slash_count.odd?
  end

  # Replaces each run of whitespace / \xc2\xa0 with one space and strips the
  # ends of the result.
  #
  # @param str [String]
  # @return [String]
  def self.collapse_whitespace(str)
    str.gsub(/[\s\xc2\xa0]+/, ' ').strip
  end

  # Tells whether +str+ consists only of whitespace / \xc2\xa0, or is empty.
  #
  # @param str [String]
  # @return [Integer, nil] 0 (truthy) on a match, nil otherwise
  def self.is_empty_or_whitespace(str)
    /\A[\s\xc2\xa0]*\z/ =~ str
  end

  # Tells whether +ch+ occurs in ADDRESS_SEPARATORS_.
  #
  # @param ch [String]
  # @return [Boolean]
  def self.is_address_separator(ch)
    ADDRESS_SEPARATORS_.include? ch
  end

  # Tells whether the +address+ attribute of the given object matches
  # EMAIL_ADDRESS_.
  #
  # @param address [#address] a parsed address, as returned by parse_internal
  # @return [Integer, nil] 0 (truthy) on a match, nil otherwise
  def self.is_valid(address)
    EMAIL_ADDRESS_ =~ address.address
  end
end
|
138
|
|
|
|