1
|
|
|
<?php namespace Axisofstevil\StopWords; |
2
|
|
|
|
3
|
|
|
/**
 * Static holder for two predefined word lists and an accessor that picks
 * between them.
 *
 * Presumably these are "stop words" to be filtered out of text (going by the
 * enclosing namespace); this class itself only stores and returns the lists.
 */
class Words
{
    /**
     * The short list (21 entries); returned by getDefault() when $strict is
     * falsy.
     *
     * @var string[]
     */
    public static $basic_words = [
        "the","and","or","for","but","yet","so","lot","of","all","a",
        "his","her","he","she","that", "any","an","in","with","to"
    ];

    /**
     * The long list; returned by getDefault() when $strict is truthy.
     *
     * Besides ordinary words it contains single letters, two-letter codes,
     * contractions, the numeric strings "39" and "10", and an upper-case
     * "NULL". A few entries ("z", "org", "inc") occur more than once; the
     * list is kept exactly as-is so indexes and counts do not change.
     *
     * @var string[]
     */
    public static $strict_words = [
        "a","ii","about","above","according","across","39","actually","ad",
        "adj","ae","af","after","afterwards","ag","again","against","ai","al",
        "all","almost","alone","along","already","also","although","always",
        "am","among","amongst","an","and","another","any","anyhow","anyone",
        "anything","anywhere","ao","aq","ar","are","aren","aren't","around",
        "arpa","as","at","au","aw","az","b","ba","bb","bd","be","became",
        "because","become","becomes","becoming","been","before","beforehand",
        "begin","beginning","behind","being","below","beside","besides",
        "between","beyond","bf","bg","bh","bi","billion","bj","bm","bn","bo",
        "both","br","bs","bt","but","buy","bv","bw","by","bz","c","ca","can",
        "can't","cannot","caption","cc","cd","cf","cg","ch","ci","ck","cl",
        "click","cm","cn","co",
        "co.","com","copy","could","couldn","couldn't","cr","cs","cu","cv","cx",
        "cy","cz","d","de","did","didn","didn't","dj","dk","dm","do","does",
        "doesn","doesn't","don","don't","down","during","dz","e","each","ec",
        "edu","ee","eg","eh","eight","eighty","either","else","elsewhere","end",
        "ending","enough","er","es","et","etc","even","ever","every","everyone",
        "everything","everywhere","except","f","few","fi","fifty","find",
        "first","five","fj","fk","fm","fo","for","former","formerly","forty",
        "found","four","fr","free","from","further","fx","g","ga","gb","gd",
        "ge","get","gf","gg","gh","gi","gl","gm","gmt","gn","go","gov","gp",
        "gq","gr","gs","gt","gu","gw","gy","h","had","has","hasn","hasn't",
        "have","haven","haven't","he","he'd","he'll","he's","help","hence",
        "her","here","here's","hereafter","hereby","herein","hereupon","hers",
        "herself","him","himself","his","hk","hm","hn","home","homepage","how",
        "however","hr","ht","htm","html","http","hu","hundred","i","i'd","i'll",
        "i'm","i've","i.e.","id","ie","if","il","im","in","inc",
        "inc.","indeed","information","instead","int","into","io","iq","ir","is",
        "isn","isn't","it","it's","its","itself","j","je","jm","jo","join","jp",
        "k","ke","kg","kh","ki","km","kn","kp","kr","kw","ky","kz","l","la",
        "last","later","latter","lb","lc","least","less","let","let's","li",
        "like","likely","lk","ll","lr","ls","lt","ltd","lu","lv","ly","m","ma",
        "made","make","makes","many","maybe","mc","md","me","meantime",
        "meanwhile","mg","mh","microsoft","might","mil","million","miss","mk",
        "ml","mm","mn","mo","more","moreover","most","mostly","mp","mq","mr",
        "mrs","ms","msie","mt","mu","much","must","mv","mw","mx","my","myself",
        "mz","n","na","namely","nc","ne","neither","net","netscape","never",
        "nevertheless","new","next","nf","ng","ni","nine","ninety","nl","no",
        "nobody","none","nonetheless","noone","nor","not","nothing","now",
        "nowhere","np","nr","nu","nz","o","of","off","often","om","on","once",
        "one","one's","only","onto","or","org","other","others","otherwise",
        "our","ours","ourselves","out","over","overall","own","p","pa","page",
        "pe","per","perhaps","pf","pg","ph","pk","pl","pm","pn","pr","pt","pw",
        "py","q","qa","r","rather","re","recent","recently","reserved","ring",
        "ro","ru","rw","s","sa","same","sb","sc","sd","se","seem","seemed",
        "seeming","seems","seven","seventy","several","sg","sh","she","she'd",
        "she'll","she's","should","shouldn","shouldn't","si","since","site",
        "six","sixty","sj","sk","sl","sm","sn","so","some","somehow","someone",
        "something","sometime","sometimes","somewhere","sr","st","still","stop",
        "su","such","sv","sy","sz","t","taking","tc","td","ten","text","tf",
        "tg","test","th","than","that","that'll","that's","the","their","them",
        "themselves","then","thence","there","there'll","there's","thereafter",
        "thereby","therefore","therein","thereupon","these","they","they'd",
        "they'll","they're","they've","thirty","this","those","though",
        "thousand","three","through","throughout","thru","thus","tj","tk","tm",
        "tn","to","together","too","toward","towards","tp","tr","trillion","tt",
        "tv","tw","twenty","two","tz","u","ua","ug","uk","um","under","unless",
        "unlike","unlikely","until","up","upon","us","use","used","using","uy",
        "uz","v","va","vc","ve","very","vg","vi","via","vn","vu","w","was",
        "wasn","wasn't","we","we'd","we'll","we're","we've","web","webpage",
        "website","welcome","well","were","weren","weren't","wf","what",
        "what'll","what's","whatever","when","whence","whenever","where",
        "whereafter","whereas","whereby","wherein","whereupon","wherever",
        "whether","which","while","whither","who","who'd","who'll","who's",
        "whoever","NULL","whole","whom","whomever","whose","why","will","with",
        "within","without","won","won't","would","wouldn","wouldn't","ws","www",
        "x","y","ye","yes","yet","you","you'd","you'll","you're","you've",
        "your","yours","yourself","yourselves","yt","yu","z","za","zm","zr",
        "10","z","org","inc","width","length"
    ];

    /**
     * Return one of the two predefined word lists.
     *
     * The properties are read through `static::`, so a subclass that
     * redeclares $basic_words or $strict_words gets its own list back.
     *
     * @param mixed $strict Any truthy value selects $strict_words; a falsy
     *                      value (the default) selects $basic_words.
     *
     * @return string[] The selected list, unmodified.
     */
    public static function getDefault($strict = false)
    {
        if ($strict) {
            return static::$strict_words;
        }

        return static::$basic_words;
    }
}
87
|
|
|
|