|
1
|
|
|
package main |
|
2
|
|
|
|
|
3
|
|
|
import ( |
|
4
|
|
|
"encoding/xml" |
|
5
|
|
|
"fmt" |
|
6
|
|
|
"net/http" |
|
7
|
|
|
"net/url" |
|
8
|
|
|
"os" |
|
9
|
|
|
"path" |
|
10
|
|
|
"time" |
|
11
|
|
|
) |
|
12
|
|
|
|
|
13
|
|
|
type SitemapIndex struct { |
|
|
|
|
|
|
14
|
|
|
XMLName xml.Name `xml:"sitemapindex"` |
|
15
|
|
|
XMLNs string `xml:"xmlns,attr"` |
|
16
|
|
|
Sitemap []Sitemap `xml:"sitemap"` |
|
17
|
|
|
} |
|
18
|
|
|
// Sitemap is a single <sitemap> entry of a sitemap index.
type Sitemap struct {
	// Loc is the content of the <loc> child element.
	Loc string `xml:"loc"`
	// LastMod is the content of the <lastmod> child element; it is left out
	// when marshalling an empty value.
	LastMod string `xml:"lastmod,omitempty"`
}
|
22
|
|
|
type SitemapValidation struct { |
|
|
|
|
|
|
23
|
|
|
IsValid bool |
|
24
|
|
|
Sitemap Sitemap |
|
25
|
|
|
} |
|
26
|
|
|
|
|
27
|
|
|
func (s Sitemap) findFileName() string { |
|
28
|
|
|
u, _ := url.Parse(s.Loc) |
|
29
|
|
|
|
|
30
|
|
|
dir := path.Dir(u.Path) |
|
31
|
|
|
|
|
32
|
|
|
if dir=="/" { |
|
33
|
|
|
dir="." |
|
34
|
|
|
} |
|
35
|
|
|
|
|
36
|
|
|
filename := u.Path[len(dir):] |
|
37
|
|
|
|
|
38
|
|
|
if _, err := os.Stat(dir); os.IsNotExist(err) != false { |
|
39
|
|
|
os.MkdirAll(dir, 0777) |
|
40
|
|
|
} |
|
41
|
|
|
filename = dir + string(os.PathSeparator) + filename |
|
42
|
|
|
return filename |
|
43
|
|
|
} |
|
44
|
|
|
func (si *SitemapIndex) validate() SitemapIndex { |
|
45
|
|
|
validatedSitemapChannel := make(chan SitemapValidation) |
|
46
|
|
|
|
|
47
|
|
|
for _, sitemap := range (*si).Sitemap { |
|
48
|
|
|
go func(s Sitemap){ |
|
49
|
|
|
s.validate(validatedSitemapChannel) |
|
50
|
|
|
}(sitemap) |
|
51
|
|
|
} |
|
52
|
|
|
|
|
53
|
|
|
newSitemapIndex := SitemapIndex{ |
|
54
|
|
|
XMLNs: si.XMLNs, |
|
55
|
|
|
} |
|
56
|
|
|
|
|
57
|
|
|
for i:=0;i<len((*si).Sitemap);i++ { |
|
58
|
|
|
validatedSitemap := <-validatedSitemapChannel |
|
59
|
|
|
if validatedSitemap.IsValid { |
|
60
|
|
|
newSitemapIndex.Sitemap = append(newSitemapIndex.Sitemap, validatedSitemap.Sitemap) |
|
61
|
|
|
}else{ |
|
62
|
|
|
fmt.Printf("Url is dead: %s\n",validatedSitemap.Sitemap.Loc) |
|
63
|
|
|
} |
|
64
|
|
|
} |
|
65
|
|
|
|
|
66
|
|
|
close(validatedSitemapChannel) |
|
67
|
|
|
|
|
68
|
|
|
return newSitemapIndex |
|
69
|
|
|
} |
|
70
|
|
|
|
|
71
|
|
|
func (s *Sitemap) validate(sitemapChannel chan SitemapValidation) { |
|
72
|
|
|
|
|
73
|
|
|
resp,err := http.Get((*s).Loc) |
|
74
|
|
|
if err!=nil { |
|
75
|
|
|
fmt.Println(err.Error) |
|
|
|
|
|
|
76
|
|
|
return |
|
77
|
|
|
} |
|
78
|
|
|
|
|
79
|
|
|
validateSitemap := SitemapValidation { |
|
80
|
|
|
Sitemap: (*s), |
|
81
|
|
|
IsValid: true, |
|
82
|
|
|
} |
|
83
|
|
|
|
|
84
|
|
|
if resp.StatusCode != 200 { |
|
85
|
|
|
validateSitemap.IsValid = false; |
|
86
|
|
|
} |
|
87
|
|
|
sitemapChannel <- validateSitemap |
|
88
|
|
|
|
|
89
|
|
|
return |
|
90
|
|
|
} |
|
91
|
|
|
|
|
92
|
|
|
func (si *SitemapIndex) saveToFile(filename string) error { |
|
93
|
|
|
m, err := xml.Marshal((*si)) |
|
94
|
|
|
if err != nil { |
|
95
|
|
|
return err |
|
96
|
|
|
} |
|
97
|
|
|
|
|
98
|
|
|
file, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0777) |
|
99
|
|
|
file.Write([]byte(xml.Header)) |
|
100
|
|
|
file.Write(m) |
|
101
|
|
|
file.Close() |
|
102
|
|
|
return err |
|
103
|
|
|
} |
|
104
|
|
|
|
|
105
|
|
|
func batchProcess(uri string) { |
|
106
|
|
|
resp, err := http.Get(uri) |
|
107
|
|
|
if err != nil { |
|
108
|
|
|
fmt.Printf("Url cannot fetched: %s\n", uri) |
|
109
|
|
|
fmt.Println(err) |
|
110
|
|
|
os.Exit(1) |
|
111
|
|
|
} |
|
112
|
|
|
|
|
113
|
|
|
rawXMLData := readXMLFromResponse(resp) |
|
114
|
|
|
|
|
115
|
|
|
sitemapIndex := newSitemapIndexFromXML(rawXMLData) |
|
116
|
|
|
sitemapIndexValidate(sitemapIndex) |
|
117
|
|
|
} |
|
118
|
|
|
|
|
119
|
|
|
func sitemapIndexValidate(sitemapIndex SitemapIndex) { |
|
120
|
|
|
newSitemapIndex := sitemapIndex.validate() |
|
121
|
|
|
|
|
122
|
|
|
for _, sitemap := range newSitemapIndex.Sitemap { |
|
123
|
|
|
filename := sitemap.findFileName() |
|
124
|
|
|
if Verbose {fmt.Printf("Filename is %s\n",filename)} |
|
125
|
|
|
singleProcess(sitemap.Loc, filename) |
|
126
|
|
|
time.Sleep(time.Second * 2) |
|
127
|
|
|
} |
|
128
|
|
|
|
|
129
|
|
|
newSitemapIndex.saveToFile(OutputFileName) |
|
130
|
|
|
|
|
131
|
|
|
} |
|
132
|
|
|
|
|
133
|
|
|
func newSitemapIndexFromXML(rawXMLData []byte) SitemapIndex { |
|
134
|
|
|
sm := SitemapIndex{} |
|
135
|
|
|
err := xml.Unmarshal(rawXMLData, &sm) |
|
136
|
|
|
|
|
137
|
|
|
if err != nil { |
|
138
|
|
|
fmt.Printf("Sitemap index cannot parsed. Because: %s", err) |
|
139
|
|
|
return SitemapIndex{} |
|
140
|
|
|
} |
|
141
|
|
|
return sm |
|
142
|
|
|
} |
|
143
|
|
|
|