Issues (1)

sitemap_index.go (1 issue)

Severity
1
package main
2
3
import (
4
	"encoding/xml"
5
	"fmt"
6
	"net/http"
7
	"net/url"
8
	"os"
9
	"path"
10
	"time"
11
)
12
13
type SitemapIndex struct {
14
	XMLName xml.Name  `xml:"sitemapindex"`
15
	XMLNs   string    `xml:"xmlns,attr"`
16
	Sitemap []Sitemap `xml:"sitemap"`
17
}
18
// Sitemap is a single <sitemap> entry of a sitemap index.
type Sitemap struct {
	// Loc is the content of the <loc> element.
	Loc string `xml:"loc"`
	// LastMod is the content of the <lastmod> element; the element is left
	// out when marshalling an empty value.
	LastMod string `xml:"lastmod,omitempty"`
}
22
type SitemapValidation struct {
23
    IsValid bool
24
    Sitemap Sitemap
25
}
26
27
// findFileName derives the local file path under which the sitemap at s.Loc
// is stored, creating the target directory when it does not exist yet.
//
// The directory is the directory part of the URL path ("." for files that sit
// directly under the URL root) and the file name is the last path element.
// An empty string is returned when s.Loc cannot be parsed or when its path
// does not end in a file name (empty path or trailing slash).
//
// NOTE(review): for nested URL paths the directory is absolute (e.g. "/a"),
// so it is created relative to the filesystem root rather than the working
// directory — confirm that this is intended.
func (s Sitemap) findFileName() string {
	u, err := url.Parse(s.Loc)
	if err != nil {
		fmt.Printf("Url cannot be parsed: %s\n", s.Loc)
		fmt.Println(err)
		return ""
	}

	// Without a trailing file name there is nothing to derive a path from.
	if u.Path == "" || u.Path[len(u.Path)-1] == '/' {
		return ""
	}

	dir := path.Dir(u.Path)
	if dir == "/" {
		dir = "."
	}

	if _, err := os.Stat(dir); os.IsNotExist(err) {
		if err := os.MkdirAll(dir, 0777); err != nil {
			// Best effort: report the failure and still return the path.
			fmt.Printf("Directory cannot be created: %s\n", dir)
			fmt.Println(err)
		}
	}

	// path.Base yields the last element without a leading slash, so the
	// result never contains a doubled separator and never loses the first
	// character of a relative path.
	return dir + string(os.PathSeparator) + path.Base(u.Path)
}
44
func (si *SitemapIndex) validate() SitemapIndex {
45
	validatedSitemapChannel := make(chan SitemapValidation)
46
47
    for _, sitemap := range (*si).Sitemap {
48
        go func(s Sitemap){
49
            s.validate(validatedSitemapChannel)
50
        }(sitemap)
51
    }
52
53
	newSitemapIndex := SitemapIndex{
54
		XMLNs: si.XMLNs,
55
	}
56
57
    for i:=0;i<len((*si).Sitemap);i++ {
58
        validatedSitemap := <-validatedSitemapChannel
59
        if validatedSitemap.IsValid {
60
            newSitemapIndex.Sitemap = append(newSitemapIndex.Sitemap, validatedSitemap.Sitemap)
61
        }else{
62
            fmt.Printf("Url is dead: %s\n",validatedSitemap.Sitemap.Loc)
63
        }
64
	}
65
66
    close(validatedSitemapChannel)
67
68
	return newSitemapIndex
69
}
70
71
func (s *Sitemap) validate(sitemapChannel chan SitemapValidation) {
72
73
    resp,err := http.Get((*s).Loc)
74
    if err!=nil {
75
        fmt.Println(err.Error)
0 ignored issues
show
arg err.Error in Println call is a function value, not a function call
Loading history...
76
        return
77
    }
78
79
    validateSitemap := SitemapValidation {
80
        Sitemap: (*s),
81
        IsValid: true,
82
    }
83
84
    if resp.StatusCode != 200 {
85
        validateSitemap.IsValid = false;
86
    }
87
    sitemapChannel <- validateSitemap
88
89
    return
90
}
91
92
// saveToFile marshals the index to XML and writes it, preceded by the
// standard XML header, to filename. The file is created if necessary and
// truncated if it already exists.
//
// The first error hit while marshalling, opening, writing or closing the
// file is returned; nil means the whole document was written.
func (si *SitemapIndex) saveToFile(filename string) error {
	m, err := xml.Marshal(*si)
	if err != nil {
		return err
	}

	file, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0777)
	if err != nil {
		return err
	}

	if _, err := file.Write([]byte(xml.Header)); err != nil {
		file.Close()
		return err
	}
	if _, err := file.Write(m); err != nil {
		file.Close()
		return err
	}

	// Close can surface a delayed write error, so its result is the verdict.
	return file.Close()
}
104
105
func batchProcess(uri string) {
106
	resp, err := http.Get(uri)
107
	if err != nil {
108
		fmt.Printf("Url cannot fetched: %s\n", uri)
109
		fmt.Println(err)
110
		os.Exit(1)
111
	}
112
113
	rawXMLData := readXMLFromResponse(resp)
114
115
	sitemapIndex := newSitemapIndexFromXML(rawXMLData)
116
    sitemapIndexValidate(sitemapIndex)
117
}
118
119
// sitemapIndexValidate validates sitemapIndex, runs singleProcess for every
// entry that passed validation and finally writes the validated index to
// OutputFileName. A failure to write the index is printed instead of being
// silently dropped.
func sitemapIndexValidate(sitemapIndex SitemapIndex) {
	newSitemapIndex := sitemapIndex.validate()

	for _, sitemap := range newSitemapIndex.Sitemap {
		filename := sitemap.findFileName()
		if Verbose {
			fmt.Printf("Filename is %s\n", filename)
		}
		singleProcess(sitemap.Loc, filename)
		// Two-second pause after each sitemap — presumably to throttle
		// requests to the remote host; confirm before changing.
		time.Sleep(time.Second * 2)
	}

	if err := newSitemapIndex.saveToFile(OutputFileName); err != nil {
		fmt.Printf("Sitemap index cannot be saved: %s\n", OutputFileName)
		fmt.Println(err)
	}
}
132
133
// newSitemapIndexFromXML decodes rawXMLData into a SitemapIndex. When the
// data cannot be decoded, the reason is printed and an empty SitemapIndex is
// returned.
func newSitemapIndexFromXML(rawXMLData []byte) SitemapIndex {
	var parsed SitemapIndex
	if parseErr := xml.Unmarshal(rawXMLData, &parsed); parseErr != nil {
		fmt.Printf("Sitemap index cannot parsed. Because: %s", parseErr)
		return SitemapIndex{}
	}
	return parsed
}
143