Passed
Push — master ( a4f682...8d8e1f )
by Emre
03:38
created

main.*URLSet.validate   B

Complexity

Conditions 6

Size

Total Lines 49
Code Lines 31

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 31
nop 0
dl 0
loc 49
rs 8.2026
c 0
b 0
f 0
1
package main
2
3
import (
4
	"encoding/xml"
5
	"fmt"
6
	"net/http"
7
	"os"
8
	"sync"
9
	"time"
10
)
11
12
// URLSet is root for site mite
13
type URLSet struct {
14
	XMLName xml.Name `xml:"urlset"`
15
	XMLNs   string   `xml:"xmlns,attr"`
16
	URL     []URL    `xml:"url"`
17
}
18
19
// URL is for every single location url
20
type URL struct {
21
	Loc        string  `xml:"loc"`
22
	LastMod    string  `xml:"lastmod,omitempty"`
23
	ChangeFreq string  `xml:"changefreq,omitempty"`
24
	Priority   float32 `xml:"priority,omitempty"`
25
}
26
27
type ValidURL struct {
28
	IsValid    bool
29
	URL        URL
30
	StatusCode int
31
}
32
33
// saveToFile serializes the URLSet as indented XML, prepends the standard
// XML declaration, and writes the result to filename (created or truncated).
// It returns any marshaling or file-system error.
func (us *URLSet) saveToFile(filename string) error {
	// Prefix "" (not "\r\n" as before): MarshalIndent's prefix is prepended
	// to every line, so a CRLF prefix injected a blank line per element.
	m, err := xml.MarshalIndent(us, "", "    ")
	if err != nil {
		return err
	}

	// os.WriteFile replaces the original OpenFile/Write/Close sequence,
	// which ignored the open error (nil-file panic risk) and every write
	// error. 0644 instead of 0777: a sitemap needs no execute bit.
	data := append([]byte(xml.Header), m...)
	return os.WriteFile(filename, data, 0644)
}
45
46
func (us *URLSet) validate() URLSet {
47
	// Create an HTTP client with a timeout
48
	client := &http.Client{
49
		Timeout: 10 * time.Second,
50
	}
51
52
	// Initialize the new URLSet
53
	newURLSet := URLSet{
54
		XMLNs: us.XMLNs,
55
	}
56
57
	// Create a semaphore to limit the number of concurrent requests
58
	maxConcurrentRequests := 10
59
	sem := make(chan struct{}, maxConcurrentRequests)
60
61
	// Use a WaitGroup to wait for all goroutines
62
	var wg sync.WaitGroup
63
	var mu sync.Mutex
64
65
	n := len(us.URL)
66
	for i, url := range us.URL {
67
		wg.Add(1)
68
		sem <- struct{}{} // Acquire a semaphore slot
69
70
		go func(i int, url URL) {
71
			defer wg.Done()
72
			defer func() { <-sem }() // Release the semaphore slot
73
74
			resp, err := client.Get(url.Loc)
75
			if err != nil {
76
				fmt.Printf("Url %d/%d error: %s\n", i, n, url.Loc)
77
				return
78
			}
79
			defer resp.Body.Close()
80
81
			if resp.StatusCode == 200 {
82
				fmt.Printf("Url %d/%d check (200): %s\n", i, n, url.Loc)
83
				mu.Lock()
84
				newURLSet.URL = append(newURLSet.URL, url)
85
				mu.Unlock()
86
			} else {
87
				fmt.Printf("Url %d/%d dead (%d): %s\n", i, n, resp.StatusCode, url.Loc)
88
			}
89
		}(i, url)
90
	}
91
92
	wg.Wait() // Wait for all requests to complete
93
94
	return newURLSet
95
}
96
97
// i will use first parameter to determine sitemapIndex or not.
98
func newURLSetFromXML(rawXMLData []byte) (bool, URLSet) {
99
	us := URLSet{}
100
101
	err := xml.Unmarshal(rawXMLData, &us)
102
103
	if err != nil { //some kind of goto
104
		sitemapIndex := newSitemapIndexFromXML(rawXMLData)
105
		sitemapIndexValidate(sitemapIndex)
106
		return true, URLSet{}
107
	}
108
	return false, us
109
}
110
111
func singleProcess(uri string, filename string) {
112
	client := &http.Client{
113
		Timeout: 100 * time.Second,
114
	}
115
116
	resp, err := client.Get(uri)
117
	if err != nil {
118
		fmt.Printf("Url cannot fetched: %s\n", uri)
119
		fmt.Println(err)
120
		os.Exit(1)
121
	}
122
123
	rawXMLData := readXMLFromResponse(resp)
124
125
	isJumped, urlSet := newURLSetFromXML(rawXMLData)
126
	if !isJumped {
127
128
		newURLSet := urlSet.validate()
129
130
		err = newURLSet.saveToFile(filename)
131
132
		if err != nil {
133
			fmt.Println(err)
134
			os.Exit(1)
135
		}
136
	}
137
}
138