| 
                    1
                 | 
                                    
                                                     | 
                
                 | 
                package main  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    2
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    3
                 | 
                                    
                                                     | 
                
                 | 
                import (  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    4
                 | 
                                    
                                                     | 
                
                 | 
                	"encoding/xml"  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    5
                 | 
                                    
                                                     | 
                
                 | 
                	"fmt"  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    6
                 | 
                                    
                                                     | 
                
                 | 
                	"net/http"  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    7
                 | 
                                    
                                                     | 
                
                 | 
                	"os"  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    8
                 | 
                                    
                                                     | 
                
                 | 
                	"time"  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    9
                 | 
                                    
                                                     | 
                
                 | 
                )  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    10
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    11
                 | 
                                    
                                                     | 
                
                 | 
                // URLSet is root for site mite  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    12
                 | 
                                    
                                                     | 
                
                 | 
                type URLSet struct { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    13
                 | 
                                    
                                                     | 
                
                 | 
                	XMLName xml.Name `xml:"urlset"`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    14
                 | 
                                    
                                                     | 
                
                 | 
                	XMLNs   string   `xml:"xmlns,attr"`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    15
                 | 
                                    
                                                     | 
                
                 | 
                	URL     []URL    `xml:"url"`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    16
                 | 
                                    
                                                     | 
                
                 | 
                }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    17
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    18
                 | 
                                    
                                                     | 
                
                 | 
                // URL is for every single location url  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    19
                 | 
                                    
                                                     | 
                
                 | 
                type URL struct { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    20
                 | 
                                    
                                                     | 
                
                 | 
                	Loc        string  `xml:"loc"`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    21
                 | 
                                    
                                                     | 
                
                 | 
                	LastMod    string  `xml:"lastmod,omitempty"`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    22
                 | 
                                    
                                                     | 
                
                 | 
                	ChangeFreq string  `xml:"changefreq,omitempty"`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    23
                 | 
                                    
                                                     | 
                
                 | 
                	Priority   float32 `xml:"priority,omitempty"`  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    24
                 | 
                                    
                                                     | 
                
                 | 
                }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    25
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    26
                 | 
                                    
                                                     | 
                
                 | 
                type ValidURL struct { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    27
                 | 
                                    
                                                     | 
                
                 | 
                	IsValid    bool  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    28
                 | 
                                    
                                                     | 
                
                 | 
                	URL        URL  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    29
                 | 
                                    
                                                     | 
                
                 | 
                	StatusCode int  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    30
                 | 
                                    
                                                     | 
                
                 | 
                }  | 
            
            
                                                                                                            
                                                                
            
                                    
            
            
                | 
                    31
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    32
                 | 
                                    
                                                     | 
                
                 | 
                func (us *URLSet) saveToFile(filename string) error { | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    33
                 | 
                                    
                                                     | 
                
                 | 
                	m, err := xml.MarshalIndent((*us), "\r\n", "    ")  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    34
                 | 
                                    
                                                     | 
                
                 | 
                	if err != nil { | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    35
                 | 
                                    
                                                     | 
                
                 | 
                		return err  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    36
                 | 
                                    
                                                     | 
                
                 | 
                	}  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    37
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    38
                 | 
                                    
                                                     | 
                
                 | 
                	file, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0777)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    39
                 | 
                                    
                                                     | 
                
                 | 
                	file.Write([]byte(xml.Header))  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    40
                 | 
                                    
                                                     | 
                
                 | 
                	file.Write(m)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    41
                 | 
                                    
                                                     | 
                
                 | 
                	file.Close()  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    42
                 | 
                                    
                                                     | 
                
                 | 
                	return err  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    43
                 | 
                                    
                                                     | 
                
                 | 
                }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    44
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    45
                 | 
                                    
                                                     | 
                
                 | 
                func (us *URLSet) validate() URLSet { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    46
                 | 
                                    
                                                     | 
                
                 | 
                	client := &http.Client{ | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    47
                 | 
                                    
                                                     | 
                
                 | 
                		Timeout: 10 * time.Second,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    48
                 | 
                                    
                                                     | 
                
                 | 
                	}  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    49
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    50
                 | 
                                    
                                                     | 
                
                 | 
                	newURLSet := URLSet{ | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    51
                 | 
                                    
                                                     | 
                
                 | 
                		XMLNs: us.XMLNs,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    52
                 | 
                                    
                                                     | 
                
                 | 
                	}  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    53
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    54
                 | 
                                    
                                                     | 
                
                 | 
                	n := len((*us).URL)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    55
                 | 
                                    
                                                     | 
                
                 | 
                	for i, url := range (*us).URL { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    56
                 | 
                                    
                                                     | 
                
                 | 
                		// time.Sleep(5 * time.Second)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    57
                 | 
                                    
                                                     | 
                
                 | 
                		resp, err := client.Get(url.Loc)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    58
                 | 
                                    
                                                     | 
                
                 | 
                		if err == nil { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    59
                 | 
                                    
                                                     | 
                
                 | 
                			statusCode := (*resp).StatusCode  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    60
                 | 
                                    
                                                     | 
                
                 | 
                			if statusCode == 200 { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    61
                 | 
                                    
                                                     | 
                
                 | 
                				fmt.Printf("Url %d/%d check (%d): %s \n", i, n, statusCode, url.Loc) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    62
                 | 
                                    
                                                     | 
                
                 | 
                				newURLSet.URL = append(newURLSet.URL, url)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    63
                 | 
                                    
                                                     | 
                
                 | 
                			} else { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    64
                 | 
                                    
                                                     | 
                
                 | 
                				fmt.Printf("Url %d/%d dead (%d): %s \n", i, n, statusCode, url.Loc) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    65
                 | 
                                    
                                                     | 
                
                 | 
                			}  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    66
                 | 
                                    
                                                     | 
                
                 | 
                		} else { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    67
                 | 
                                    
                                                     | 
                
                 | 
                			fmt.Printf("Url %d/%d error: %s \n", i, n, url.Loc) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    68
                 | 
                                    
                                                     | 
                
                 | 
                		}  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    69
                 | 
                                    
                                                     | 
                
                 | 
                	}  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    70
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    71
                 | 
                                    
                                                     | 
                
                 | 
                	return newURLSet  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    72
                 | 
                                    
                                                     | 
                
                 | 
                }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    73
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    74
                 | 
                                    
                                                     | 
                
                 | 
                //i will use first parameter to determine sitemapIndex or not.  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    75
                 | 
                                    
                                                     | 
                
                 | 
                func newURLSetFromXML(rawXMLData []byte) (bool, URLSet) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    76
                 | 
                                    
                                                     | 
                
                 | 
                	us := URLSet{} | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    77
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    78
                 | 
                                    
                                                     | 
                
                 | 
                	err := xml.Unmarshal(rawXMLData, &us)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    79
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    80
                 | 
                                    
                                                     | 
                
                 | 
                	if err != nil { //some kind of goto | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    81
                 | 
                                    
                                                     | 
                
                 | 
                		sitemapIndex := newSitemapIndexFromXML(rawXMLData)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    82
                 | 
                                    
                                                     | 
                
                 | 
                		sitemapIndexValidate(sitemapIndex)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    83
                 | 
                                    
                                                     | 
                
                 | 
                		return true, URLSet{} | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    84
                 | 
                                    
                                                     | 
                
                 | 
                	}  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    85
                 | 
                                    
                                                     | 
                
                 | 
                	return false, us  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    86
                 | 
                                    
                                                     | 
                
                 | 
                }  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    87
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    88
                 | 
                                    
                                                     | 
                
                 | 
                func singleProcess(uri string, filename string) { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    89
                 | 
                                    
                                                     | 
                
                 | 
                	client := &http.Client{ | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    90
                 | 
                                    
                                                     | 
                
                 | 
                		Timeout: 100 * time.Second,  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    91
                 | 
                                    
                                                     | 
                
                 | 
                	}  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    92
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    93
                 | 
                                    
                                                     | 
                
                 | 
                	resp, err := client.Get(uri)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    94
                 | 
                                    
                                                     | 
                
                 | 
                	if err != nil { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    95
                 | 
                                    
                                                     | 
                
                 | 
                		fmt.Printf("Url cannot fetched: %s\n", uri) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    96
                 | 
                                    
                                                     | 
                
                 | 
                		fmt.Println(err)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    97
                 | 
                                    
                                                     | 
                
                 | 
                		os.Exit(1)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    98
                 | 
                                    
                                                     | 
                
                 | 
                	}  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    99
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    100
                 | 
                                    
                                                     | 
                
                 | 
                	rawXMLData := readXMLFromResponse(resp)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    101
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    102
                 | 
                                    
                                                     | 
                
                 | 
                	isJumped, urlSet := newURLSetFromXML(rawXMLData)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    103
                 | 
                                    
                                                     | 
                
                 | 
                	if !isJumped { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    104
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    105
                 | 
                                    
                                                     | 
                
                 | 
                		newURLSet := urlSet.validate()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    106
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    107
                 | 
                                    
                                                     | 
                
                 | 
                		err = newURLSet.saveToFile(filename)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    108
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    109
                 | 
                                    
                                                     | 
                
                 | 
                		if err != nil { | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    110
                 | 
                                    
                                                     | 
                
                 | 
                			fmt.Println(err)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    111
                 | 
                                    
                                                     | 
                
                 | 
                			os.Exit(1)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    112
                 | 
                                    
                                                     | 
                
                 | 
                		}  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    113
                 | 
                                    
                                                     | 
                
                 | 
                	}  | 
            
            
                                                                                                            
                                                                
            
                                    
            
            
                | 
                    114
                 | 
                                    
                                                     | 
                
                 | 
                }  | 
            
            
                                                        
            
                                    
            
            
                | 
                    115
                 | 
                                    
                                                     | 
                
                 | 
                 |