package web

import (
	"errors"
	"fmt"
	"net/http"
	"net/url"
	"strconv"

	"github.com/PuerkitoBio/goquery"
)

// googleDefaultUserAgent is the User-Agent header value used by Init when
// none has been configured through SetUserAgent.
const googleDefaultUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"

// googleURL is the base search endpoint; Init copies it before adding the
// query string so the shared value is never mutated.
var googleURL = urlMustParse("https://www.google.com/search")

// errGoogleNotInitialized is returned by the result accessors when they are
// called before Init has completed successfully.
var errGoogleNotInitialized = errors.New("google: Init has not completed successfully")

// Google fetches a Google search result page and exposes its entries by index.
//
// Configure it with SetKeyword, SetPage and SetUserAgent, call Init to perform
// the request, then use Each, Title, Link and Desc to read the results.
type Google struct {
	keyword   string
	userAgent string
	page      int // value sent as the "start" query parameter (already multiplied by 10)
	doc       *goquery.Document
	initDone  bool               // set to true once Init has parsed a response
	baseSel   *goquery.Selection // the `a > h3` elements, one per result
}

// SetKeyword sets the search term sent as the "q" query parameter.
func (g *Google) SetKeyword(keyword string) {
	g.keyword = keyword
}

// SetPage selects the result page. The value stored, and later sent as the
// "start" query parameter, is page * 10.
func (g *Google) SetPage(page int) {
	g.page = page * 10
}

// SetUserAgent sets the User-Agent header used by Init. When left empty, Init
// falls back to a built-in desktop Chrome User-Agent string.
func (g *Google) SetUserAgent(ua string) {
	g.userAgent = ua
}

// Init performs the search request and parses the response so that the result
// accessors can be used.
//
// It returns an error if the request cannot be built or sent, if the server
// answers with a status other than 200 OK, or if the body cannot be parsed as
// HTML. The request is sent with http.DefaultClient, so any timeout is whatever
// that client is configured with.
func (g *Google) Init() error {
	initURL := copyURL(googleURL)
	query := initURL.Query()
	query.Set("q", g.keyword)
	query.Set("start", strconv.Itoa(g.page))
	initURL.RawQuery = query.Encode()

	req, err := http.NewRequest(http.MethodGet, initURL.String(), nil)
	if err != nil {
		return err
	}

	// The default is written back to the struct, so later calls reuse it.
	if g.userAgent == "" {
		g.userAgent = googleDefaultUserAgent
	}
	req.Header.Set("User-Agent", g.userAgent)

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	// Client.Do does not treat non-2xx responses as errors; without this check
	// an error page would be parsed and silently yield zero results.
	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("google: unexpected response status %q", res.Status)
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return err
	}

	g.doc = doc
	g.baseSel = doc.Find(`a > h3`)
	g.initDone = true
	return nil
}

// Each calls eachCb once for every result index, in ascending order, and stops
// at the first non-nil error, which it returns. It returns an error without
// calling eachCb if Init has not completed successfully.
func (g *Google) Each(eachCb func(int) error) error {
	if !g.initDone || g.baseSel == nil {
		return errGoogleNotInitialized
	}
	for i := 0; i < g.baseSel.Length(); i++ {
		if err := eachCb(i); err != nil {
			return err
		}
	}
	return nil
}

// Title returns the text of the i-th result heading (the matched h3 element).
// It returns an error if Init has not completed or i is out of range.
func (g *Google) Title(i int) (string, error) {
	sel, err := g.result(i)
	if err != nil {
		return "", err
	}
	return sel.Text(), nil
}

// Link returns the href attribute of the anchor that is the parent of the i-th
// result heading, or "" when the attribute is absent. It returns an error if
// Init has not completed or i is out of range.
func (g *Google) Link(i int) (string, error) {
	sel, err := g.result(i)
	if err != nil {
		return "", err
	}
	return sel.Parent().AttrOr("href", ""), nil
}

// Desc returns the text of the element that follows the grandparent of the
// i-th result heading. It returns an error if Init has not completed or i is
// out of range.
func (g *Google) Desc(i int) (string, error) {
	sel, err := g.result(i)
	if err != nil {
		return "", err
	}
	return sel.Parent().Parent().Next().Text(), nil
}

// Name returns the identifier of this search engine, "google".
func (g *Google) Name() string {
	return "google"
}

// result returns the selection holding only the i-th result heading. It guards
// against use before Init and against indexes outside [0, Length), both of
// which would otherwise panic.
func (g *Google) result(i int) (*goquery.Selection, error) {
	if !g.initDone || g.baseSel == nil {
		return nil, errGoogleNotInitialized
	}
	if n := g.baseSel.Length(); i < 0 || i >= n {
		return nil, fmt.Errorf("google: result index %d out of range [0, %d)", i, n)
	}
	return get(g.baseSel, i), nil
}

// get returns a selection containing only the i-th element of sel. The caller
// is responsible for ensuring i is within range.
func get(sel *goquery.Selection, i int) *goquery.Selection {
	return sel.Slice(i, i+1)
}

// urlMustParse parses urlStr and panics if it is not a valid URL. It is meant
// for package-level initialization from constant strings.
func urlMustParse(urlStr string) *url.URL {
	out, err := url.Parse(urlStr)
	if err != nil {
		panic(fmt.Sprintf("web: invalid URL %q: %v", urlStr, err))
	}
	return out
}

// copyURL returns a shallow copy of orig, so the copy's fields can be
// reassigned without affecting orig.
func copyURL(orig *url.URL) *url.URL {
	newURL := new(url.URL)
	*newURL = *orig
	return newURL
}