Add AOL engine
This commit is contained in:
parent
0dec4e27aa
commit
f1aec25e0b
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,2 +1,4 @@
|
||||
/static/ext/
|
||||
/scope
|
||||
/cmd/test/
|
||||
/test
|
144
search/web/aol.go
Normal file
144
search/web/aol.go
Normal file
@ -0,0 +1,144 @@
|
||||
package web
|
||||
|
||||
import (
	"fmt"
	"net/http"
	"net/url"
	"strconv"
	"strings"

	"github.com/PuerkitoBio/goquery"
)
|
||||
|
||||
var aolURL = urlMustParse("https://search.aol.com/aol/search?rp=&s_chn=prt_bon&s_it=comsearch")
|
||||
|
||||
type AOL struct {
|
||||
keyword string
|
||||
userAgent string
|
||||
page int
|
||||
doc *goquery.Document
|
||||
initDone bool
|
||||
baseSel *goquery.Selection
|
||||
}
|
||||
|
||||
// SetKeyword sets the keyword for searching
|
||||
func (a *AOL) SetKeyword(keyword string) {
|
||||
a.keyword = keyword
|
||||
}
|
||||
|
||||
// SetPage sets the page number for searching
|
||||
func (a *AOL) SetPage(page int) {
|
||||
a.page = page * 10
|
||||
if a.page > 0 {
|
||||
a.page++
|
||||
}
|
||||
}
|
||||
|
||||
// SetUserAgent sets the user agent to use for the request
|
||||
func (a *AOL) SetUserAgent(ua string) {
|
||||
a.userAgent = ua
|
||||
}
|
||||
|
||||
// Init runs requests for Bing search engine
|
||||
func (a *AOL) Init() error {
|
||||
// Copy URL so it can be changed
|
||||
initURL := copyURL(aolURL)
|
||||
query := initURL.Query()
|
||||
// Set query
|
||||
query.Set("q", a.keyword)
|
||||
if a.page > 0 {
|
||||
query.Set("b", strconv.Itoa(a.page))
|
||||
}
|
||||
// Update URL query parameters
|
||||
initURL.RawQuery = query.Encode()
|
||||
|
||||
// Create new request for modified URL
|
||||
req, err := http.NewRequest(
|
||||
http.MethodGet,
|
||||
initURL.String(),
|
||||
nil,
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// If no user agent, use default
|
||||
if a.userAgent == "" {
|
||||
a.userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
|
||||
}
|
||||
// Set request user agent
|
||||
req.Header.Set("User-Agent", a.userAgent)
|
||||
|
||||
// Perform request
|
||||
res, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
// Create new goquery document
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
a.doc = doc
|
||||
a.baseSel = doc.Find(`h3.title > a[href]`)
|
||||
a.initDone = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// Each runs eachCb with the index of each search result
|
||||
func (a *AOL) Each(eachCb func(int) error) error {
|
||||
for i := 0; i < a.baseSel.Length(); i++ {
|
||||
err := eachCb(i)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Title returns the title of the search result corresponding to i
|
||||
func (a *AOL) Title(i int) (string, error) {
|
||||
return get(a.baseSel, i).Text(), nil
|
||||
}
|
||||
|
||||
// Link returns the link to the search result corresponding to i
|
||||
func (a *AOL) Link(i int) (string, error) {
|
||||
href := get(a.baseSel, i).AttrOr("href", "")
|
||||
hrefURL, err := url.Parse(href)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var ru string
|
||||
splitPath := strings.Split(hrefURL.RawPath, "/")
|
||||
for _, item := range splitPath {
|
||||
if strings.HasPrefix(item, "RU=") {
|
||||
ru = strings.TrimPrefix(item, "RU=")
|
||||
break
|
||||
}
|
||||
}
|
||||
if ru == "" {
|
||||
return href, nil
|
||||
}
|
||||
|
||||
return url.PathUnescape(ru)
|
||||
}
|
||||
|
||||
// Desc returns the description of the search result corresponding to i
|
||||
func (a *AOL) Desc(i int) (string, error) {
|
||||
return a.baseSel.
|
||||
First().
|
||||
Parent().
|
||||
Parent().
|
||||
Next().
|
||||
Children().
|
||||
First().
|
||||
Text(), nil
|
||||
}
|
||||
|
||||
// Name returns "aol"
|
||||
func (*AOL) Name() string {
|
||||
return "aol"
|
||||
}
|
||||
|
||||
// https://search.aol.com/aol/search?q=site%3Alinkedin.com%2Fin%2F+%22Senior+Developer%22+%22Nvidia%22&rp=&s_chn=prt_bon&s_it=comsearch
|
Loading…
Reference in New Issue
Block a user