Add AOL engine

This commit is contained in:
Elara 2022-06-08 02:40:16 -07:00
parent 0dec4e27aa
commit f1aec25e0b
2 changed files with 147 additions and 1 deletions

4
.gitignore vendored
View File

@ -1,2 +1,4 @@
/static/ext/ /static/ext/
/scope /scope
/cmd/test/
/test

144
search/web/aol.go Normal file
View File

@ -0,0 +1,144 @@
package web
import (
"net/http"
"net/url"
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
)
// aolURL is the base AOL search URL shared by every request. Init copies it
// and then adds the "q" (keyword) and, when a page has been set, the "b"
// query parameters to the copy.
var aolURL = urlMustParse("https://search.aol.com/aol/search?rp=&s_chn=prt_bon&s_it=comsearch")
// AOL holds the request settings and the parsed response for an AOL search.
// Configure it with SetKeyword, SetPage and SetUserAgent, then call Init
// before using Each, Title, Link or Desc.
type AOL struct {
	// keyword is sent as the "q" query parameter
	keyword string
	// userAgent is sent as the User-Agent header; Init fills in a default
	// value when it is empty
	userAgent string
	// page is the value computed by SetPage; Init sends it as the "b" query
	// parameter when it is greater than zero
	page int
	// doc is the parsed response document stored by Init
	doc *goquery.Document
	// initDone is set to true once Init has completed successfully
	initDone bool
	// baseSel is the selection of `h3.title > a[href]` elements found by
	// Init; the accessor methods index into it
	baseSel *goquery.Selection
}
// SetKeyword stores the search term that Init sends as the "q" query
// parameter.
func (a *AOL) SetKeyword(keyword string) {
	a.keyword = keyword
}
// SetPage converts a page number into the value Init sends as the "b" query
// parameter: the page number times ten, plus one whenever that product is
// positive. Page 0 therefore stores 0, which makes Init omit the parameter.
func (a *AOL) SetPage(page int) {
	offset := page * 10
	if offset > 0 {
		offset++
	}
	a.page = offset
}
// SetUserAgent stores the User-Agent header value for the request made by
// Init. When it is left empty, Init substitutes its own default.
func (a *AOL) SetUserAgent(ua string) {
	a.userAgent = ua
}
// Init performs the AOL search request and parses the response.
//
// It builds the request URL from a copy of aolURL, sends a GET request with
// the configured (or default) user agent through http.DefaultClient, parses
// the body into a goquery document and stores the `h3.title > a[href]`
// selection that the accessor methods index into. Any error from building
// the request, performing it, or parsing the body is returned as is.
func (a *AOL) Init() error {
	// Work on a copy so the package-level base URL is not modified
	target := copyURL(aolURL)

	// Add the keyword and, for pages after the first, the "b" parameter
	params := target.Query()
	params.Set("q", a.keyword)
	if a.page > 0 {
		params.Set("b", strconv.Itoa(a.page))
	}
	target.RawQuery = params.Encode()

	req, err := http.NewRequest(http.MethodGet, target.String(), nil)
	if err != nil {
		return err
	}

	// Fall back to a fixed browser user agent when none was configured;
	// the fallback is stored on the receiver
	if a.userAgent == "" {
		a.userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
	}
	req.Header.Set("User-Agent", a.userAgent)

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	parsed, err := goquery.NewDocumentFromReader(res.Body)
	if err != nil {
		return err
	}

	// Keep the document and the result anchors for the accessor methods
	a.doc = parsed
	a.baseSel = parsed.Find(`h3.title > a[href]`)
	a.initDone = true
	return nil
}
// Each calls eachCb once for every index in the result selection, in
// ascending order. Iteration stops at the first non-nil error returned by
// the callback, and that error is returned to the caller.
func (a *AOL) Each(eachCb func(int) error) error {
	idx := 0
	for idx < a.baseSel.Length() {
		if err := eachCb(idx); err != nil {
			return err
		}
		idx++
	}
	return nil
}
// Title returns the text content of the result anchor at index i. The
// returned error is always nil.
func (a *AOL) Title(i int) (string, error) {
	anchor := get(a.baseSel, i)
	title := anchor.Text()
	return title, nil
}
// Link returns the link of the search result corresponding to i.
//
// The anchor's href is scanned for a path segment of the form
// "RU=<escaped value>". When one with a non-empty value is found, the
// unescaped value is returned (presumably the real destination wrapped in
// an AOL redirect URL). Otherwise the href is returned unchanged. An error
// is returned if the href cannot be parsed or the RU value cannot be
// unescaped.
func (a *AOL) Link(i int) (string, error) {
	href := get(a.baseSel, i).AttrOr("href", "")

	hrefURL, err := url.Parse(href)
	if err != nil {
		return "", err
	}

	// Split the escaped form of the path so that encoded slashes inside the
	// RU value are not mistaken for segment separators. EscapedPath is used
	// instead of reading RawPath directly because RawPath is only populated
	// when the original encoding differs from the default encoding of Path,
	// so it can be empty for a perfectly valid path.
	for _, segment := range strings.Split(hrefURL.EscapedPath(), "/") {
		if !strings.HasPrefix(segment, "RU=") {
			continue
		}
		ru := strings.TrimPrefix(segment, "RU=")
		if ru == "" {
			// Only the first RU segment is considered; an empty one
			// means there is nothing to unwrap
			break
		}
		return url.PathUnescape(ru)
	}

	return href, nil
}
// Desc returns the description of the search result corresponding to i.
//
// Starting from the result anchor at index i, it walks up two levels,
// moves to the next sibling element and returns the text of that sibling's
// first child. The returned error is always nil.
func (a *AOL) Desc(i int) (string, error) {
	// Start from the i-th anchor, as Title and Link do. Starting from
	// baseSel.First() would return the first result's description for
	// every index.
	return get(a.baseSel, i).
		Parent().
		Parent().
		Next().
		Children().
		First().
		Text(), nil
}
// Name reports the identifier of this search engine, which is always "aol".
func (*AOL) Name() string {
	const engineName = "aol"
	return engineName
}
// Example search URL: https://search.aol.com/aol/search?q=site%3Alinkedin.com%2Fin%2F+%22Senior+Developer%22+%22Nvidia%22&rp=&s_chn=prt_bon&s_it=comsearch