Initial Commit
This commit is contained in:
97
search/web/bing.go
Normal file
97
search/web/bing.go
Normal file
@@ -0,0 +1,97 @@
|
||||
package web
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// bingURL is the base Bing search URL; count=10 is carried into every
// request built from a copy of it.
var bingURL = urlMustParse("https://www.bing.com/search?count=10")
|
||||
|
||||
type Bing struct {
|
||||
keyword string
|
||||
userAgent string
|
||||
first int
|
||||
doc *goquery.Document
|
||||
initDone bool
|
||||
baseSel *goquery.Selection
|
||||
}
|
||||
|
||||
func (b *Bing) SetKeyword(keyword string) {
|
||||
b.keyword = keyword
|
||||
}
|
||||
|
||||
func (b *Bing) SetPage(page int) {
|
||||
b.first = page * 10
|
||||
}
|
||||
|
||||
func (b *Bing) SetUserAgent(ua string) {
|
||||
b.userAgent = ua
|
||||
}
|
||||
|
||||
func (b *Bing) Init() error {
|
||||
initURL := copyURL(bingURL)
|
||||
query := initURL.Query()
|
||||
query.Set("q", b.keyword)
|
||||
if b.first > 0 {
|
||||
query.Set("first", strconv.Itoa(b.first))
|
||||
} else {
|
||||
query.Set("first", "1")
|
||||
}
|
||||
initURL.RawQuery = query.Encode()
|
||||
|
||||
req, err := http.NewRequest(
|
||||
http.MethodGet,
|
||||
initURL.String(),
|
||||
nil,
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if b.userAgent == "" {
|
||||
b.userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
|
||||
}
|
||||
req.Header.Set("User-Agent", b.userAgent)
|
||||
|
||||
res, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b.doc = doc
|
||||
b.baseSel = doc.Find(`#b_results > li`)
|
||||
b.initDone = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Bing) Each(eachCb func(int) error) error {
|
||||
for i := 0; i < b.baseSel.Length(); i++ {
|
||||
err := eachCb(i)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Bing) Title(i int) (string, error) {
|
||||
return get(b.baseSel, i).ChildrenFiltered("h2").Children().First().Text(), nil
|
||||
}
|
||||
|
||||
func (b *Bing) Link(i int) (string, error) {
|
||||
return get(b.baseSel, i).ChildrenFiltered("h2").Children().First().AttrOr("href", ""), nil
|
||||
}
|
||||
|
||||
func (b *Bing) Desc(i int) (string, error) {
|
||||
return get(b.baseSel, i).ChildrenFiltered(".b_caption").Children().Last().Text(), nil
|
||||
}
|
||||
|
||||
func (b *Bing) Name() string {
|
||||
return "bing"
|
||||
}
|
||||
103
search/web/ddg.go
Normal file
103
search/web/ddg.go
Normal file
@@ -0,0 +1,103 @@
|
||||
package web
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// ddgURL is the base URL of DuckDuckGo's HTML results endpoint; requests are
// built from a copy of it.
var ddgURL = urlMustParse("https://html.duckduckgo.com/html")
|
||||
|
||||
// uddgPrefix marks result links whose real target is carried in the "uddg"
// query parameter.
const uddgPrefix = "//duckduckgo.com/l/?uddg="
|
||||
|
||||
type DDG struct {
|
||||
keyword string
|
||||
userAgent string
|
||||
page int
|
||||
doc *goquery.Document
|
||||
initDone bool
|
||||
baseSel *goquery.Selection
|
||||
}
|
||||
|
||||
func (d *DDG) SetKeyword(keyword string) {
|
||||
d.keyword = keyword
|
||||
}
|
||||
|
||||
func (d *DDG) SetPage(page int) {
|
||||
d.page = page * 30
|
||||
}
|
||||
|
||||
func (d *DDG) SetUserAgent(ua string) {
|
||||
d.userAgent = "Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10" //ua
|
||||
}
|
||||
|
||||
func (d *DDG) Init() error {
|
||||
initURL := copyURL(ddgURL)
|
||||
query := initURL.Query()
|
||||
query.Set("q", d.keyword)
|
||||
if d.page > 0 {
|
||||
query.Set("s", strconv.Itoa(d.page))
|
||||
query.Set("dc", strconv.Itoa(d.page+1))
|
||||
}
|
||||
initURL.RawQuery = query.Encode()
|
||||
|
||||
req, err := http.NewRequest(
|
||||
http.MethodGet,
|
||||
initURL.String(),
|
||||
nil,
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if d.userAgent == "" {
|
||||
d.userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
|
||||
}
|
||||
req.Header.Set("User-Agent", d.userAgent)
|
||||
|
||||
res, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d.doc = doc
|
||||
d.baseSel = doc.Find(`#links > .result`)
|
||||
d.initDone = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *DDG) Each(eachCb func(int) error) error {
|
||||
for i := 0; i < d.baseSel.Length(); i++ {
|
||||
err := eachCb(i)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *DDG) Title(i int) (string, error) {
|
||||
return strings.TrimSpace(get(d.baseSel, i).Children().First().ChildrenFiltered("h2").Text()), nil
|
||||
}
|
||||
|
||||
func (d *DDG) Link(i int) (string, error) {
|
||||
link := get(d.baseSel, i).Children().First().ChildrenFiltered("a").AttrOr("href", "")
|
||||
if strings.HasPrefix(link, uddgPrefix) {
|
||||
link = urlMustParse(link).Query().Get("uddg")
|
||||
}
|
||||
return link, nil
|
||||
}
|
||||
|
||||
func (d *DDG) Desc(i int) (string, error) {
|
||||
return get(d.baseSel, i).Children().First().ChildrenFiltered("a").Text(), nil
|
||||
}
|
||||
|
||||
func (d *DDG) Name() string {
|
||||
return "ddg"
|
||||
}
|
||||
108
search/web/google.go
Normal file
108
search/web/google.go
Normal file
@@ -0,0 +1,108 @@
|
||||
package web
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// googleURL is the base Google search URL; requests are built from a copy
// of it.
var googleURL = urlMustParse("https://www.google.com/search")
|
||||
|
||||
type Google struct {
|
||||
keyword string
|
||||
userAgent string
|
||||
page int
|
||||
doc *goquery.Document
|
||||
initDone bool
|
||||
baseSel *goquery.Selection
|
||||
}
|
||||
|
||||
func (g *Google) SetKeyword(keyword string) {
|
||||
g.keyword = keyword
|
||||
}
|
||||
|
||||
func (g *Google) SetPage(page int) {
|
||||
g.page = page * 10
|
||||
}
|
||||
|
||||
func (g *Google) SetUserAgent(ua string) {
|
||||
g.userAgent = ua
|
||||
}
|
||||
|
||||
func (g *Google) Init() error {
|
||||
initURL := copyURL(googleURL)
|
||||
query := initURL.Query()
|
||||
query.Set("q", g.keyword)
|
||||
query.Set("start", strconv.Itoa(g.page))
|
||||
initURL.RawQuery = query.Encode()
|
||||
req, err := http.NewRequest(
|
||||
http.MethodGet,
|
||||
initURL.String(),
|
||||
nil,
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if g.userAgent == "" {
|
||||
g.userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
|
||||
}
|
||||
req.Header.Set("User-Agent", g.userAgent)
|
||||
|
||||
res, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
g.doc = doc
|
||||
g.baseSel = doc.Find(`a > h3`)
|
||||
g.initDone = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (g *Google) Each(eachCb func(int) error) error {
|
||||
for i := 0; i < g.baseSel.Length(); i++ {
|
||||
err := eachCb(i)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (g *Google) Title(i int) (string, error) {
|
||||
return get(g.baseSel, i).Text(), nil
|
||||
}
|
||||
|
||||
func (g *Google) Link(i int) (string, error) {
|
||||
return get(g.baseSel, i).Parent().AttrOr("href", ""), nil
|
||||
}
|
||||
|
||||
func (g *Google) Desc(i int) (string, error) {
|
||||
return get(g.baseSel, i).Parent().Parent().Next().Text(), nil
|
||||
}
|
||||
|
||||
func (g *Google) Name() string {
|
||||
return "google"
|
||||
}
|
||||
|
||||
func get(sel *goquery.Selection, i int) *goquery.Selection {
|
||||
return sel.Slice(i, i+1)
|
||||
}
|
||||
|
||||
// urlMustParse parses urlStr and returns the resulting URL.
//
// It panics with the parse error when urlStr is not a valid URL, following
// the Must-helper convention. Silently returning nil instead would only
// defer the failure to a nil dereference at the first use of the result.
func urlMustParse(urlStr string) *url.URL {
	out, err := url.Parse(urlStr)
	if err != nil {
		panic(err)
	}
	return out
}
|
||||
|
||||
// copyURL returns a pointer to a shallow copy of *orig, so the copy's
// fields can be reassigned without touching the original struct.
func copyURL(orig *url.URL) *url.URL {
	dup := *orig
	return &dup
}
|
||||
151
search/web/search.go
Normal file
151
search/web/search.go
Normal file
@@ -0,0 +1,151 @@
|
||||
package web
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sync/errgroup"
|
||||
)
|
||||
|
||||
// init sets a 5 second timeout on http.DefaultClient. This affects every
// user of the default client in the process, not only this package.
func init() {
	const requestTimeout = 5 * time.Second
	http.DefaultClient.Timeout = requestTimeout
}
|
||||
|
||||
// Result is a single search hit, merged across the engines that returned
// the same link.
type Result struct {
	// Title is the result's title text.
	Title string
	// Link is the result's target URL; it is the key used for merging.
	Link string
	// Desc is the result's description text.
	Desc string
	// Engines lists the names of the engines that returned this link.
	Engines []string
	// Rank orders results; lower values sort first.
	Rank int
}
|
||||
|
||||
// Engine is implemented by each search backend that Search can query.
type Engine interface {
	// SetKeyword sets the search keyword for the engine.
	SetKeyword(string)

	// SetUserAgent sets the User-Agent. If the string is empty,
	// an acceptable default should be used.
	SetUserAgent(string)

	// SetPage sets the page number to search.
	SetPage(int)

	// Init initializes the engine (makes requests, sets variables, etc.).
	Init() error

	// Each runs the function once for every search result,
	// passing in the result's index.
	Each(func(int) error) error

	// Title gets the title for an index given by Each().
	Title(int) (string, error)
	// Link gets the link for an index given by Each().
	Link(int) (string, error)
	// Desc gets the description for an index given by Each().
	Desc(int) (string, error)

	// Name returns the shortened name of the search engine.
	// It should be lowercase (e.g. google, ddg, bing).
	Name() string
}
|
||||
|
||||
// Options holds the settings Search applies to every engine.
type Options struct {
	// Keyword is passed to each engine's SetKeyword.
	Keyword string
	// UserAgent is passed to each engine's SetUserAgent.
	UserAgent string
	// Page is passed to each engine's SetPage.
	Page int
}
|
||||
|
||||
func Search(opts Options, engines ...Engine) ([]*Result, error) {
|
||||
var outMtx sync.Mutex
|
||||
var out []*Result
|
||||
|
||||
wg := errgroup.Group{}
|
||||
for index, engine := range engines {
|
||||
curIndex, curEngine := index, engine
|
||||
wg.Go(func() error {
|
||||
|
||||
curEngine.SetKeyword(opts.Keyword)
|
||||
curEngine.SetUserAgent(opts.UserAgent)
|
||||
curEngine.SetPage(opts.Page)
|
||||
|
||||
if err := curEngine.Init(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err := curEngine.Each(func(i int) error {
|
||||
link, err := curEngine.Link(i)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rank := (curIndex * 100) + i
|
||||
|
||||
index, exists := linkExists(out, link)
|
||||
if exists {
|
||||
out[index].Engines = append(out[index].Engines, curEngine.Name())
|
||||
if rank < out[index].Rank {
|
||||
out[index].Rank = rank
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
title, err := curEngine.Title(i)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
desc, err := curEngine.Desc(i)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if title == "" || link == "" || desc == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
if len(desc) > 500 {
|
||||
desc = desc[:500] + "..."
|
||||
}
|
||||
|
||||
result := &Result{
|
||||
Title: title,
|
||||
Link: link,
|
||||
Desc: desc,
|
||||
Rank: rank,
|
||||
}
|
||||
result.Engines = append(result.Engines, curEngine.Name())
|
||||
|
||||
outMtx.Lock()
|
||||
out = append(out, result)
|
||||
outMtx.Unlock()
|
||||
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sort.Slice(out, func(i, j int) bool {
|
||||
return out[i].Rank < out[j].Rank
|
||||
})
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
if err := wg.Wait(); err != nil {
|
||||
return out, err
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func linkExists(results []*Result, link string) (int, bool) {
|
||||
for index, result := range results {
|
||||
if result.Link == link {
|
||||
return index, true
|
||||
}
|
||||
}
|
||||
return -1, false
|
||||
}
|
||||
Reference in New Issue
Block a user