scope/search/web/web.go

199 lines
4.5 KiB
Go

/*
* Scope - A simple and minimal metasearch engine
* Copyright (C) 2021 Arsen Musayelyan
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package web
import (
	"net/http"
	"sort"
	"sync"
	"time"
	"unicode/utf8"

	"golang.org/x/sync/errgroup"
)
// init gives http.DefaultClient a five second timeout. The setting is
// process-wide: every request made through the default client is affected.
func init() {
	const defaultClientTimeout = 5 * time.Second
	http.DefaultClient.Timeout = defaultClientTimeout
}
// Result is a single web search result, merged across every engine
// that returned the same link.
type Result struct {
	// Title, Link and Desc are the title, URL and description taken from
	// the engine whose entry created this result. Search shortens long
	// descriptions and appends "..." to them.
	Title, Link, Desc string
	// Engines holds the name of each engine that returned Link.
	Engines []string
	// Rank orders results: Search sorts ascending, so lower values come
	// first. When several engines return the same link the lowest rank wins.
	Rank int
}
// Engine represents a search engine for web results (not images, shopping, etc.)
//
// Search drives an engine in a fixed order: it calls SetKeyword,
// SetUserAgent and SetPage, then Init, and finally Each, whose callback
// uses the supplied index with Link, Title and Desc.
type Engine interface {
// Set search keyword for engine
SetKeyword(string)
// Set User Agent. If string is empty,
// an acceptable default should be used.
SetUserAgent(string)
// Set page number to search
SetPage(int)
// Initialize engine (make requests, set variables, etc.)
// A non-nil error makes Search skip the engine and report the error.
Init() error
// Run function for each search result,
// inputting the index of that result.
// Search returns an error from Each to its caller.
Each(func(int) error) error
// Get title from index given by Each()
Title(int) (string, error)
// Get link from index given by Each()
Link(int) (string, error)
// Get description from index given by Each()
Desc(int) (string, error)
// Return shortened name of search engine.
// Should be lowercase (e.g. google, ddg, bing)
Name() string
}
// Options carries the query settings that Search applies to every engine
// before initializing it.
type Options struct {
	// Keyword is the search query and UserAgent the User-Agent string;
	// they are handed to Engine.SetKeyword and Engine.SetUserAgent.
	Keyword, UserAgent string
	// Page is the results page number, handed to Engine.SetPage.
	Page int
}
// Search queries the given engines concurrently and returns their merged
// results sorted by ascending Rank.
//
// Every engine is configured from opts, initialized with Init and then
// iterated with Each. A result's rank is (engine position * 100) + result
// index, so engines passed first are preferred. When several engines return
// the same link it is kept once: each engine name is appended to Engines and
// the lowest rank wins. Results with an empty title, link or description are
// skipped, and descriptions longer than 500 bytes are shortened and suffixed
// with "...".
//
// If an engine fails, the first error reported by the error group is
// returned together with whatever results were collected.
func Search(opts Options, engines ...Engine) ([]*Result, error) {
	var (
		// outMtx guards out and the Results it points to. Every read and
		// write from the engine goroutines must happen while it is held.
		outMtx sync.Mutex
		out    []*Result
	)

	var wg errgroup.Group
	for index, engine := range engines {
		// Copy index and engine so each goroutine captures its own values
		curIndex, curEngine := index, engine
		wg.Go(func() error {
			curEngine.SetKeyword(opts.Keyword)
			curEngine.SetUserAgent(opts.UserAgent)
			curEngine.SetPage(opts.Page)

			if err := curEngine.Init(); err != nil {
				return err
			}

			return curEngine.Each(func(i int) error {
				link, err := curEngine.Link(i)
				if err != nil {
					return err
				}
				rank := (curIndex * 100) + i
				name := curEngine.Name()

				// If another engine already produced this link, only
				// record this engine on the existing result.
				outMtx.Lock()
				merged := mergeDuplicate(out, link, name, rank)
				outMtx.Unlock()
				if merged {
					return nil
				}

				// The engine is queried without holding the lock so that
				// engines do not serialize on each other.
				title, err := curEngine.Title(i)
				if err != nil {
					return err
				}
				desc, err := curEngine.Desc(i)
				if err != nil {
					return err
				}

				// Ignore incomplete results
				if title == "" || link == "" || desc == "" {
					return nil
				}
				desc = truncateDesc(desc)

				outMtx.Lock()
				defer outMtx.Unlock()
				// Check again: another engine may have added the same link
				// while the lock was released.
				if mergeDuplicate(out, link, name, rank) {
					return nil
				}
				out = append(out, &Result{
					Title:   title,
					Link:    link,
					Desc:    desc,
					Rank:    rank,
					Engines: []string{name},
				})
				return nil
			})
		})
	}

	// Sort only after every goroutine has finished. Sorting while engines
	// are still appending would race with them and shuffle the indices
	// they rely on.
	err := wg.Wait()
	sort.Slice(out, func(i, j int) bool {
		return out[i].Rank < out[j].Rank
	})
	return out, err
}

// mergeDuplicate looks for link in results. If it is present, engine is
// appended to the existing result's Engines, its Rank is lowered to rank
// when rank is smaller, and true is returned. Otherwise results is left
// untouched and false is returned. The caller must hold the lock that
// guards results.
func mergeDuplicate(results []*Result, link, engine string, rank int) bool {
	index, exists := linkExists(results, link)
	if !exists {
		return false
	}
	existing := results[index]
	existing.Engines = append(existing.Engines, engine)
	if rank < existing.Rank {
		existing.Rank = rank
	}
	return true
}

// truncateDesc shortens desc to at most 500 bytes and appends "..." when it
// is longer than that. The cut is moved back to the start of a rune so that
// a multi-byte UTF-8 character is never split in half.
func truncateDesc(desc string) string {
	const maxDescLen = 500
	if len(desc) <= maxDescLen {
		return desc
	}
	cut := maxDescLen
	// A rune is at most utf8.UTFMax bytes long, so its first byte is never
	// further back than that; the bound also keeps invalid input from
	// walking the cut all the way to zero.
	for cut > maxDescLen-utf8.UTFMax && !utf8.RuneStart(desc[cut]) {
		cut--
	}
	return desc[:cut] + "..."
}
// linkExists reports whether any entry of results has the given link.
// It returns the index of the first match and true, or -1 and false
// when there is none.
func linkExists(results []*Result, link string) (int, bool) {
	for i := 0; i < len(results); i++ {
		if results[i].Link != link {
			continue
		}
		return i, true
	}
	return -1, false
}