/*
 * Scope - A simple and minimal metasearch engine
 * Copyright (C) 2021 Arsen Musayelyan
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
|
|
|
|
package web
|
|
|
|
import (
	"net/http"
	"sort"
	"sync"
	"time"
	"unicode/utf8"

	"golang.org/x/sync/errgroup"
)
|
|
|
|
// init gives the shared http.DefaultClient a five second timeout.
func init() {
	const requestTimeout = 5 * time.Second

	http.DefaultClient.Timeout = requestTimeout
}
|
|
|
|
// Result is a single entry in the list of results produced by Search.
type Result struct {
	// Title, Link and Desc hold the result's title, link and description
	Title, Link, Desc string
	// Engines holds the names of the engines that returned this link
	Engines []string
	// Rank is the position of the result; Search orders results
	// by ascending rank, so a lower value is listed earlier
	Rank int
}
|
|
|
|
// Engine is the contract a search engine for web results
// (not images, shopping, etc.) has to fulfil to be passed to Search.
type Engine interface {
	// SetKeyword sets the keyword the engine searches for.
	SetKeyword(keyword string)

	// SetUserAgent sets the User-Agent of the engine. If the string
	// is empty, an acceptable one should be used instead.
	SetUserAgent(userAgent string)

	// SetPage sets the number of the page to search.
	SetPage(page int)

	// Init initializes the engine (makes requests, sets variables, etc.).
	Init() error

	// Each runs fn once for every search result,
	// passing it the index of that result.
	Each(fn func(index int) error) error

	// Title returns the title for an index given by Each.
	Title(index int) (string, error)
	// Link returns the link for an index given by Each.
	Link(index int) (string, error)
	// Desc returns the description for an index given by Each.
	Desc(index int) (string, error)

	// Name returns the shortened name of the search engine.
	// It should be lowercase (e.g. google, ddg, bing).
	Name() string
}
|
|
|
|
// Options holds the settings that Search applies to every engine.
type Options struct {
	// Keyword is handed to each engine's SetKeyword;
	// UserAgent is handed to each engine's SetUserAgent
	Keyword, UserAgent string
	// Page is handed to each engine's SetPage
	Page int
}
|
|
|
|
// Search searches the given engines concurrently and returns the results
|
|
func Search(opts Options, engines ...Engine) ([]*Result, error) {
|
|
var outMtx sync.Mutex
|
|
var out []*Result
|
|
|
|
// Create new error group
|
|
wg := errgroup.Group{}
|
|
// For every engine
|
|
for index, engine := range engines {
|
|
// Copy index and engine (for goroutine)
|
|
curIndex, curEngine := index, engine
|
|
wg.Go(func() error {
|
|
// Set options
|
|
curEngine.SetKeyword(opts.Keyword)
|
|
curEngine.SetUserAgent(opts.UserAgent)
|
|
curEngine.SetPage(opts.Page)
|
|
|
|
// Attempt to init engine
|
|
if err := curEngine.Init(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// For each result
|
|
err := curEngine.Each(func(i int) error {
|
|
// Get result link
|
|
link, err := curEngine.Link(i)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Calculate result rank
|
|
rank := (curIndex * 100) + i
|
|
|
|
// Check if result exists
|
|
index, exists := linkExists(out, link)
|
|
// If result already exists
|
|
if exists {
|
|
// Add engine to the existing result
|
|
out[index].Engines = append(out[index].Engines, curEngine.Name())
|
|
// If the rank is higher than the old one, update it
|
|
if rank < out[index].Rank {
|
|
out[index].Rank = rank
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Get result title
|
|
title, err := curEngine.Title(i)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Get result description
|
|
desc, err := curEngine.Desc(i)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// If title, link, or description empty, ignore
|
|
if title == "" || link == "" || desc == "" {
|
|
return nil
|
|
}
|
|
|
|
// If length of description, truncate
|
|
if len(desc) > 500 {
|
|
desc = desc[:500] + "..."
|
|
}
|
|
|
|
// Create result struct
|
|
result := &Result{
|
|
Title: title,
|
|
Link: link,
|
|
Desc: desc,
|
|
Rank: rank,
|
|
Engines: []string{curEngine.Name()},
|
|
}
|
|
|
|
// Lock out mutex
|
|
outMtx.Lock()
|
|
// Add result to slice
|
|
out = append(out, result)
|
|
// Unlock out mutex
|
|
outMtx.Unlock()
|
|
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Sort slice by rank
|
|
sort.Slice(out, func(i, j int) bool {
|
|
return out[i].Rank < out[j].Rank
|
|
})
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// Wait for error group
|
|
if err := wg.Wait(); err != nil {
|
|
return out, err
|
|
}
|
|
|
|
return out, nil
|
|
}
|
|
|
|
// linkExists checks if a link exists in the results
|
|
func linkExists(results []*Result, link string) (int, bool) {
|
|
// For every result
|
|
for index, result := range results {
|
|
// If link is the same as provided
|
|
if result.Link == link {
|
|
// Return index with true
|
|
return index, true
|
|
}
|
|
}
|
|
return -1, false
|
|
}
|