// Package scraper provides Session, a cookie-aware HTTP session that sends browser-like default headers.
package scraper
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/json"
|
|
"log"
|
|
"net/http"
|
|
"net/http/cookiejar"
|
|
"net/url"
|
|
"time"
|
|
|
|
"golang.org/x/net/publicsuffix"
|
|
)
|
|
|
|
// Session is an HTTP session that reuses a single client, and with it a
// single cookie jar, across requests, and applies a shared set of default
// headers to every request it sends.
type Session struct {
	// client performs every request issued through the session.
	client *http.Client

	// headers holds the default headers copied onto each outgoing request.
	headers map[string]string

	// baseURL, when non-empty, is prepended to request URLs that are not
	// absolute.
	baseURL string

	// UserAgent is initialised by NewSession to the same string as the
	// default "User-Agent" header.
	// NOTE(review): nothing in this file reads this field when building a
	// request; use SetHeader("User-Agent", ...) to change what is sent.
	UserAgent string
}
|
|
|
|
// NewSession creates a new session with browser-like headers
|
|
func NewSession() *Session {
|
|
// Create cookie jar first
|
|
jar, err := cookiejar.New(&cookiejar.Options{
|
|
PublicSuffixList: publicsuffix.List,
|
|
})
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
|
|
return &Session{
|
|
client: &http.Client{
|
|
Timeout: 30 * time.Second,
|
|
Jar: jar, // Set the cookie jar
|
|
},
|
|
headers: map[string]string{
|
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
|
"Accept-Language": "en-US,en;q=0.5",
|
|
"Accept-Encoding": "gzip, deflate, br",
|
|
"Connection": "keep-alive",
|
|
"Upgrade-Insecure-Requests": "1",
|
|
},
|
|
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
|
}
|
|
}
|
|
|
|
// SetHeader sets a header for all subsequent requests
|
|
func (s *Session) SetHeader(key, value string) {
|
|
s.headers[key] = value
|
|
}
|
|
|
|
// SetHeaders sets multiple headers at once
|
|
func (s *Session) SetHeaders(headers map[string]string) {
|
|
for key, value := range headers {
|
|
s.headers[key] = value
|
|
}
|
|
}
|
|
|
|
// SetBaseURL sets the base URL for relative paths
|
|
func (s *Session) SetBaseURL(baseURL string) {
|
|
s.baseURL = baseURL
|
|
}
|
|
|
|
// Get performs a GET request
|
|
func (s *Session) Get(url string, headers ...map[string]string) (*http.Response, error) {
|
|
// Use base URL if set and url is relative
|
|
fullURL := s.buildURL(url)
|
|
|
|
req, err := http.NewRequest("GET", fullURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
s.setDefaultHeaders(req)
|
|
|
|
// Add any additional headers provided
|
|
if len(headers) > 0 {
|
|
for key, value := range headers[0] {
|
|
req.Header.Set(key, value)
|
|
}
|
|
}
|
|
|
|
return s.client.Do(req)
|
|
}
|
|
|
|
// Post performs a POST request with form data
|
|
func (s *Session) Post(url string, data map[string]string, headers ...map[string]string) (*http.Response, error) {
|
|
fullURL := s.buildURL(url)
|
|
|
|
// This is the corrected line - url.Values is from net/url package
|
|
formData := make(url.Values)
|
|
for key, value := range data {
|
|
formData.Add(key, value)
|
|
}
|
|
|
|
req, err := http.NewRequest("POST", fullURL, bytes.NewBufferString(formData.Encode()))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
s.setDefaultHeaders(req)
|
|
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
|
|
|
// Add any additional headers provided
|
|
if len(headers) > 0 {
|
|
for key, value := range headers[0] {
|
|
req.Header.Set(key, value)
|
|
}
|
|
}
|
|
|
|
return s.client.Do(req)
|
|
}
|
|
|
|
// PostJSON performs a POST request with JSON data
|
|
func (s *Session) PostJSON(url string, data interface{}, headers ...map[string]string) (*http.Response, error) {
|
|
fullURL := s.buildURL(url)
|
|
|
|
jsonData, err := json.Marshal(data)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
req, err := http.NewRequest("POST", fullURL, bytes.NewBuffer(jsonData))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
s.setDefaultHeaders(req)
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
// Add any additional headers provided
|
|
if len(headers) > 0 {
|
|
for key, value := range headers[0] {
|
|
req.Header.Set(key, value)
|
|
}
|
|
}
|
|
|
|
return s.client.Do(req)
|
|
}
|
|
|
|
// buildURL constructs the full URL using baseURL if set
|
|
func (s *Session) buildURL(path string) string {
|
|
if s.baseURL != "" && !isAbsoluteURL(path) {
|
|
return s.baseURL + path
|
|
}
|
|
return path
|
|
}
|
|
|
|
// isAbsoluteURL reports whether urlStr parses as a URL that carries both a
// scheme and a host. Strings that fail to parse are reported as not
// absolute.
func isAbsoluteURL(urlStr string) bool {
	parsed, err := url.Parse(urlStr)
	if err != nil {
		return false
	}
	hasScheme := parsed.Scheme != ""
	hasHost := parsed.Host != ""
	return hasScheme && hasHost
}
|
|
|
|
// setDefaultHeaders sets the default browser-like headers
|
|
func (s *Session) setDefaultHeaders(req *http.Request) {
|
|
for key, value := range s.headers {
|
|
req.Header.Set(key, value)
|
|
}
|
|
}
|
|
|
|
// GetCookies returns cookies for a given URL
|
|
func (s *Session) GetCookies(urlStr string) []*http.Cookie {
|
|
u, err := url.Parse(urlStr)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
return s.client.Jar.Cookies(u)
|
|
}
|
|
|
|
// SetCookies sets cookies for a given URL
|
|
func (s *Session) SetCookies(urlStr string, cookies []*http.Cookie) {
|
|
u, err := url.Parse(urlStr)
|
|
if err != nil {
|
|
return
|
|
}
|
|
s.client.Jar.SetCookies(u, cookies)
|
|
}
|