2025-10-08 10:27:16 +02:00

182 lines
4.4 KiB
Go

package scraper
import (
"bytes"
"encoding/json"
"log"
"net/http"
"net/http/cookiejar"
"net/url"
"time"
"golang.org/x/net/publicsuffix"
)
// Session represents a persistent HTTP session: one http.Client plus the
// default headers and optional base URL that the request methods (Get, Post,
// PostJSON) apply to every call.
type Session struct {
	// client executes every request. NewSession configures it with a
	// 30-second timeout and a cookie jar.
	client *http.Client
	// headers holds the default request headers; setDefaultHeaders copies
	// them onto each outgoing request.
	headers map[string]string
	// baseURL, when non-empty, is prepended by buildURL to any URL that is
	// not already absolute.
	baseURL string
	// UserAgent is initialised by NewSession to the same string as the
	// default "User-Agent" header. NOTE(review): nothing in the code shown
	// here reads this field when building requests — the header map is what
	// is actually sent; confirm whether other code consumes it.
	UserAgent string
}
// NewSession creates a Session with a cookie jar, a 30-second client timeout
// and a set of browser-like default headers.
//
// The defaults deliberately do not include Accept-Encoding. When a request
// carries its own Accept-Encoding header, net/http's Transport no longer
// decompresses the response transparently, and the standard library has no
// Brotli ("br") decoder, so advertising "gzip, deflate, br" hands callers
// compressed bytes. Leaving the header unset lets the Transport negotiate
// gzip and decode it automatically. Callers that want to manage encodings
// themselves can opt back in with SetHeader.
//
// If the cookie jar cannot be created the process is terminated via
// log.Fatal.
func NewSession() *Session {
	// Single source of truth for the browser identity shared by the
	// "User-Agent" header and the exported UserAgent field.
	const userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

	// The public suffix list stops the jar from accepting cookies scoped to
	// a whole public suffix such as "co.uk".
	jar, err := cookiejar.New(&cookiejar.Options{
		PublicSuffixList: publicsuffix.List,
	})
	if err != nil {
		log.Fatal(err)
	}

	return &Session{
		client: &http.Client{
			Timeout: 30 * time.Second,
			Jar:     jar,
		},
		headers: map[string]string{
			"User-Agent":                userAgent,
			"Accept":                    "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
			"Accept-Language":           "en-US,en;q=0.5",
			"Connection":                "keep-alive",
			"Upgrade-Insecure-Requests": "1",
		},
		UserAgent: userAgent,
	}
}
// SetHeader sets a default header that is sent with all subsequent requests,
// replacing any previous value for the same header.
//
// The name is stored in canonical MIME form (http.CanonicalHeaderKey), so
// SetHeader("user-agent", v) replaces an existing "User-Agent" entry instead
// of adding a second one. Two entries that differ only in case would both
// map to the same request header and be applied in unspecified map order,
// making the value actually sent unpredictable.
func (s *Session) SetHeader(key, value string) {
	// Tolerate a Session whose header map was never initialised; writing to
	// a nil map would panic.
	if s.headers == nil {
		s.headers = make(map[string]string)
	}
	s.headers[http.CanonicalHeaderKey(key)] = value
}
// SetHeaders sets several default headers at once; each entry replaces any
// previous value for the same header. A nil or empty map is a no-op.
//
// Names are stored in canonical MIME form (http.CanonicalHeaderKey) so that
// an entry such as "accept" overrides an existing "Accept" default rather
// than coexisting with it. Entries differing only in case would map to the
// same request header and be applied in unspecified map order.
func (s *Session) SetHeaders(headers map[string]string) {
	if len(headers) == 0 {
		return
	}
	// Tolerate a Session whose header map was never initialised; writing to
	// a nil map would panic.
	if s.headers == nil {
		s.headers = make(map[string]string, len(headers))
	}
	for key, value := range headers {
		s.headers[http.CanonicalHeaderKey(key)] = value
	}
}
// SetBaseURL records the prefix that buildURL prepends to URLs which are not
// already absolute. Passing an empty string turns the prefixing off, because
// buildURL only prefixes when the base is non-empty.
func (s *Session) SetBaseURL(baseURL string) {
	s.baseURL = baseURL
}
// Get performs a GET request and returns the raw response.
//
// rawURL is passed through buildURL, which prefixes the session's base URL
// when rawURL is not already absolute. The session's default headers are
// applied first; every map in headers is then applied in argument order, so
// per-call values override the defaults and later maps override earlier
// ones.
//
// On success the caller owns the response and must close resp.Body.
func (s *Session) Get(rawURL string, headers ...map[string]string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, s.buildURL(rawURL), nil)
	if err != nil {
		return nil, err
	}
	s.setDefaultHeaders(req)
	// The parameter is variadic, so honour every map supplied rather than
	// silently dropping all but the first.
	for _, extra := range headers {
		for key, value := range extra {
			req.Header.Set(key, value)
		}
	}
	return s.client.Do(req)
}
// Post performs a POST request whose body is data encoded as
// application/x-www-form-urlencoded, and returns the raw response.
//
// rawURL is passed through buildURL, which prefixes the session's base URL
// when rawURL is not already absolute. Headers are applied in this order,
// later values winning: the session defaults, the form Content-Type, then
// every map in headers in argument order.
//
// On success the caller owns the response and must close resp.Body.
func (s *Session) Post(rawURL string, data map[string]string, headers ...map[string]string) (*http.Response, error) {
	// The first parameter must not be named "url": that would shadow the
	// net/url package and make url.Values below unresolvable.
	form := make(url.Values, len(data))
	for key, value := range data {
		form.Set(key, value)
	}

	req, err := http.NewRequest(http.MethodPost, s.buildURL(rawURL), bytes.NewBufferString(form.Encode()))
	if err != nil {
		return nil, err
	}
	s.setDefaultHeaders(req)
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	// The parameter is variadic, so honour every map supplied rather than
	// silently dropping all but the first.
	for _, extra := range headers {
		for key, value := range extra {
			req.Header.Set(key, value)
		}
	}
	return s.client.Do(req)
}
// PostJSON performs a POST request whose body is data marshalled with
// encoding/json, and returns the raw response.
//
// rawURL is passed through buildURL, which prefixes the session's base URL
// when rawURL is not already absolute. Headers are applied in this order,
// later values winning: the session defaults, "Content-Type:
// application/json", then every map in headers in argument order.
//
// If data cannot be marshalled the error from json.Marshal is returned and
// no request is sent. On success the caller owns the response and must close
// resp.Body.
func (s *Session) PostJSON(rawURL string, data interface{}, headers ...map[string]string) (*http.Response, error) {
	payload, err := json.Marshal(data)
	if err != nil {
		return nil, err
	}

	req, err := http.NewRequest(http.MethodPost, s.buildURL(rawURL), bytes.NewReader(payload))
	if err != nil {
		return nil, err
	}
	s.setDefaultHeaders(req)
	req.Header.Set("Content-Type", "application/json")
	// The parameter is variadic, so honour every map supplied rather than
	// silently dropping all but the first.
	for _, extra := range headers {
		for key, value := range extra {
			req.Header.Set(key, value)
		}
	}
	return s.client.Do(req)
}
// buildURL resolves path against the session's base URL.
//
// path is returned unchanged when no base URL is set or when path is already
// absolute (see isAbsoluteURL). Otherwise it is appended to the base URL with
// exactly one "/" between the two, so base "https://h" or "https://h/"
// combined with "x" or "/x" always yields "https://h/x". Plain concatenation
// would give "https://hx" when neither side has a slash and "https://h//x"
// when both do.
//
// An empty path, or one that starts with "?" or "#", is appended verbatim so
// that a query string or fragment attaches to the base URL itself.
func (s *Session) buildURL(path string) string {
	if s.baseURL == "" || isAbsoluteURL(path) {
		return path
	}
	if path == "" || path[0] == '?' || path[0] == '#' {
		return s.baseURL + path
	}

	baseSlash := s.baseURL[len(s.baseURL)-1] == '/'
	pathSlash := path[0] == '/'
	switch {
	case baseSlash && pathSlash:
		// Drop the duplicate separator.
		return s.baseURL + path[1:]
	case !baseSlash && !pathSlash:
		// Supply the missing separator.
		return s.baseURL + "/" + path
	default:
		return s.baseURL + path
	}
}
// isAbsoluteURL reports whether urlStr parses successfully and carries both
// a scheme and a host. Scheme-only forms such as "mailto:x" and scheme-less
// forms such as "//host/path" are therefore not treated as absolute.
func isAbsoluteURL(urlStr string) bool {
	parsed, parseErr := url.Parse(urlStr)
	if parseErr != nil {
		return false
	}
	if parsed.Scheme == "" {
		return false
	}
	return parsed.Host != ""
}
// setDefaultHeaders copies every session-level default header onto req,
// replacing any value req already has for the same header name.
func (s *Session) setDefaultHeaders(req *http.Request) {
	target := req.Header
	for name, val := range s.headers {
		target.Set(name, val)
	}
}
// GetCookies returns the cookies the session's jar would send to urlStr.
// It returns nil when urlStr cannot be parsed.
func (s *Session) GetCookies(urlStr string) []*http.Cookie {
	target, parseErr := url.Parse(urlStr)
	if parseErr == nil {
		return s.client.Jar.Cookies(target)
	}
	return nil
}
// SetCookies stores cookies in the session's jar as if they had been
// received in a response from urlStr. A urlStr that cannot be parsed is
// ignored and nothing is stored.
func (s *Session) SetCookies(urlStr string, cookies []*http.Cookie) {
	if target, parseErr := url.Parse(urlStr); parseErr == nil {
		s.client.Jar.SetCookies(target, cookies)
	}
}