shell setup
This commit is contained in:
181
internal/scraper/session.go
Normal file
181
internal/scraper/session.go
Normal file
@@ -0,0 +1,181 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/http/cookiejar"
|
||||
"net/url"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/publicsuffix"
|
||||
)
|
||||
|
||||
// Session bundles an HTTP client with the default headers and optional base
// URL that are reused for every request made through it.
type Session struct {
	// client executes every request issued by the session.
	client *http.Client
	// headers holds the default headers copied onto each outgoing request.
	headers map[string]string
	// baseURL, when non-empty, is prepended to request URLs that are not
	// absolute.
	baseURL string
	// UserAgent records the browser identification string chosen when the
	// session was created. Requests take their User-Agent from headers, not
	// from this field.
	UserAgent string
}
|
||||
|
||||
// NewSession creates a new session with browser-like headers
|
||||
func NewSession() *Session {
|
||||
// Create cookie jar first
|
||||
jar, err := cookiejar.New(&cookiejar.Options{
|
||||
PublicSuffixList: publicsuffix.List,
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
return &Session{
|
||||
client: &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
Jar: jar, // Set the cookie jar
|
||||
},
|
||||
headers: map[string]string{
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
},
|
||||
UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
||||
}
|
||||
}
|
||||
|
||||
// SetHeader sets a header for all subsequent requests
|
||||
func (s *Session) SetHeader(key, value string) {
|
||||
s.headers[key] = value
|
||||
}
|
||||
|
||||
// SetHeaders sets multiple headers at once
|
||||
func (s *Session) SetHeaders(headers map[string]string) {
|
||||
for key, value := range headers {
|
||||
s.headers[key] = value
|
||||
}
|
||||
}
|
||||
|
||||
// SetBaseURL sets the base URL for relative paths
|
||||
func (s *Session) SetBaseURL(baseURL string) {
|
||||
s.baseURL = baseURL
|
||||
}
|
||||
|
||||
// Get performs a GET request
|
||||
func (s *Session) Get(url string, headers ...map[string]string) (*http.Response, error) {
|
||||
// Use base URL if set and url is relative
|
||||
fullURL := s.buildURL(url)
|
||||
|
||||
req, err := http.NewRequest("GET", fullURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
s.setDefaultHeaders(req)
|
||||
|
||||
// Add any additional headers provided
|
||||
if len(headers) > 0 {
|
||||
for key, value := range headers[0] {
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
}
|
||||
|
||||
return s.client.Do(req)
|
||||
}
|
||||
|
||||
// Post performs a POST request with form data
|
||||
func (s *Session) Post(url string, data map[string]string, headers ...map[string]string) (*http.Response, error) {
|
||||
fullURL := s.buildURL(url)
|
||||
|
||||
// This is the corrected line - url.Values is from net/url package
|
||||
formData := make(url.Values)
|
||||
for key, value := range data {
|
||||
formData.Add(key, value)
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", fullURL, bytes.NewBufferString(formData.Encode()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
s.setDefaultHeaders(req)
|
||||
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
|
||||
|
||||
// Add any additional headers provided
|
||||
if len(headers) > 0 {
|
||||
for key, value := range headers[0] {
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
}
|
||||
|
||||
return s.client.Do(req)
|
||||
}
|
||||
|
||||
// PostJSON performs a POST request with JSON data
|
||||
func (s *Session) PostJSON(url string, data interface{}, headers ...map[string]string) (*http.Response, error) {
|
||||
fullURL := s.buildURL(url)
|
||||
|
||||
jsonData, err := json.Marshal(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req, err := http.NewRequest("POST", fullURL, bytes.NewBuffer(jsonData))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
s.setDefaultHeaders(req)
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
|
||||
// Add any additional headers provided
|
||||
if len(headers) > 0 {
|
||||
for key, value := range headers[0] {
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
}
|
||||
|
||||
return s.client.Do(req)
|
||||
}
|
||||
|
||||
// buildURL constructs the full URL using baseURL if set
|
||||
func (s *Session) buildURL(path string) string {
|
||||
if s.baseURL != "" && !isAbsoluteURL(path) {
|
||||
return s.baseURL + path
|
||||
}
|
||||
return path
|
||||
}
|
||||
|
||||
// isAbsoluteURL reports whether urlStr parses as a URL that carries both a
// scheme and a host. Input that fails to parse is treated as not absolute.
func isAbsoluteURL(urlStr string) bool {
	parsed, err := url.Parse(urlStr)
	if err != nil {
		return false
	}
	if parsed.Scheme == "" {
		return false
	}
	return parsed.Host != ""
}
|
||||
|
||||
// setDefaultHeaders sets the default browser-like headers
|
||||
func (s *Session) setDefaultHeaders(req *http.Request) {
|
||||
for key, value := range s.headers {
|
||||
req.Header.Set(key, value)
|
||||
}
|
||||
}
|
||||
|
||||
// GetCookies returns cookies for a given URL
|
||||
func (s *Session) GetCookies(urlStr string) []*http.Cookie {
|
||||
u, err := url.Parse(urlStr)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return s.client.Jar.Cookies(u)
|
||||
}
|
||||
|
||||
// SetCookies sets cookies for a given URL
|
||||
func (s *Session) SetCookies(urlStr string, cookies []*http.Cookie) {
|
||||
u, err := url.Parse(urlStr)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
s.client.Jar.SetCookies(u, cookies)
|
||||
}
|
||||
Reference in New Issue
Block a user