213 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			213 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package scraper
 | |
| 
 | |
| import (
 | |
| 	"bytes"
 | |
| 	"encoding/json"
 | |
| 	"log"
 | |
| 	"mime/multipart"
 | |
| 	"net/http"
 | |
| 	"net/http/cookiejar"
 | |
| 	"net/url"
 | |
| 	"strings"
 | |
| 	"time"
 | |
| 
 | |
| 	"golang.org/x/net/publicsuffix"
 | |
| )
 | |
| 
 | |
// Session wraps an http.Client together with a set of default headers and an
// optional base URL, so that consecutive requests share the same client state.
type Session struct {
	// client executes every request issued through the session.
	client *http.Client

	// headers are copied onto each outgoing request before any per-call
	// overrides are applied.
	headers map[string]string

	// baseURL, when non-empty, is prefixed to request URLs that are not
	// absolute.
	baseURL string

	// UserAgent is the browser User-Agent string assigned by NewSession.
	// NOTE(review): SetHeader does not keep this field in sync with the
	// "User-Agent" entry in headers — confirm how callers use it.
	UserAgent string
}
 | |
| 
 | |
| // NewSession creates a new session with browser-like headers
 | |
| func NewSession() *Session {
 | |
| 	// Create cookie jar first
 | |
| 	jar, err := cookiejar.New(&cookiejar.Options{
 | |
| 		PublicSuffixList: publicsuffix.List,
 | |
| 	})
 | |
| 	if err != nil {
 | |
| 		log.Fatal(err)
 | |
| 	}
 | |
| 
 | |
| 	return &Session{
 | |
| 		client: &http.Client{
 | |
| 			Timeout: 30 * time.Second,
 | |
| 			Jar:     jar, // Set the cookie jar
 | |
| 		},
 | |
| 		headers: map[string]string{
 | |
| 			"User-Agent":                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
 | |
| 			"Accept":                    "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
 | |
| 			"Accept-Language":           "en-US,en;q=0.5",
 | |
| 			"Connection":                "keep-alive",
 | |
| 			"Upgrade-Insecure-Requests": "1",
 | |
| 		},
 | |
| 		UserAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // SetHeader sets a header for all subsequent requests
 | |
| func (s *Session) SetHeader(key, value string) {
 | |
| 	s.headers[key] = value
 | |
| }
 | |
| 
 | |
| // SetHeaders sets multiple headers at once
 | |
| func (s *Session) SetHeaders(headers map[string]string) {
 | |
| 	for key, value := range headers {
 | |
| 		s.headers[key] = value
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // SetBaseURL sets the base URL for relative paths
 | |
| func (s *Session) SetBaseURL(baseURL string) {
 | |
| 	s.baseURL = baseURL
 | |
| }
 | |
| 
 | |
| // Get performs a GET request
 | |
| func (s *Session) Get(url string, headers ...map[string]string) (*http.Response, error) {
 | |
| 	// Use base URL if set and url is relative
 | |
| 	fullURL := s.buildURL(url)
 | |
| 
 | |
| 	req, err := http.NewRequest("GET", fullURL, nil)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	s.setDefaultHeaders(req)
 | |
| 
 | |
| 	// Add any additional headers provided
 | |
| 	if len(headers) > 0 {
 | |
| 		for key, value := range headers[0] {
 | |
| 			req.Header.Set(key, value)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return s.client.Do(req)
 | |
| }
 | |
| 
 | |
| // Post performs a POST request with form data
 | |
| func (s *Session) PostMultipartForm(url string, data map[string]string, headers ...map[string]string) (*http.Response, error) {
 | |
| 	fullURL := s.buildURL(url)
 | |
| 
 | |
| 	var requestBody bytes.Buffer
 | |
| 	writer := multipart.NewWriter(&requestBody)
 | |
| 	for k, v := range data {
 | |
| 		err := writer.WriteField(k, v)
 | |
| 		if err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 	}
 | |
| 	writer.Close()
 | |
| 
 | |
| 	req, err := http.NewRequest("POST", fullURL, &requestBody)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	s.setDefaultHeaders(req)
 | |
| 	req.Header.Set("Content-Type", writer.FormDataContentType())
 | |
| 
 | |
| 	// Add any additional headers provided
 | |
| 	if len(headers) > 0 {
 | |
| 		for key, value := range headers[0] {
 | |
| 			req.Header.Set(key, value)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return s.client.Do(req)
 | |
| }
 | |
| 
 | |
| func (s *Session) PostForm(rawUrl string, data map[string]string, headers ...map[string]string) (*http.Response, error) {
 | |
| 	fullURL := s.buildURL(rawUrl)
 | |
| 
 | |
| 	// Prepare form data
 | |
| 	formData := url.Values{}
 | |
| 	for k, v := range data {
 | |
| 		formData.Add(k, v)
 | |
| 	}
 | |
| 	req, err := http.NewRequest("POST", fullURL, strings.NewReader(formData.Encode()))
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	s.setDefaultHeaders(req)
 | |
| 	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
 | |
| 
 | |
| 	// Add any additional headers provided
 | |
| 	if len(headers) > 0 {
 | |
| 		for key, value := range headers[0] {
 | |
| 			req.Header.Set(key, value)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return s.client.Do(req)
 | |
| }
 | |
| 
 | |
| // PostJSON performs a POST request with JSON data
 | |
| func (s *Session) PostJSON(url string, data interface{}, headers ...map[string]string) (*http.Response, error) {
 | |
| 	fullURL := s.buildURL(url)
 | |
| 
 | |
| 	jsonData, err := json.Marshal(data)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	req, err := http.NewRequest("POST", fullURL, bytes.NewBuffer(jsonData))
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	s.setDefaultHeaders(req)
 | |
| 	req.Header.Set("Content-Type", "application/json")
 | |
| 
 | |
| 	// Add any additional headers provided
 | |
| 	if len(headers) > 0 {
 | |
| 		for key, value := range headers[0] {
 | |
| 			req.Header.Set(key, value)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return s.client.Do(req)
 | |
| }
 | |
| 
 | |
| // buildURL constructs the full URL using baseURL if set
 | |
| func (s *Session) buildURL(path string) string {
 | |
| 	if s.baseURL != "" && !isAbsoluteURL(path) {
 | |
| 		return s.baseURL + path
 | |
| 	}
 | |
| 	return path
 | |
| }
 | |
| 
 | |
// isAbsoluteURL reports whether urlStr parses as a URL that carries both a
// scheme and a host. Strings that fail to parse are reported as not absolute.
func isAbsoluteURL(urlStr string) bool {
	parsed, err := url.Parse(urlStr)
	if err != nil {
		return false
	}
	hasScheme := parsed.Scheme != ""
	hasHost := parsed.Host != ""
	return hasScheme && hasHost
}
 | |
| 
 | |
| // setDefaultHeaders sets the default browser-like headers
 | |
| func (s *Session) setDefaultHeaders(req *http.Request) {
 | |
| 	for key, value := range s.headers {
 | |
| 		req.Header.Set(key, value)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // GetCookies returns cookies for a given URL
 | |
| func (s *Session) GetCookies(urlStr string) []*http.Cookie {
 | |
| 	u, err := url.Parse(urlStr)
 | |
| 	if err != nil {
 | |
| 		return nil
 | |
| 	}
 | |
| 	return s.client.Jar.Cookies(u)
 | |
| }
 | |
| 
 | |
| // SetCookies sets cookies for a given URL
 | |
| func (s *Session) SetCookies(urlStr string, cookies []*http.Cookie) {
 | |
| 	u, err := url.Parse(urlStr)
 | |
| 	if err != nil {
 | |
| 		return
 | |
| 	}
 | |
| 	s.client.Jar.SetCookies(u, cookies)
 | |
| }
 |