implemented validate response

This commit is contained in:
Hazel Noack 2025-06-11 13:01:08 +02:00
parent bee6fb40a3
commit c9bd539123
2 changed files with 78 additions and 5 deletions

View File

@ -4,10 +4,12 @@ A Python library for simplified HTTP requests, featuring rate limiting, browser-
## Features ## Features
- saving responses to cache
## ToDo ## ToDo
- [ ] basic structure - [ ] basic structure
- [-] caching - [x] caching
- [ ] add cloudscraper - [ ] add cloudscraper
## License ## License

View File

@ -1,8 +1,8 @@
from __future__ import annotations from __future__ import annotations
from typing import Optional from typing import Optional, Set
import requests import requests
from datetime import timedelta from datetime import timedelta
from urllib.parse import urlparse, urlunsplit, ParseResult import time
from . import cache from . import cache
@ -11,13 +11,47 @@ class Connection:
self, self,
session: Optional[requests.Session] = None, session: Optional[requests.Session] = None,
cache_enable: bool = True, cache_enable: bool = True,
cache_expires_after: Optional[timedelta] = None cache_expires_after: Optional[timedelta] = None,
request_delay: float = 3,
additional_delay_per_try: float = 1,
error_status_codes: Optional[Set[int]] = None,
rate_limit_status_codes: Optional[Set[int]] = None,
) -> None: ) -> None:
self.session = session if session is not None else requests.Session() self.session = session if session is not None else requests.Session()
# cache related config
self.cache_enable = cache_enable self.cache_enable = cache_enable
self.cache_expires_after = cache_expires_after if cache_expires_after is not None else timedelta(hours=1) self.cache_expires_after = cache_expires_after if cache_expires_after is not None else timedelta(hours=1)
# waiting between requests
self.request_delay = request_delay
self.additional_delay_per_try = additional_delay_per_try
self.last_request: float = 0
# response validation config
self.error_status_codes = error_status_codes if error_status_codes is not None else {
400, # Bad Request
401, # Unauthorized
403, # Forbidden
404, # Not Found
405, # Method Not Allowed
406, # Not Acceptable
410, # Gone
418, # I'm a teapot
422, # Unprocessable Entity
451, # Unavailable For Legal Reasons
500, # Internal Server Error
501, # Not Implemented
502, # Bad Gateway
503, # Service Unavailable
504, # Gateway Timeout
}
self.rate_limit_status_codes = rate_limit_status_codes if rate_limit_status_codes is not None else {
429, # Too Many Requests
509, # Bandwidth Limit Exceeded
}
def generate_headers(self, referer: Optional[str] = None): def generate_headers(self, referer: Optional[str] = None):
headers = { headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0", "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:139.0) Gecko/20100101 Firefox/139.0",
@ -30,6 +64,38 @@ class Connection:
self.session.headers.update(**headers) self.session.headers.update(**headers)
def validate_response(self, response: requests.Response) -> bool:
"""
Validates the HTTP response and raises appropriate exceptions or returns True if successful.
Args:
response: The response object to validate
Returns:
bool: True if the response is valid (status code < 400 and not in error/rate limit codes)
Raises:
requests.HTTPError: For response status codes that indicate client or server errors
RateLimitExceeded: For response status codes that indicate rate limiting
"""
if response.status_code in self.error_status_codes:
raise requests.HTTPError(
f"Server returned error status code {response.status_code}: {response.reason}",
response=response
)
if response.status_code in self.rate_limit_status_codes:
return False
# For any other 4xx or 5xx status codes not explicitly configured
if response.status_code >= 400:
raise requests.HTTPError(
f"Server returned unexpected status code {response.status_code}: {response.reason}",
response=response
)
return True
def send_request(self, request: requests.Request) -> requests.Response: def send_request(self, request: requests.Request) -> requests.Response:
url = request.url url = request.url
if url is None: if url is None:
@ -37,7 +103,12 @@ class Connection:
if self.cache_enable and cache.has_cache(url): if self.cache_enable and cache.has_cache(url):
return cache.get_cache(url) return cache.get_cache(url)
# TODO wait the normal configured amount
response = self.session.send(request.prepare()) response = self.session.send(request.prepare())
# TODO validate response and retrying if necessary
if self.cache_enable: if self.cache_enable:
cache.write_cache(url, response) cache.write_cache(url, response)
return response return response