diff --git a/hrequests/client.py b/hrequests/client.py index 5adb29d..0f2bff8 100644 --- a/hrequests/client.py +++ b/hrequests/client.py @@ -2,11 +2,18 @@ import re import uuid from dataclasses import dataclass -from typing import Dict, List, Optional, Set, Union +from typing import Dict, List, Optional, Set, Union, Mapping from urllib.parse import urlencode +from collections.abc import Mapping as ABCMapping + +from ast import literal_eval + from geventhttpclient import HTTPClient -from orjson import dumps, loads +from geventhttpclient.header import Headers +from orjson import dumps, loads, JSONDecodeError + +from gzip import decompress as gunzip import hrequests from hrequests.cffi import library @@ -39,6 +46,80 @@ def verify_proxy(proxy: str) -> None: if not PROXY_PATTERN.match(proxy): raise ProxyFormatException(f'Invalid proxy: {proxy}') +def _as_text(value) -> str: + if isinstance(value, (bytes, bytearray)): + # HTTP headers are standardized as latin-1 + return value.decode("latin-1", errors="replace") + return str(value) + +def normalize_headers_container(headers_obj) -> dict[str, object]: + """ + Converts any headers (dict, generator of pairs, bytes keys/values) + to a regular dict[str, str|list[str]]. + """ + if not headers_obj: + return {} + + if isinstance(headers_obj, ABCMapping): + out: dict[str, object] = {} + for k, v in headers_obj.items(): + k = _as_text(k) + if isinstance(v, (list, tuple)): + out[k] = [_as_text(x) for x in v] + else: + out[k] = _as_text(v) + return out + + out: dict[str, object] = {} + try: + for pair in headers_obj: + if not isinstance(pair, (list, tuple)) or len(pair) != 2: + continue + k, v = _as_text(pair[0]), _as_text(pair[1]) + if k in out: + if isinstance(out[k], list): + out[k].append(v) + else: + out[k] = [out[k], v] + else: + out[k] = v + except Exception: + return {} + return out + +def _get_header(headers: Mapping[str, object], name: str) -> Optional[str]: + wanted = name.lower() + for k, v in (headers or {}).items(): + if str(k).lower() == wanted: + if isinstance(v, (list, tuple)): + return _as_text(v[0]) if v else None + return _as_text(v) + return None + +def _maybe_gunzip(body: bytes, headers: Mapping[str, object]) -> bytes: + enc = (_get_header(headers, "Content-Encoding") or "").lower() + if "gzip" in enc and body and not (body.startswith(b"{") or body.startswith(b"[")): + try: + return gunzip(body) + except Exception as e: + raise RuntimeError(f"Gzip decompress failed: {e}. Prefix={body[:80]!r}") from e + return body + +def _looks_like_json_bytes(b: bytes) -> bool: + b = b.lstrip() + return b.startswith(b"{") or b.startswith(b"[") + +def _unwrap_bytearray_repr(body: bytes) -> Optional[bytes]: + if not body.startswith(b"bytearray("): + return None + + try: + obj = literal_eval(body.decode("utf-8", "replace")) + if isinstance(obj, (bytearray, bytes)): + return bytes(obj) + except Exception: + pass + return None @dataclass class TLSClient: @@ -256,6 +337,7 @@ def __post_init__(self) -> None: network_timeout=1e9, ) # CookieJar containing all currently outstanding cookies set on this session + self.headers: CaseInsensitiveDict = CaseInsensitiveDict() self.cookies: RequestsCookieJar = self.cookies or RequestsCookieJar() self._closed: bool = False # indicate if session is closed @@ -295,7 +377,7 @@ def build_request( json = dumps(json).decode('utf-8') request_body = json content_type = 'application/json' - elif data is not None and type(data) not in (str, bytes): + elif data is not None and type(data) not in (str, bytes, bytearray): request_body = urlencode(data, doseq=True) content_type = 'application/x-www-form-urlencoded' else: @@ -312,13 +394,10 @@ def build_request( headers = self.headers else: merged_headers = CaseInsensitiveDict(self.headers) - merged_headers.update(headers) - - # Remove items, where the key or value is set to None. + merged_headers.update(headers) # значения приведутся к str none_keys = [k for (k, v) in merged_headers.items() if v is None or k is None] for key in none_keys: del merged_headers[key] - headers = merged_headers # Cookies @@ -349,7 +428,7 @@ def build_request( 'catchPanics': self.catch_panics, 'headers': dict(headers) if isinstance(headers, CaseInsensitiveDict) else headers, 'headerOrder': self.header_order, - 'insecureSkipVerify': not verify, + 'insecureSkipVerify': not verify if verify is not None else False, 'isByteRequest': is_byte_request, 'detectEncoding': self.detect_encoding, 'additionalDecode': self.additional_decode, @@ -357,10 +436,12 @@ def build_request( 'requestUrl': url, 'requestMethod': method, 'requestBody': ( - base64.b64encode(request_body).decode() if is_byte_request else request_body + base64.b64encode(request_body).decode() + if is_byte_request and request_body is not None + else request_body ), 'requestCookies': cookiejar_to_list(self.cookies), - 'timeoutMilliseconds': int(timeout * 1000), + 'timeoutMilliseconds': int((timeout or 0) * 1000), 'withoutCookieJar': False, 'disableIPv6': self.disable_ipv6, } @@ -443,13 +524,58 @@ def execute_request( ''' # build request payload request_payload, headers = self.build_request(method, url, headers, *args, **kwargs) - try: - # send request - resp = self.server.post( - f'http://127.0.0.1:{library.PORT}/request', body=dumps(request_payload) + + # приводим headers в payload к простому dict[str,str] + safe_headers = {} + for k, v in (request_payload.get('headers') or {}).items(): + if v is None: + continue + safe_headers[str(k)] = str(v) + request_payload['headers'] = safe_headers + + payload_bytes = dumps(request_payload) + + # ВАЖНО: используем geventhttpclient.header.Headers, чтобы избежать str/bytes-каши + req_headers = Headers() + req_headers.add('Content-Type', 'application/json') + req_headers.add('Accept', 'application/json') + + resp = self.server.post( + f'http://127.0.0.1:{library.PORT}/request', + body=payload_bytes, + headers=req_headers, + ) + + raw = resp.read() + status = getattr(resp, "status_code", getattr(resp, "status", "NA")) + resp_headers = normalize_headers_container(getattr(resp, "headers", {}) or {}) + + if not raw: + raise ValueError("Empty response body from local proxy /request") + + body = _maybe_gunzip(raw, resp_headers) + print(f"[local-proxy] status={status} prefix={body[:120]!r}") + + if not _looks_like_json_bytes(body): + unwrapped = _unwrap_bytearray_repr(body) + if unwrapped is not None: + body = unwrapped + + if not _looks_like_json_bytes(body): + ct = _get_header(resp_headers, "Content-Type") or "" + if body.lstrip().startswith(b"<") or ("html" in ct.lower()): + raise RuntimeError( + f"Local proxy returned HTML/error page (status={status}), not JSON. " + f"Content-Type={ct!r}, prefix={body[:120]!r}" + ) + raise RuntimeError( + f"Local proxy returned non-JSON payload (status={status}). " + f"Content-Type={ct!r}, prefix={body[:120]!r}" ) - response_object = loads(resp.read()) - except Exception as e: - raise ClientException('Request failed') from e - # build response class + + try: + response_object = loads(body) + except JSONDecodeError as e: + raise JSONDecodeError(f"{e}: prefix={body[:160]!r}", e.doc, e.pos) + return self.build_response(url, headers, response_object, request_payload['proxyUrl']) diff --git a/hrequests/toolbelt.py b/hrequests/toolbelt.py index b6774ac..0102063 100644 --- a/hrequests/toolbelt.py +++ b/hrequests/toolbelt.py @@ -102,7 +102,8 @@ def __init__(self, data=None, **kwargs): def __setitem__(self, key, value): # Use the lowercased key for lookups, but store the actual # key alongside the value. - self._store[key.lower()] = (key, value) + real_value = None if value is None else str(value) + self._store[key.lower()] = (key, real_value) def __getitem__(self, key): return self._store[key.lower()][1] @@ -132,5 +133,15 @@ def __eq__(self, other): def copy(self): return CaseInsensitiveDict(self._store.values()) + def update(self, other=None, **kwargs): + if other: + if hasattr(other, "items"): + other = {k: (None if v is None else str(v)) for k, v in other.items()} + else: + other = [(k, None if v is None else str(v)) for k, v in other] + if kwargs: + kwargs = {k: (None if v is None else str(v)) for k, v in kwargs.items()} + return super().update(other, **kwargs) + def __repr__(self): return str(dict(self.items()))