Source code for scrapy.http.response

"""
This module implements the Response class which is used to represent HTTP
responses in Scrapy.

See documentation in docs/topics/request-response.rst
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Any, AnyStr, TypeVar, overload
from urllib.parse import urljoin

from scrapy.exceptions import NotSupported
from scrapy.http.headers import Headers
from scrapy.http.request import Request
from scrapy.link import Link
from scrapy.utils.trackref import object_ref

if TYPE_CHECKING:
    from collections.abc import Callable, Iterable, Mapping
    from ipaddress import IPv4Address, IPv6Address

    from twisted.internet.ssl import Certificate
    from twisted.python.failure import Failure

    # typing.Self requires Python 3.11
    from typing_extensions import Self

    from scrapy.http.request import CallbackT, CookiesT
    from scrapy.selector import SelectorList


ResponseTypeVar = TypeVar("ResponseTypeVar", bound="Response")


class Response(object_ref):
    """An object that represents an HTTP response, which is usually
    downloaded (by the Downloader) and fed to the Spiders for processing.
    """

    attributes: tuple[str, ...] = (
        "url",
        "status",
        "headers",
        "body",
        "flags",
        "request",
        "certificate",
        "ip_address",
        "protocol",
    )
    """A tuple of :class:`str` objects containing the name of all public
    attributes of the class that are also keyword parameters of the
    ``__init__()`` method.

    Currently used by :meth:`Response.replace`.
    """

    def __init__(
        self,
        url: str,
        status: int = 200,
        headers: Mapping[AnyStr, Any] | Iterable[tuple[AnyStr, Any]] | None = None,
        body: bytes = b"",
        flags: list[str] | None = None,
        request: Request | None = None,
        certificate: Certificate | None = None,
        ip_address: IPv4Address | IPv6Address | None = None,
        protocol: str | None = None,
    ):
        self.headers: Headers = Headers(headers or {})
        self.status: int = int(status)
        self._set_body(body)
        self._set_url(url)
        self.request: Request | None = request
        self.flags: list[str] = [] if flags is None else list(flags)
        self.certificate: Certificate | None = certificate
        self.ip_address: IPv4Address | IPv6Address | None = ip_address
        self.protocol: str | None = protocol

    @property
    def cb_kwargs(self) -> dict[str, Any]:
        try:
            return self.request.cb_kwargs  # type: ignore[union-attr]
        except AttributeError:
            raise AttributeError(
                "Response.cb_kwargs not available, this response "
                "is not tied to any request"
            )

    @property
    def meta(self) -> dict[str, Any]:
        try:
            return self.request.meta  # type: ignore[union-attr]
        except AttributeError:
            raise AttributeError(
                "Response.meta not available, this response "
                "is not tied to any request"
            )

    @property
    def url(self) -> str:
        return self._url

    def _set_url(self, url: str) -> None:
        if isinstance(url, str):
            self._url: str = url
        else:
            raise TypeError(
                f"{type(self).__name__} url must be str, got {type(url).__name__}"
            )

    @property
    def body(self) -> bytes:
        return self._body

    def _set_body(self, body: bytes | None) -> None:
        if body is None:
            self._body = b""
        elif not isinstance(body, bytes):
            raise TypeError(
                "Response body must be bytes. "
                "If you want to pass unicode body use TextResponse "
                "or HtmlResponse."
            )
        else:
            self._body = body

    def __repr__(self) -> str:
        return f"<{self.status} {self.url}>"
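    # Illustrative sketch (not part of the original source): a Response can be
    # constructed directly, e.g. in tests; ``meta`` and ``cb_kwargs`` are only
    # available when the response is tied to a Request, otherwise they raise
    # AttributeError.
    #
    #     from scrapy.http import Request, Response
    #     req = Request("https://example.com", meta={"depth": 1})
    #     resp = Response("https://example.com", status=200, body=b"ok", request=req)
    #     resp.meta["depth"]                    # -> 1
    #     Response("https://example.com").meta  # raises AttributeError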
    def copy(self) -> Self:
        """Return a copy of this Response"""
        return self.replace()
    @overload
    def replace(
        self, *args: Any, cls: type[ResponseTypeVar], **kwargs: Any
    ) -> ResponseTypeVar: ...

    @overload
    def replace(self, *args: Any, cls: None = None, **kwargs: Any) -> Self: ...
    def replace(
        self, *args: Any, cls: type[Response] | None = None, **kwargs: Any
    ) -> Response:
        """Create a new Response with the same attributes except for those
        given new values"""
        for x in self.attributes:
            kwargs.setdefault(x, getattr(self, x))
        if cls is None:
            cls = self.__class__
        return cls(*args, **kwargs)
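    # Illustrative sketch (not part of the original source): ``replace()``
    # copies every attribute listed in ``attributes`` unless a new value is
    # passed, so only the overridden fields change.
    #
    #     resp = Response("https://example.com", status=200, body=b"ok")
    #     moved = resp.replace(status=301, flags=["cached"])
    #     moved.url, moved.status   # -> ("https://example.com", 301)
    #     moved.body                # -> b"ok" (carried over from resp)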
    def urljoin(self, url: str) -> str:
        """Join this Response's url with a possible relative url to form an
        absolute interpretation of the latter."""
        return urljoin(self.url, url)
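    # Illustrative sketch (not part of the original source): ``urljoin``
    # delegates to ``urllib.parse.urljoin`` with the response URL as the base.
    #
    #     resp = Response("https://example.com/a/b.html")
    #     resp.urljoin("c.html")       # -> "https://example.com/a/c.html"
    #     resp.urljoin("/root.html")   # -> "https://example.com/root.html"
    #     resp.urljoin("https://other.example/x")  # absolute URLs pass through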
    @property
    def text(self) -> str:
        """For subclasses of TextResponse, this will return the body
        as str
        """
        raise AttributeError("Response content isn't text")

    def css(self, *a: Any, **kw: Any) -> SelectorList:
        """Shortcut method implemented only by responses whose content
        is text (subclasses of TextResponse).
        """
        raise NotSupported("Response content isn't text")

    def jmespath(self, *a: Any, **kw: Any) -> SelectorList:
        """Shortcut method implemented only by responses whose content
        is text (subclasses of TextResponse).
        """
        raise NotSupported("Response content isn't text")

    def xpath(self, *a: Any, **kw: Any) -> SelectorList:
        """Shortcut method implemented only by responses whose content
        is text (subclasses of TextResponse).
        """
        raise NotSupported("Response content isn't text")
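    # Illustrative sketch (not part of the original source): these shortcuts
    # only work on text responses; a plain Response raises instead.
    #
    #     from scrapy.http import HtmlResponse
    #     html = HtmlResponse("https://example.com", body=b"<p>hi</p>")
    #     html.css("p::text").get()                 # -> "hi"
    #     Response("https://example.com").css("p")  # raises NotSupported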
    def follow(
        self,
        url: str | Link,
        callback: CallbackT | None = None,
        method: str = "GET",
        headers: Mapping[AnyStr, Any] | Iterable[tuple[AnyStr, Any]] | None = None,
        body: bytes | str | None = None,
        cookies: CookiesT | None = None,
        meta: dict[str, Any] | None = None,
        encoding: str | None = "utf-8",
        priority: int = 0,
        dont_filter: bool = False,
        errback: Callable[[Failure], Any] | None = None,
        cb_kwargs: dict[str, Any] | None = None,
        flags: list[str] | None = None,
    ) -> Request:
        """
        Return a :class:`~.Request` instance to follow a link ``url``.
        It accepts the same arguments as the ``Request.__init__()`` method,
        but ``url`` can be a relative URL or a :class:`~scrapy.link.Link`
        object, not only an absolute URL.

        :class:`~.TextResponse` provides a :meth:`~.TextResponse.follow`
        method which supports selectors in addition to absolute/relative URLs
        and Link objects.

        .. versionadded:: 2.0
           The *flags* parameter.
        """
        if encoding is None:
            raise ValueError("encoding can't be None")
        if isinstance(url, Link):
            url = url.url
        elif url is None:
            raise ValueError("url can't be None")
        url = self.urljoin(url)

        return Request(
            url=url,
            callback=callback,
            method=method,
            headers=headers,
            body=body,
            cookies=cookies,
            meta=meta,
            encoding=encoding,
            priority=priority,
            dont_filter=dont_filter,
            errback=errback,
            cb_kwargs=cb_kwargs,
            flags=flags,
        )
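    # Illustrative sketch (not part of the original source): inside a spider
    # callback, ``follow`` resolves a relative URL against the current page
    # via ``self.urljoin()`` before building the Request.
    #
    #     def parse(self, response):
    #         # "page/2" is joined with response.url before the Request is made
    #         yield response.follow("page/2", callback=self.parse)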
    def follow_all(
        self,
        urls: Iterable[str | Link],
        callback: CallbackT | None = None,
        method: str = "GET",
        headers: Mapping[AnyStr, Any] | Iterable[tuple[AnyStr, Any]] | None = None,
        body: bytes | str | None = None,
        cookies: CookiesT | None = None,
        meta: dict[str, Any] | None = None,
        encoding: str | None = "utf-8",
        priority: int = 0,
        dont_filter: bool = False,
        errback: Callable[[Failure], Any] | None = None,
        cb_kwargs: dict[str, Any] | None = None,
        flags: list[str] | None = None,
    ) -> Iterable[Request]:
        """
        .. versionadded:: 2.0

        Return an iterable of :class:`~.Request` instances to follow all
        links in ``urls``. It accepts the same arguments as the
        ``Request.__init__()`` method, but elements of ``urls`` can be
        relative URLs or :class:`~scrapy.link.Link` objects, not only
        absolute URLs.

        :class:`~.TextResponse` provides a :meth:`~.TextResponse.follow_all`
        method which supports selectors in addition to absolute/relative URLs
        and Link objects.
        """
        if not hasattr(urls, "__iter__"):
            raise TypeError("'urls' argument must be an iterable")
        return (
            self.follow(
                url=url,
                callback=callback,
                method=method,
                headers=headers,
                body=body,
                cookies=cookies,
                meta=meta,
                encoding=encoding,
                priority=priority,
                dont_filter=dont_filter,
                errback=errback,
                cb_kwargs=cb_kwargs,
                flags=flags,
            )
            for url in urls
        )
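
# Minimal usage sketch (not part of the original source; the URLs below are
# illustrative): ``follow_all`` yields one Request per URL, joining relative
# URLs against the response URL exactly as ``follow`` does.
if __name__ == "__main__":
    _resp = Response("https://example.com/catalog/")
    for _req in _resp.follow_all(["item/1", "item/2", "https://other.example/"]):
        # Each element is a scrapy Request with an absolute URL.
        print(_req.url)
    # -> https://example.com/catalog/item/1
    # -> https://example.com/catalog/item/2
    # -> https://other.example/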