Source code for scrapy.http.response

"""
This module implements the Response class which is used to represent HTTP
responses in Scrapy.

See documentation in docs/topics/request-response.rst
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Any, AnyStr, TypeVar, overload
from urllib.parse import urljoin

from scrapy.exceptions import NotSupported
from scrapy.http.headers import Headers
from scrapy.http.request import Request
from scrapy.link import Link
from scrapy.utils.trackref import object_ref

if TYPE_CHECKING:
    from collections.abc import Callable, Iterable, Mapping
    from ipaddress import IPv4Address, IPv6Address

    from twisted.internet.ssl import Certificate
    from twisted.python.failure import Failure

    # typing.Self requires Python 3.11
    from typing_extensions import Self

    from scrapy.http.request import CallbackT, CookiesT
    from scrapy.selector import SelectorList


ResponseTypeVar = TypeVar("ResponseTypeVar", bound="Response")


class Response(object_ref):
    """An object that represents an HTTP response, which is usually
    downloaded (by the Downloader) and fed to the Spiders for processing.
    """

    attributes: tuple[str, ...] = (
        "url",
        "status",
        "headers",
        "body",
        "flags",
        "request",
        "certificate",
        "ip_address",
        "protocol",
    )
    """A tuple of :class:`str` objects containing the name of all public
    attributes of the class that are also keyword parameters of the
    ``__init__()`` method.

    Currently used by :meth:`Response.replace`.
    """

    def __init__(
        self,
        url: str,
        status: int = 200,
        headers: Mapping[AnyStr, Any] | Iterable[tuple[AnyStr, Any]] | None = None,
        body: bytes = b"",
        flags: list[str] | None = None,
        request: Request | None = None,
        certificate: Certificate | None = None,
        ip_address: IPv4Address | IPv6Address | None = None,
        protocol: str | None = None,
    ):
        self.headers: Headers = Headers(headers or {})
        self.status: int = int(status)
        self._set_body(body)
        self._set_url(url)
        self.request: Request | None = request
        self.flags: list[str] = [] if flags is None else list(flags)
        self.certificate: Certificate | None = certificate
        self.ip_address: IPv4Address | IPv6Address | None = ip_address
        self.protocol: str | None = protocol

    @property
    def cb_kwargs(self) -> dict[str, Any]:
        try:
            return self.request.cb_kwargs  # type: ignore[union-attr]
        except AttributeError:
            raise AttributeError(
                "Response.cb_kwargs not available, this response "
                "is not tied to any request"
            )

    @property
    def meta(self) -> dict[str, Any]:
        try:
            return self.request.meta  # type: ignore[union-attr]
        except AttributeError:
            raise AttributeError(
                "Response.meta not available, this response "
                "is not tied to any request"
            )

    @property
    def url(self) -> str:
        return self._url

    def _set_url(self, url: str) -> None:
        if isinstance(url, str):
            self._url: str = url
        else:
            raise TypeError(
                f"{type(self).__name__} url must be str, got {type(url).__name__}"
            )

    @property
    def body(self) -> bytes:
        return self._body

    def _set_body(self, body: bytes | None) -> None:
        if body is None:
            self._body = b""
        elif not isinstance(body, bytes):
            raise TypeError(
                "Response body must be bytes. "
                "If you want to pass unicode body use TextResponse "
                "or HtmlResponse."
            )
        else:
            self._body = body

    def __repr__(self) -> str:
        return f"<{self.status} {self.url}>"
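    # Illustrative sketch (not part of the original source): a Response can be
    # constructed directly, e.g. in tests; ``meta`` and ``cb_kwargs`` are only
    # available when the response is tied to a Request, otherwise they raise
    # AttributeError.
    #
    #     from scrapy.http import Request, Response
    #     req = Request("https://example.com", meta={"depth": 1})
    #     resp = Response("https://example.com", status=200, body=b"ok", request=req)
    #     resp.meta["depth"]                    # -> 1
    #     Response("https://example.com").meta  # raises AttributeError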
    def copy(self) -> Self:
        """Return a copy of this Response"""
        return self.replace()
    @overload
    def replace(
        self, *args: Any, cls: type[ResponseTypeVar], **kwargs: Any
    ) -> ResponseTypeVar: ...

    @overload
    def replace(self, *args: Any, cls: None = None, **kwargs: Any) -> Self: ...
    def replace(
        self, *args: Any, cls: type[Response] | None = None, **kwargs: Any
    ) -> Response:
        """Create a new Response with the same attributes except for those
        given new values"""
        for x in self.attributes:
            kwargs.setdefault(x, getattr(self, x))
        if cls is None:
            cls = self.__class__
        return cls(*args, **kwargs)
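    # Illustrative sketch (not part of the original source): ``replace()``
    # copies every attribute listed in ``attributes`` unless a new value is
    # passed, so only the overridden fields change.
    #
    #     resp = Response("https://example.com", status=200, body=b"ok")
    #     moved = resp.replace(status=301, flags=["cached"])
    #     moved.url, moved.status   # -> ("https://example.com", 301)
    #     moved.body                # -> b"ok" (carried over from resp)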
    def urljoin(self, url: str) -> str:
        """Join this Response's url with a possible relative url to form an
        absolute interpretation of the latter."""
        return urljoin(self.url, url)
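    # Illustrative sketch (not part of the original source): ``urljoin``
    # delegates to ``urllib.parse.urljoin`` with the response URL as the base.
    #
    #     resp = Response("https://example.com/a/b.html")
    #     resp.urljoin("c.html")       # -> "https://example.com/a/c.html"
    #     resp.urljoin("/root.html")   # -> "https://example.com/root.html"
    #     resp.urljoin("https://other.example/x")  # absolute URLs pass through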
    @property
    def text(self) -> str:
        """For subclasses of TextResponse, this will return the body
        as str
        """
        raise AttributeError("Response content isn't text")

    def css(self, *a: Any, **kw: Any) -> SelectorList:
        """Shortcut method implemented only by responses whose content
        is text (subclasses of TextResponse).
        """
        raise NotSupported("Response content isn't text")

    def jmespath(self, *a: Any, **kw: Any) -> SelectorList:
        """Shortcut method implemented only by responses whose content
        is text (subclasses of TextResponse).
        """
        raise NotSupported("Response content isn't text")

    def xpath(self, *a: Any, **kw: Any) -> SelectorList:
        """Shortcut method implemented only by responses whose content
        is text (subclasses of TextResponse).
        """
        raise NotSupported("Response content isn't text")
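    # Illustrative sketch (not part of the original source): these shortcuts
    # only work on text responses; a plain Response raises instead.
    #
    #     from scrapy.http import HtmlResponse
    #     html = HtmlResponse("https://example.com", body=b"<p>hi</p>")
    #     html.css("p::text").get()                 # -> "hi"
    #     Response("https://example.com").css("p")  # raises NotSupported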
    def follow(
        self,
        url: str | Link,
        callback: CallbackT | None = None,
        method: str = "GET",
        headers: Mapping[AnyStr, Any] | Iterable[tuple[AnyStr, Any]] | None = None,
        body: bytes | str | None = None,
        cookies: CookiesT | None = None,
        meta: dict[str, Any] | None = None,
        encoding: str | None = "utf-8",
        priority: int = 0,
        dont_filter: bool = False,
        errback: Callable[[Failure], Any] | None = None,
        cb_kwargs: dict[str, Any] | None = None,
        flags: list[str] | None = None,
    ) -> Request:
        """
        Return a :class:`~.Request` instance to follow a link ``url``.
        It accepts the same arguments as the ``Request.__init__()`` method,
        but ``url`` can be a relative URL or a :class:`~scrapy.link.Link`
        object, not only an absolute URL.

        :class:`~.TextResponse` provides a :meth:`~.TextResponse.follow`
        method which supports selectors in addition to absolute/relative URLs
        and Link objects.

        .. versionadded:: 2.0
           The *flags* parameter.
        """
        if encoding is None:
            raise ValueError("encoding can't be None")
        if isinstance(url, Link):
            url = url.url
        elif url is None:
            raise ValueError("url can't be None")
        url = self.urljoin(url)

        return Request(
            url=url,
            callback=callback,
            method=method,
            headers=headers,
            body=body,
            cookies=cookies,
            meta=meta,
            encoding=encoding,
            priority=priority,
            dont_filter=dont_filter,
            errback=errback,
            cb_kwargs=cb_kwargs,
            flags=flags,
        )
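    # Illustrative sketch (not part of the original source): inside a spider
    # callback, ``follow`` resolves a relative URL against the current page
    # via ``self.urljoin()`` before building the Request.
    #
    #     def parse(self, response):
    #         # "page/2" is joined with response.url before the Request is made
    #         yield response.follow("page/2", callback=self.parse)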
    def follow_all(
        self,
        urls: Iterable[str | Link],
        callback: CallbackT | None = None,
        method: str = "GET",
        headers: Mapping[AnyStr, Any] | Iterable[tuple[AnyStr, Any]] | None = None,
        body: bytes | str | None = None,
        cookies: CookiesT | None = None,
        meta: dict[str, Any] | None = None,
        encoding: str | None = "utf-8",
        priority: int = 0,
        dont_filter: bool = False,
        errback: Callable[[Failure], Any] | None = None,
        cb_kwargs: dict[str, Any] | None = None,
        flags: list[str] | None = None,
    ) -> Iterable[Request]:
        """
        .. versionadded:: 2.0

        Return an iterable of :class:`~.Request` instances to follow all
        links in ``urls``. It accepts the same arguments as the
        ``Request.__init__()`` method, but elements of ``urls`` can be
        relative URLs or :class:`~scrapy.link.Link` objects, not only
        absolute URLs.

        :class:`~.TextResponse` provides a :meth:`~.TextResponse.follow_all`
        method which supports selectors in addition to absolute/relative URLs
        and Link objects.
        """
        if not hasattr(urls, "__iter__"):
            raise TypeError("'urls' argument must be an iterable")
        return (
            self.follow(
                url=url,
                callback=callback,
                method=method,
                headers=headers,
                body=body,
                cookies=cookies,
                meta=meta,
                encoding=encoding,
                priority=priority,
                dont_filter=dont_filter,
                errback=errback,
                cb_kwargs=cb_kwargs,
                flags=flags,
            )
            for url in urls
        )
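
# Minimal usage sketch (not part of the original source; the URLs below are
# illustrative): ``follow_all`` yields one Request per URL, joining relative
# URLs against the response URL exactly as ``follow`` does.
if __name__ == "__main__":
    _resp = Response("https://example.com/catalog/")
    for _req in _resp.follow_all(["item/1", "item/2", "https://other.example/"]):
        # Each element is a scrapy Request with an absolute URL.
        print(_req.url)
    # -> https://example.com/catalog/item/1
    # -> https://example.com/catalog/item/2
    # -> https://other.example/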