Source code for scrapy.contracts

import re
import sys
from functools import wraps
from inspect import getmembers
from types import CoroutineType
from typing import AsyncGenerator, Dict, Optional, Type
from unittest import TestCase

from scrapy.http import Request
from scrapy.utils.python import get_spec
from scrapy.utils.spider import iterate_spider_output


class Contract:
    """Abstract class for contracts.

    A contract wraps the callback of a request with checks that run before
    the callback (``pre_process(response)``) and/or after it
    (``post_process(output)``). Both methods are optional: a hook is only
    installed when the instance actually has the corresponding method.

    Subclasses are expected to provide a ``name`` attribute; it is read in
    ``__init__`` to label the generated test cases.
    """

    # Optional Request class associated with this contract; ``None`` means
    # the contract expresses no preference.
    request_cls: Optional[Type[Request]] = None

    def __init__(self, method, *args):
        """Create the pre/post test cases for ``method`` and store ``args``.

        :param method: bound spider callback the contract is attached to
        :param args: extra positional arguments given to the contract
        """
        self.testcase_pre = _create_testcase(method, f"@{self.name} pre-hook")
        self.testcase_post = _create_testcase(method, f"@{self.name} post-hook")
        self.args = args

    def add_pre_hook(self, request, results):
        """Wrap ``request.callback`` so ``pre_process`` runs before it.

        The outcome of ``pre_process`` is recorded on ``results`` (failure for
        ``AssertionError``, error for any other ``Exception``, success
        otherwise). The original callback is then always invoked and its
        output returned as a list.

        :param request: request whose callback is wrapped in place
        :param results: test result object receiving the outcome
        :returns: the same ``request`` object
        :raises TypeError: (from the wrapper) if the callback is asynchronous
        """
        if hasattr(self, "pre_process"):
            cb = request.callback

            @wraps(cb)
            def wrapper(response, **cb_kwargs):
                try:
                    results.startTest(self.testcase_pre)
                    self.pre_process(response)
                    results.stopTest(self.testcase_pre)
                except AssertionError:
                    results.addFailure(self.testcase_pre, sys.exc_info())
                except Exception:
                    results.addError(self.testcase_pre, sys.exc_info())
                else:
                    results.addSuccess(self.testcase_pre)
                # Deliberately placed after the ``try`` statement rather than
                # in a ``finally`` clause: a ``return`` inside ``finally``
                # would swallow KeyboardInterrupt/SystemExit and any error
                # raised while recording the result.
                cb_result = cb(response, **cb_kwargs)
                if isinstance(cb_result, (AsyncGenerator, CoroutineType)):
                    raise TypeError("Contracts don't support async callbacks")
                return list(iterate_spider_output(cb_result))

            request.callback = wrapper

        return request

    def add_post_hook(self, request, results):
        """Wrap ``request.callback`` so ``post_process`` runs on its output.

        The callback output is materialised into a list, handed to
        ``post_process`` and the outcome recorded on ``results`` (failure for
        ``AssertionError``, error for any other ``Exception``, success
        otherwise). The output list is returned regardless of that outcome.

        :param request: request whose callback is wrapped in place
        :param results: test result object receiving the outcome
        :returns: the same ``request`` object
        :raises TypeError: (from the wrapper) if the callback is asynchronous
        """
        if hasattr(self, "post_process"):
            cb = request.callback

            @wraps(cb)
            def wrapper(response, **cb_kwargs):
                cb_result = cb(response, **cb_kwargs)
                if isinstance(cb_result, (AsyncGenerator, CoroutineType)):
                    raise TypeError("Contracts don't support async callbacks")
                output = list(iterate_spider_output(cb_result))
                try:
                    results.startTest(self.testcase_post)
                    self.post_process(output)
                    results.stopTest(self.testcase_post)
                except AssertionError:
                    results.addFailure(self.testcase_post, sys.exc_info())
                except Exception:
                    results.addError(self.testcase_post, sys.exc_info())
                else:
                    results.addSuccess(self.testcase_post)
                # Returned after the ``try`` statement (not from ``finally``)
                # so that non-``Exception`` interruptions still propagate.
                return output

            request.callback = wrapper

        return request

    def adjust_request_args(self, args):
        """Return the request keyword arguments to use for this contract.

        The default implementation returns ``args`` unchanged; subclasses may
        override it to add or modify entries.
        """
        return args
class ContractsManager:
    """Build contract-checking requests from spider callback docstrings.

    Docstring lines that start with ``@`` are interpreted as
    ``@<contract name> <args...>`` and looked up in ``contracts``.
    """

    # Registry mapping contract name -> contract class. This class-level dict
    # is only a default; every instance works on its own copy.
    contracts: Dict[str, "Type[Contract]"] = {}

    def __init__(self, contracts):
        """Register the given contract classes under their ``name``.

        :param contracts: iterable of contract classes
        """
        # Copy before registering so that one manager's contracts never leak
        # into another manager through the shared class attribute.
        self.contracts = dict(self.contracts)
        for contract in contracts:
            self.contracts[contract.name] = contract

    def tested_methods_from_spidercls(self, spidercls):
        """Return the names of callables on ``spidercls`` whose docstring
        contains at least one line starting with ``@``."""
        is_method = re.compile(r"^\s*@", re.MULTILINE).search
        return [
            key
            for key, value in getmembers(spidercls)
            if callable(value) and value.__doc__ and is_method(value.__doc__)
        ]

    def extract_contracts(self, method):
        """Instantiate the contracts declared in ``method``'s docstring.

        :param method: callable whose ``__doc__`` is scanned
        :returns: list of contract instances, in docstring order
        :raises KeyError: if a declared contract name is not registered
        """
        contracts = []
        for line in method.__doc__.split("\n"):
            line = line.strip()
            if not line.startswith("@"):
                continue

            match = re.match(r"@(\w+)\s*(.*)", line)
            if match is None:
                # "@" not followed by a name (e.g. a bare "@" or "@ text"):
                # not a contract declaration, so skip it instead of crashing.
                continue

            name, args = match.groups()
            args = re.split(r"\s+", args)
            contracts.append(self.contracts[name](method, *args))

        return contracts

    def from_spider(self, spider, results):
        """Build one request per contract-annotated method of ``spider``.

        Any exception raised while building a request is recorded on
        ``results`` as an error and that method is skipped.

        :returns: list with the return value of ``from_method`` for each
            method (entries may be ``None``)
        """
        requests = []
        for method in self.tested_methods_from_spidercls(type(spider)):
            bound_method = getattr(spider, method)
            try:
                requests.append(self.from_method(bound_method, results))
            except Exception:
                case = _create_testcase(bound_method, "contract")
                results.addError(case, sys.exc_info())

        return requests

    def from_method(self, method, results):
        """Build the request that exercises ``method`` with its contracts.

        :returns: the request with all contract hooks installed, or ``None``
            when the method declares no contracts or the contracts do not
            supply every positional argument of the request class
        """
        contracts = self.extract_contracts(method)
        if contracts:
            request_cls = Request
            for contract in contracts:
                if contract.request_cls is not None:
                    request_cls = contract.request_cls

            # calculate request args
            args, kwargs = get_spec(request_cls.__init__)

            # Don't filter requests to allow
            # testing different callbacks on the same URL.
            kwargs["dont_filter"] = True
            kwargs["callback"] = method

            for contract in contracts:
                kwargs = contract.adjust_request_args(kwargs)

            args.remove("self")

            # check if all positional arguments are defined in kwargs
            if set(args).issubset(set(kwargs)):
                request = request_cls(**kwargs)

                # execute pre and post hooks in order
                for contract in reversed(contracts):
                    request = contract.add_pre_hook(request, results)
                for contract in contracts:
                    request = contract.add_post_hook(request, results)

                self._clean_req(request, method, results)
                return request

        return None

    def _clean_req(self, request, method, results):
        """stop the request from returning objects and records any errors"""

        cb = request.callback

        @wraps(cb)
        def cb_wrapper(response, **cb_kwargs):
            try:
                output = cb(response, **cb_kwargs)
                # Consume the output so lazy callbacks actually run; the
                # result itself is intentionally discarded.
                output = list(iterate_spider_output(output))
            except Exception:
                case = _create_testcase(method, "callback")
                results.addError(case, sys.exc_info())

        def eb_wrapper(failure):
            case = _create_testcase(method, "errback")
            exc_info = failure.type, failure.value, failure.getTracebackObject()
            results.addError(case, exc_info)

        request.callback = cb_wrapper
        request.errback = eb_wrapper


def _create_testcase(method, desc):
    """Create a ``TestCase`` instance that labels results for ``method``.

    :param method: bound spider method (its ``__self__.name`` is used as the
        spider name)
    :param desc: short description appended to the test's string form
    """
    spider = method.__self__.name

    class ContractTestCase(TestCase):
        def __str__(_self):
            return f"[{spider}] {method.__name__} ({desc})"

    name = f"{spider}_{method.__name__}"
    # TestCase(name) requires an attribute of that name on the class.
    setattr(ContractTestCase, name, lambda x: x)
    return ContractTestCase(name)