Previously, I handled timeout exceptions in a downloader middleware, but doing it that way was always laborious.
Today I checked the Scrapy documentation and found that these exceptions can be handled in the request's errback callback instead:
from scrapy.spidermiddlewares.httperror import HttpError from twisted.internet.error import DNSLookupError from twisted.internet.error import TimeoutError, TCPTimedOutError yield scrapy.Request(url=full_url, errback=self.error_httpbin, dont_filter=True, callback=self.parse_list, meta={"hd": header}) def error_httpbin(self, failure): # failure.request is the Request object, if you need to retry, directly yield can # if failure.check(HttpError): # these exceptions come from HttpError spider middleware # you can get the non-200 response # response = failure.value.response # self.logger.error('HttpError on %s', response.url) if failure.check(DNSLookupError): print("DNSLookupError------->") # this is the original request request = failure.request yield request # self.logger.error('DNSLookupError on %s', request.url) elif failure.check(TimeoutError, TCPTimedOutError): print("timeout------->") request = failure.request yield request # self.logger.error('TimeoutError on %s', request.url)
I am recording this here because I had not handled timeout exceptions this way before.