Python scrapy.signals.spider_opened() Examples

The following are 30 code examples of scrapy.signals.spider_opened, the signal Scrapy sends after a spider has been opened for crawling. Each example is shown with the project and source file it comes from. You may also want to check out all available functions/classes of the module scrapy.signals.
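
As the examples below show, the usual pattern is to connect a handler to this signal from a component's from_crawler hook. A minimal self-contained sketch (the class name is illustrative, not taken from any project below):

from scrapy import signals

class SpiderOpenedLogger:
    """Tiny extension that logs when each spider opens."""

    @classmethod
    def from_crawler(cls, crawler):
        # Scrapy calls this to build the component; it is the natural
        # place to register handlers on the crawler's SignalManager.
        ext = cls()
        crawler.signals.connect(ext.spider_opened, signal=signals.spider_opened)
        return ext

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)

To enable a component like this, it would be listed in the project's EXTENSIONS setting.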
Example #1
Source File: py_my_scrapy_redis_server.py    From vrequest with MIT License
def spider_opened(self):
        # Start two twisted LoopingCall timers when the spider opens:
        # one polls for stopped tasks to clean up, the other schedules new requests.
        task.LoopingCall(self._stop_clear).start(self.interval)
        task.LoopingCall(self.schedule_next_requests).start(self.interval_s)
Example #2
Source File: middlewares.py    From scraping-ebay with MIT License
@classmethod
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s 
Example #3
Source File: middlewares.py    From Sentiment-analysis-of-financial-news-data with MIT License
@classmethod
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s 
Example #4
Source File: middlewares.py    From scraping-ebay with MIT License
@classmethod
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s 
Example #5
Source File: middlewares.py    From scraping-ebay with MIT License
def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name) 
Example #6
Source File: middlewares.py    From scraping-ebay with MIT License
def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name) 
Example #7
Source File: pipelines.py    From tabebot with MIT License
def __init__(self):
        dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed) 
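
Note that this hookup uses the older pydispatch dispatcher; current Scrapy versions favor the crawler's signal manager, as in the from_crawler examples elsewhere on this page. A sketch of the equivalent wiring (the class name is assumed):

from scrapy import signals

class ExportPipeline:
    @classmethod
    def from_crawler(cls, crawler):
        # Same two connections as the __init__ above, but registered
        # through the crawler's SignalManager instead of the global dispatcher.
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signal=signals.spider_closed)
        return pipeline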
Example #8
Source File: middlewares.py    From Sentiment-analysis-of-financial-news-data with MIT License
def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name) 
Example #9
Source File: middlewares.py    From Sentiment-analysis-of-financial-news-data with MIT License
@classmethod
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s 
Example #10
Source File: middlewares.py    From Sentiment-analysis-of-financial-news-data with MIT License
def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name) 
Example #11
Source File: pipelines.py    From tabebot with MIT License
def spider_opened(self, spider):
        # Open one JSON-lines file and one exporter per item type to be saved.
        self.files = {name: open(name + '.json', 'w+b')
                      for name in self.save_types}
        self.exporters = {name: UnicodeJsonLinesItemExporter(self.files[name])
                          for name in self.save_types}
        for exporter in self.exporters.values():
            exporter.start_exporting()
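
The __init__ in Example #7 also connects spider_closed; that handler is not part of this excerpt, but given the files and exporters created above it plausibly looks something like this (a sketch, assuming those same attributes):

def spider_closed(self, spider):
        # Finish each exporter and close its backing file when the spider ends.
        for exporter in self.exporters.values():
            exporter.finish_exporting()
        for file in self.files.values():
            file.close()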
Example #12
Source File: middlewares.py    From spidermon with BSD 3-Clause "New" or "Revised" License
@classmethod
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s 
Example #13
Source File: py_my_scrapy_redis_server.py    From vrequest with MIT License
def spider_opened(self, spider):
        spider.logger.info('Spider VDownloaderMiddleware opened.')

# Tail end of the item middleware: whether an item's data is stored into redis is decided by whether the _b2b89079b2f7befcf4691a98a3f0a2a2 field exists in the item
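
That tail-end item middleware is not part of this excerpt, but a minimal sketch of the described check might look like the following. The class name, the redis key format, and self.server (a redis-py client) are assumptions; only the marker field name comes from the comment above:

import json

class RedisStoreItemPipeline:
    # Hypothetical illustration of the marker-field check described above.
    def process_item(self, item, spider):
        if item.get('_b2b89079b2f7befcf4691a98a3f0a2a2') is not None:
            # Marker present: serialize the item and push it into redis.
            self.server.rpush('%(name)s:items' % {'name': spider.name},
                              json.dumps(dict(item)))
        return item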
Example #14
Source File: py_my_scrapy_redis_server.py    From vrequest with MIT License
def spider_opened(self, spider):
        spider.logger.info('Spider VSpiderMiddleware opened.')

# The downloader middleware needs to hook a small piece of logic inside its process_response function to work around a distributed-crawling exception
Example #15
Source File: py_my_scrapy_redis_server.py    From vrequest with MIT License
@classmethod
def from_crawler(cls, crawler):
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s 
Example #16
Source File: middlewares.py    From spidermon with BSD 3-Clause "New" or "Revised" License
def spider_opened(self, spider):
        spider.logger.info("Spider opened: %s" % spider.name) 
Example #17
Source File: py_my_scrapy_redis_server.py    From vrequest with MIT License
def setup_redis(self, crawler=None):
        if self.server is not None:
            return
        if crawler is None:
            crawler = getattr(self, 'crawler', None)
        if crawler is None:
            raise ValueError("crawler is required")
        settings = crawler.settings
        if self.redis_key is None:
            self.redis_key = settings.get(
                'REDIS_START_URLS_KEY', defaults.START_URLS_KEY,
            )
        self.redis_key = self.redis_key % {'name': self.name}
        if not self.redis_key.strip():
            raise ValueError("redis_key must not be empty")
        if self.redis_batch_size is None:
            self.redis_batch_size = settings.getint(
                'REDIS_START_URLS_BATCH_SIZE',
                settings.getint('CONCURRENT_REQUESTS'),
            )
        try:
            self.redis_batch_size = int(self.redis_batch_size)
        except (TypeError, ValueError):
            raise ValueError("redis_batch_size must be an integer")
        if self.redis_encoding is None:
            self.redis_encoding = settings.get('REDIS_ENCODING', defaults.REDIS_ENCODING)
        self.logger.info("Reading start URLs from redis key '%(redis_key)s' "
                         "(batch size: %(redis_batch_size)s, encoding: %(redis_encoding)s",
                         self.__dict__)
        self.server = connection.from_settings(crawler.settings)
        # From here on, tasks are no longer assigned only when the spider is idle;
        # assignment runs continuously (to support multiple tasks), so no task is left
        # unable to start. The spider_idle function is therefore no longer responsible
        # for running schedule_next_requests and only raises the DontCloseSpider exception.
        # A new schedule_next_requests polling job is started to pick up tasks to launch,
        # and a new _stop_clear polling job is started to detect tasks that have stopped.
        crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
        # Pull the log format templates onto this object; the functions below need them
        self._clear_debug_pc   = crawler.settings.getbool('CLEAR_DEBUG_PC')
        self._clear_dupefilter = crawler.settings.getbool('CLEAR_DUPEFILTER')
        self._spider_id_debg_format = crawler.settings.get('DEBUG_PC_FORMAT')
        self._spider_id_task_format = crawler.settings.get('TASK_ID_FORMAT')
        self._spider_id_dupk_format = crawler.settings.get('SCHEDULER_DUPEFILTER_KEY')
        # Opening this task binds two scheduled, never-stopping polling functions:
        # 1. check for tasks that have already stopped and clean up their task space;
        # 2. fetch new start_urls to launch new task scripts, handling task initialization and task-space issues.
        self.limit_check = 0 # stagger different tasks' check timing a little so they don't all crowd into one _stop_clear iteration
        self.limit_same  = 2 # number of consecutive identical log snapshots required
        self.interval    = 5 # how many seconds between runs of the stop-detection check
        # (in theory, the average time to detect a stopped task is about (limit_check+1) * (limit_same+1) * interval)
        # this can be tuned smaller during testing to make framework issues easier to observe
        self.interval_s  = 2 # how many seconds between runs of the start-detection check
        self.limit_log   = 8 # extra setting: cap how many tasks the stop check logs per pass, so hundreds of tasks aren't all printed every time
        crawler.signals.connect(self.spider_opened, signal=signals.spider_opened) 
Example #18
Source File: py_my_scrapy_redis_server.py    From vrequest with MIT License
def spider_opened(self, spider): 
        spider.logger.info('Spider RedisCoreStats opened.') 
Example #19
Source File: middlewares.py    From vrequest with MIT License
@classmethod
def from_crawler(cls, crawler):
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(s.spider_closed, signal=signals.spider_closed)
        return s 
Example #20
Source File: middlewares.py    From vrequest with MIT License
def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)

# Configure how selenium is used
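
The comment above only hints at the selenium setup; a common shape for such a downloader middleware is sketched below. The class name and the headless-Chrome choice are assumptions, not code from vrequest:

from scrapy.http import HtmlResponse
from selenium import webdriver

class SeleniumDownloaderMiddleware:
    def __init__(self):
        # Any webdriver works; headless Chrome is a common choice.
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        self.driver = webdriver.Chrome(options=options)

    def process_request(self, request, spider):
        # Render the page in the browser, then return the final HTML
        # so Scrapy skips its own download for this request.
        self.driver.get(request.url)
        return HtmlResponse(self.driver.current_url,
                            body=self.driver.page_source,
                            encoding='utf-8', request=request)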
Example #21
Source File: middlewares.py    From vrequest with MIT License
@classmethod
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s 
Example #22
Source File: middlewares.py    From vrequest with MIT License
def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name) 
Example #23
Source File: middlewares.py    From vrequest with MIT License
@classmethod
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s 
Example #24
Source File: middlewares.py    From platformsh-docs with Creative Commons Attribution Share Alike 4.0 International
def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name) 
Example #25
Source File: middlewares.py    From platformsh-docs with Creative Commons Attribution Share Alike 4.0 International
@classmethod
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s 
Example #26
Source File: middlewares.py    From platformsh-docs with Creative Commons Attribution Share Alike 4.0 International
def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name) 
Example #27
Source File: middlewares.py    From platformsh-docs with Creative Commons Attribution Share Alike 4.0 International
@classmethod
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s 
Example #28
Source File: middlewares.py    From SourceCodeOfBook with MIT License
def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name) 
Example #29
Source File: middlewares.py    From SourceCodeOfBook with MIT License
@classmethod
def from_crawler(cls, crawler):
        # This method is used by Scrapy to create your spiders.
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s 
Example #30
Source File: middlewares.py    From SourceCodeOfBook with MIT License
def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)