Python scrapy.signals.spider_closed() Examples

The following are 28 code examples of scrapy.signals.spider_closed(), taken from open source projects; the originating project, source file, and license are noted above each example. You may also want to check out the other available functions and classes of the module scrapy.signals.
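Most of the examples follow the same two-step pattern: override from_crawler to connect a handler to signals.spider_closed, then do teardown work in the handler, which receives the spider and a reason string. A minimal sketch of that pattern (the spider name, URL, and handler body here are illustrative, not taken from any project below):

import scrapy
from scrapy import signals


class ExampleSpider(scrapy.Spider):
    # Hypothetical spider, for illustration only.
    name = 'example'
    start_urls = ['https://example.com']

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(ExampleSpider, cls).from_crawler(crawler, *args, **kwargs)
        # Call spider.spider_closed when the crawl finishes or is stopped.
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider

    def parse(self, response):
        yield {'url': response.url}

    def spider_closed(self, spider, reason):
        # 'reason' is e.g. 'finished', 'cancelled' or 'shutdown'.
        spider.logger.info('Spider closed: %s, %s', spider.name, reason)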
Example #1
Source File: pixiv-beta.py    From Pixiv-Crawler with GNU General Public License v3.0
def update_collection_set(cls, item, response, spider):
        # if cls.entry == "COLLECTION":
        cls.collection_set.add(item["pid"].split('_')[0])
        cls.process = len(cls.collection_set) - cls.init_colletion_set_size
        # for debug only
        if cls.process > cls.maxsize:
            if cls.entry == "COLLECTION":
                with open("./.trace", "wb") as f:
                    pickle.dump(cls.collection_set, f)

            # store .json file
            f = open("data_{0}.json".format('_'.join(cf.get('SRH', 'TAGS').split(" "))), 'w')
            data = [item.__dict__() for item in cls.data]
            json.dump(data, f)

            print("Crawling complete, got {0} data".format(len(cls.data)))
            f.close()
            os.abort()
            # raise CloseSpider
            # cls.signalManger.send_catch_log(signal=signals.spider_closed) 
Example #2
Source File: closespider.py    From learn_python3_spider with MIT License
def __init__(self, crawler):
        self.crawler = crawler

        self.close_on = {
            'timeout': crawler.settings.getfloat('CLOSESPIDER_TIMEOUT'),
            'itemcount': crawler.settings.getint('CLOSESPIDER_ITEMCOUNT'),
            'pagecount': crawler.settings.getint('CLOSESPIDER_PAGECOUNT'),
            'errorcount': crawler.settings.getint('CLOSESPIDER_ERRORCOUNT'),
            }

        if not any(self.close_on.values()):
            raise NotConfigured

        self.counter = defaultdict(int)

        if self.close_on.get('errorcount'):
            crawler.signals.connect(self.error_count, signal=signals.spider_error)
        if self.close_on.get('pagecount'):
            crawler.signals.connect(self.page_count, signal=signals.response_received)
        if self.close_on.get('timeout'):
            crawler.signals.connect(self.spider_opened, signal=signals.spider_opened)
        if self.close_on.get('itemcount'):
            crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
        crawler.signals.connect(self.spider_closed, signal=signals.spider_closed) 
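Only the signal wiring is shown above; the connected handlers are defined later in the same class. As a sketch of how two of them behave in Scrapy's CloseSpider extension (paraphrased from Scrapy's closespider.py; details may vary between versions, and reactor comes from twisted.internet):

def page_count(self, response, request, spider):
    # Fires on every response_received; stop once the page limit is hit.
    self.counter['pagecount'] += 1
    if self.counter['pagecount'] == self.close_on['pagecount']:
        self.crawler.engine.close_spider(spider, 'closespider_pagecount')

def spider_opened(self, spider):
    # Schedule a forced close after CLOSESPIDER_TIMEOUT seconds.
    self.task = reactor.callLater(self.close_on['timeout'],
                                  self.crawler.engine.close_spider,
                                  spider, reason='closespider_timeout')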
Example #3
Source File: stock_kdata_spider_ths.py    From fooltrader with MIT License
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(StockKDataSpiderTHS, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider 
Example #4
Source File: feedexport.py    From learn_python3_spider with MIT License
def from_crawler(cls, crawler):
        o = cls(crawler.settings)
        o.crawler = crawler
        crawler.signals.connect(o.open_spider, signals.spider_opened)
        crawler.signals.connect(o.close_spider, signals.spider_closed)
        crawler.signals.connect(o.item_scraped, signals.item_scraped)
        return o 
Example #5
Source File: america_stock_finance_spider.py    From fooltrader with MIT License
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(AmericaStockFinanceSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider 
Example #6
Source File: pixiv-beta.py    From Pixiv-Crawler with GNU General Public License v3.0
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(pixivSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        crawler.signals.connect(cls.update_collection_set, signal=signals.item_scraped)
        return spider
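The update_collection_set classmethod connected to item_scraped here is the handler shown in Example #1, from the same pixiv-beta.py; item_scraped handlers receive the item, the response, and the spider, which matches its signature.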

Example #7
Source File: sp500_spider.py    From fooltrader with MIT License
def spider_closed(self, spider, reason):
        self.df_pe['close'] = self.df_close['close']
        self.df_pe['code'] = self.security_item['code']
        self.df_pe['securityId'] = self.security_item['id']
        self.df_pe['name'] = self.security_item['name']
        self.df_pe.to_csv(get_kdata_path(self.security_item), index=False)
        spider.logger.info('Spider closed: %s,%s\n', spider.name, reason) 
Example #8
Source File: america_list_spider.py    From fooltrader with MIT License
def spider_closed(self, spider, reason):
        spider.logger.info('Spider closed: %s,%s\n', spider.name, reason) 
Example #9
Source File: america_list_spider.py    From fooltrader with MIT License
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(AmericaListSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider 
Example #10
Source File: stock_trading_date_spider.py    From fooltrader with MIT License
def spider_closed(self, spider, reason):
        spider.logger.info('Spider closed: %s,%s\n', spider.name, reason) 
Example #11
Source File: stock_trading_date_spider.py    From fooltrader with MIT License
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(StockTradingDateSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider 
Example #12
Source File: america_stock_kdata_spider_163.py    From fooltrader with MIT License
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(AmericaStockKdataSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider 
Example #13
Source File: stock_summary_spider.py    From fooltrader with MIT License
def spider_closed(self, spider, reason):
        self.current_df = self.current_df.loc[:, KDATA_INDEX_COL]
        print(self.current_df)
        self.current_df.to_csv(get_kdata_path(item=self.security_item), index=False)
        spider.logger.info('Spider closed: %s,%s\n', spider.name, reason) 
Example #14
Source File: stock_summary_spider.py    From fooltrader with MIT License
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(StockSummarySpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider 
Example #15
Source File: sina_category_spider.py    From fooltrader with MIT License
def spider_closed(self, spider, reason):
        if self.sh_df[self.category_type].any():
            self.sh_df.to_csv(get_security_list_path('stock', 'sh'), index=False)
        if self.sz_df[self.category_type].any():
            self.sz_df.to_csv(get_security_list_path('stock', 'sz'), index=False)
        spider.logger.info('Spider closed: %s,%s\n', spider.name, reason) 
Example #16
Source File: sina_category_spider.py    From fooltrader with MIT License
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(SinaCategorySpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider 
Example #17
Source File: stock_finance_spider.py    From fooltrader with MIT License
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(StockFinanceSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider 
Example #18
Source File: future_shfe_spider.py    From fooltrader with MIT License
def spider_closed(self, spider, reason):
        if self.trading_dates:
            parse_shfe_day_data()
        else:
            parse_shfe_data()
        spider.logger.info('Spider closed: %s,%s\n', spider.name, reason) 
Example #19
Source File: future_shfe_spider.py    From fooltrader with MIT License
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(FutureShfeSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider 
Example #20
Source File: shfe_trading_calendar_spider.py    From fooltrader with MIT License
def spider_closed(self, spider, reason):
        if self.trading_dates:
            if self.saved_trading_dates:
                self.trading_dates.append(self.saved_trading_dates)
            result_list = drop_duplicate(self.trading_dates)
            result_list = sorted(result_list)

            the_path = get_exchange_trading_calendar_path('future', 'shfe')
            with open(the_path, 'w') as outfile:
                json.dump(result_list, outfile)

        spider.logger.info('Spider closed: %s,%s\n', spider.name, reason) 
Example #21
Source File: shfe_trading_calendar_spider.py    From fooltrader with MIT License
def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(ShfeTradingCalendarSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider 
Example #22
Source File: pipelines.py    From Learning-Python-Networking-Second-Edition with MIT License
def spider_closed(self, spider):
		# Commit any pending inserts before the spider shuts down.
		db.commit()
Example #23
Source File: pipelines.py    From Learning-Python-Networking-Second-Edition with MIT License
def from_crawler(cls, crawler):
		pipeline = cls()
		crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
		crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
		return pipeline 
Example #24
Source File: pipelines.py    From Learning-Python-Networking-Second-Edition with MIT License
def spider_closed(self, spider):
		self.exporter.finish_exporting()
		file = self.files.pop(spider)
		file.close() 
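This handler is only the teardown half of an export pipeline. A matching spider_opened, assuming a CsvItemExporter-based pipeline (the exporter class, filename, and self.files dict are illustrative, not from the book's code), might look like:

from scrapy.exporters import CsvItemExporter

def spider_opened(self, spider):
    # Open one export file per spider; assumes self.files = {} in __init__.
    file = open('%s_items.csv' % spider.name, 'wb')
    self.files[spider] = file
    self.exporter = CsvItemExporter(file)
    self.exporter.start_exporting()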
Example #25
Source File: pipelines.py    From Learning-Python-Networking-Second-Edition with MIT License
def spider_closed(self, spider):
		self.file.close() 
Example #26
Source File: httpcache.py    From learn_python3_spider with MIT License
def spider_closed(self, spider):
        self.storage.close_spider(spider) 
Example #27
Source File: httpcache.py    From learn_python3_spider with MIT License
def from_crawler(cls, crawler):
        o = cls(crawler.settings, crawler.stats)
        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
        return o 
Example #28
Source File: america_stock_finance_spider.py    From fooltrader with MIT License
def spider_closed(self, spider, reason):
        spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)