Python scrapy.signals.spider_closed() Examples
The following are 28 code examples of scrapy.signals.spider_closed(). The original project and source file are noted above each example. You may also want to check out all the available functions and classes of the scrapy.signals module.
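Most of the examples below follow one of two patterns: a spider overrides from_crawler to connect its own spider_closed handler, or an extension/pipeline connects handlers for several signals when it is built. As a minimal, self-contained sketch of the first pattern (the spider name and URL are placeholders, not taken from any example below):

import scrapy
from scrapy import signals


class ExampleSpider(scrapy.Spider):
    # Hypothetical spider used only to illustrate the spider_closed pattern.
    name = 'example'
    start_urls = ['https://example.com']

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        spider = super(ExampleSpider, cls).from_crawler(crawler, *args, **kwargs)
        # Ask the crawler to call spider.spider_closed once the spider finishes.
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider

    def spider_closed(self, spider, reason):
        # reason distinguishes a normal 'finished' run from 'cancelled' or 'shutdown'.
        spider.logger.info('Spider closed: %s (%s)', spider.name, reason)

    def parse(self, response):
        pass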
Example #1
Source File: pixiv-beta.py from Pixiv-Crawler with GNU General Public License v3.0

def update_collection_set(cls, item, response, spider):
    # if cls.entry == "COLLECTION":
    cls.collection_set.add(item["pid"].split('_')[0])
    cls.process = len(cls.collection_set) - cls.init_colletion_set_size  # for debug only
    if cls.process > cls.maxsize:
        if cls.entry == "COLLECTION":
            with open("./.trace", "wb") as f:
                pickle.dump(cls.collection_set, f)
        # store .json file
        f = open("data_{0}.json".format('_'.join(cf.get('SRH', 'TAGS').split(" "))), 'w')
        data = [item.__dict__() for item in cls.data]
        json.dump(data, f)
        print("Crawling complete, got {0} data".format(len(cls.data)))
        f.close()
        os.abort()
        # raise CloseSpider
        # cls.signalManger.send_catch_log(signal=signals.spider_closed)
Example #2
Source File: closespider.py from learn_python3_spider with MIT License

def __init__(self, crawler):
    self.crawler = crawler

    self.close_on = {
        'timeout': crawler.settings.getfloat('CLOSESPIDER_TIMEOUT'),
        'itemcount': crawler.settings.getint('CLOSESPIDER_ITEMCOUNT'),
        'pagecount': crawler.settings.getint('CLOSESPIDER_PAGECOUNT'),
        'errorcount': crawler.settings.getint('CLOSESPIDER_ERRORCOUNT'),
    }

    if not any(self.close_on.values()):
        raise NotConfigured

    self.counter = defaultdict(int)

    if self.close_on.get('errorcount'):
        crawler.signals.connect(self.error_count, signal=signals.spider_error)
    if self.close_on.get('pagecount'):
        crawler.signals.connect(self.page_count, signal=signals.response_received)
    if self.close_on.get('timeout'):
        crawler.signals.connect(self.spider_opened, signal=signals.spider_opened)
    if self.close_on.get('itemcount'):
        crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
    crawler.signals.connect(self.spider_closed, signal=signals.spider_closed)
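The extension above is driven entirely by project settings: each CLOSESPIDER_* value enables one shutdown condition, a zero or unset value leaves that condition disabled, and NotConfigured is raised if no condition is enabled at all. A few illustrative (not default) values:

# settings.py: illustrative thresholds for the CloseSpider extension above
CLOSESPIDER_TIMEOUT = 3600    # close the spider after an hour of runtime
CLOSESPIDER_ITEMCOUNT = 1000  # ...or once 1000 items have been scraped
CLOSESPIDER_PAGECOUNT = 0     # 0 keeps this condition disabled
CLOSESPIDER_ERRORCOUNT = 0    # 0 keeps this condition disabled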
Example #3
Source File: stock_kdata_spider_ths.py from fooltrader with MIT License

def from_crawler(cls, crawler, *args, **kwargs):
    spider = super(StockKDataSpiderTHS, cls).from_crawler(crawler, *args, **kwargs)
    crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
    return spider
Example #4
Source File: feedexport.py from learn_python3_spider with MIT License

def from_crawler(cls, crawler):
    o = cls(crawler.settings)
    o.crawler = crawler
    crawler.signals.connect(o.open_spider, signals.spider_opened)
    crawler.signals.connect(o.close_spider, signals.spider_closed)
    crawler.signals.connect(o.item_scraped, signals.item_scraped)
    return o
Example #5
Source File: america_stock_finance_spider.py from fooltrader with MIT License

def from_crawler(cls, crawler, *args, **kwargs):
    spider = super(AmericaStockFinanceSpider, cls).from_crawler(crawler, *args, **kwargs)
    crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
    return spider
Example #6
Source File: pixiv-beta.py from Pixiv-Crawler with GNU General Public License v3.0

def from_crawler(cls, crawler, *args, **kwargs):
    spider = super(pixivSpider, cls).from_crawler(crawler, *args, **kwargs)
    crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
    crawler.signals.connect(cls.update_collection_set, signal=signals.item_scraped)
    return spider

# allowed_domains = []
Example #7
Source File: sp500_spider.py from fooltrader with MIT License

def spider_closed(self, spider, reason):
    self.df_pe['close'] = self.df_close['close']
    self.df_pe['code'] = self.security_item['code']
    self.df_pe['securityId'] = self.security_item['id']
    self.df_pe['name'] = self.security_item['name']
    self.df_pe.to_csv(get_kdata_path(self.security_item), index=False)
    spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)
Example #8
Source File: america_list_spider.py from fooltrader with MIT License

def spider_closed(self, spider, reason):
    spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)
Example #9
Source File: america_list_spider.py from fooltrader with MIT License

def from_crawler(cls, crawler, *args, **kwargs):
    spider = super(AmericaListSpider, cls).from_crawler(crawler, *args, **kwargs)
    crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
    return spider
Example #10
Source File: stock_trading_date_spider.py from fooltrader with MIT License

def spider_closed(self, spider, reason):
    spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)
Example #11
Source File: stock_trading_date_spider.py from fooltrader with MIT License

def from_crawler(cls, crawler, *args, **kwargs):
    spider = super(StockTradingDateSpider, cls).from_crawler(crawler, *args, **kwargs)
    crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
    return spider
Example #12
Source File: america_stock_kdata_spider_163.py from fooltrader with MIT License

def from_crawler(cls, crawler, *args, **kwargs):
    spider = super(AmericaStockKdataSpider, cls).from_crawler(crawler, *args, **kwargs)
    crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
    return spider
Example #13
Source File: stock_summary_spider.py from fooltrader with MIT License

def spider_closed(self, spider, reason):
    self.current_df = self.current_df.loc[:, KDATA_INDEX_COL]
    print(self.current_df)
    self.current_df.to_csv(get_kdata_path(item=self.security_item), index=False)
    spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)
Example #14
Source File: stock_summary_spider.py from fooltrader with MIT License

def from_crawler(cls, crawler, *args, **kwargs):
    spider = super(StockSummarySpider, cls).from_crawler(crawler, *args, **kwargs)
    crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
    return spider
Example #15
Source File: sina_category_spider.py from fooltrader with MIT License

def spider_closed(self, spider, reason):
    if self.sh_df[self.category_type].any():
        self.sh_df.to_csv(get_security_list_path('stock', 'sh'), index=False)
    if self.sz_df[self.category_type].any():
        self.sz_df.to_csv(get_security_list_path('stock', 'sz'), index=False)
    spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)
Example #16
Source File: sina_category_spider.py from fooltrader with MIT License

def from_crawler(cls, crawler, *args, **kwargs):
    spider = super(SinaCategorySpider, cls).from_crawler(crawler, *args, **kwargs)
    crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
    return spider
Example #17
Source File: stock_finance_spider.py from fooltrader with MIT License

def from_crawler(cls, crawler, *args, **kwargs):
    spider = super(StockFinanceSpider, cls).from_crawler(crawler, *args, **kwargs)
    crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
    return spider
Example #18
Source File: future_shfe_spider.py from fooltrader with MIT License

def spider_closed(self, spider, reason):
    if self.trading_dates:
        parse_shfe_day_data()
    else:
        parse_shfe_data()
    spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)
Example #19
Source File: future_shfe_spider.py from fooltrader with MIT License

def from_crawler(cls, crawler, *args, **kwargs):
    spider = super(FutureShfeSpider, cls).from_crawler(crawler, *args, **kwargs)
    crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
    return spider
Example #20
Source File: shfe_trading_calendar_spider.py from fooltrader with MIT License

def spider_closed(self, spider, reason):
    if self.trading_dates:
        if self.saved_trading_dates:
            self.trading_dates.append(self.saved_trading_dates)
        result_list = drop_duplicate(self.trading_dates)
        result_list = sorted(result_list)
        the_path = get_exchange_trading_calendar_path('future', 'shfe')
        with open(the_path, 'w') as outfile:
            json.dump(result_list, outfile)
    spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)
Example #21
Source File: shfe_trading_calendar_spider.py from fooltrader with MIT License

def from_crawler(cls, crawler, *args, **kwargs):
    spider = super(ShfeTradingCalendarSpider, cls).from_crawler(crawler, *args, **kwargs)
    crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
    return spider
Example #22
Source File: pipelines.py from Learning-Python-Networking-Second-Edition with MIT License

def spider_closed(self, spider):
    db.commit()  # Insert data in database
Example #23
Source File: pipelines.py from Learning-Python-Networking-Second-Edition with MIT License

def from_crawler(cls, crawler):
    pipeline = cls()
    crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
    crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
    return pipeline
Example #24
Source File: pipelines.py from Learning-Python-Networking-Second-Edition with MIT License

def spider_closed(self, spider):
    self.exporter.finish_exporting()
    file = self.files.pop(spider)
    file.close()
Example #25
Source File: pipelines.py from Learning-Python-Networking-Second-Edition with MIT License

def spider_closed(self, spider):
    self.file.close()
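The pipelines.py snippets above are fragments of one recurring pattern: connect handlers in from_crawler, open a file and an exporter when the spider starts, and flush and close them on spider_closed. A minimal self-contained sketch of that pattern, with an illustrative class name and file name rather than the book's actual code:

from scrapy import signals
from scrapy.exporters import JsonItemExporter


class JsonExportPipeline(object):
    # Hypothetical pipeline assembled from the fragments shown above.

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # One output file per spider run.
        self.file = open('%s_items.json' % spider.name, 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        # Finish the export and release the file handle.
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item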
Example #26
Source File: httpcache.py from learn_python3_spider with MIT License

def spider_closed(self, spider):
    self.storage.close_spider(spider)
Example #27
Source File: httpcache.py from learn_python3_spider with MIT License

def from_crawler(cls, crawler):
    o = cls(crawler.settings, crawler.stats)
    crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
    crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
    return o
Example #28
Source File: america_stock_finance_spider.py from fooltrader with MIT License

def spider_closed(self, spider, reason):
    spider.logger.info('Spider closed: %s,%s\n', spider.name, reason)