Python scrapy.exceptions.NotConfigured() Examples
The following are 19 code examples of scrapy.exceptions.NotConfigured, extracted from open-source projects. The source file, project, and license are noted above each example. You may also want to check out all available functions and classes of the scrapy.exceptions module.
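Across these examples, NotConfigured appears on both sides of one contract: a component (extension, middleware, pipeline, feed exporter or storage) raises it from its constructor or its from_crawler()/from_settings() factory to opt out, and Scrapy's loading machinery catches it and skips the component instead of aborting the crawl. As a quick orientation, here is a minimal sketch of the raising side; the MyExtension class and the MYEXT_ENABLED setting are made up for illustration:

from scrapy.exceptions import NotConfigured

class MyExtension:
    """Hypothetical extension that stays disabled unless switched on."""

    @classmethod
    def from_crawler(cls, crawler):
        # Raising NotConfigured makes Scrapy skip this component;
        # the crawl continues without it.
        if not crawler.settings.getbool('MYEXT_ENABLED'):
            raise NotConfigured('MYEXT_ENABLED setting is not set')
        return cls()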
Example #1
Source File: feedexport.py From learn_python3_spider with MIT License

def __init__(self, settings):
    self.settings = settings
    self.urifmt = settings['FEED_URI']
    if not self.urifmt:
        raise NotConfigured
    self.format = settings['FEED_FORMAT'].lower()
    self.export_encoding = settings['FEED_EXPORT_ENCODING']
    self.storages = self._load_components('FEED_STORAGES')
    self.exporters = self._load_components('FEED_EXPORTERS')
    if not self._storage_supported(self.urifmt):
        raise NotConfigured
    if not self._exporter_supported(self.format):
        raise NotConfigured
    self.store_empty = settings.getbool('FEED_STORE_EMPTY')
    self._exporting = False
    self.export_fields = settings.getlist('FEED_EXPORT_FIELDS') or None
    self.indent = None
    if settings.get('FEED_EXPORT_INDENT') is not None:
        self.indent = settings.getint('FEED_EXPORT_INDENT')
    uripar = settings['FEED_URI_PARAMS']
    self._uripar = load_object(uripar) if uripar else lambda x, y: None

Example #2
Source File: telnet.py From learn_python3_spider with MIT License

def __init__(self, crawler):
    if not crawler.settings.getbool('TELNETCONSOLE_ENABLED'):
        raise NotConfigured
    if not TWISTED_CONCH_AVAILABLE:
        raise NotConfigured(
            'TELNETCONSOLE_ENABLED setting is True but required twisted '
            'modules failed to import:\n' + _TWISTED_CONCH_TRACEBACK)
    self.crawler = crawler
    self.noisy = False
    self.portrange = [int(x) for x in
                      crawler.settings.getlist('TELNETCONSOLE_PORT')]
    self.host = crawler.settings['TELNETCONSOLE_HOST']
    self.username = crawler.settings['TELNETCONSOLE_USERNAME']
    self.password = crawler.settings['TELNETCONSOLE_PASSWORD']
    if not self.password:
        self.password = binascii.hexlify(os.urandom(8)).decode('utf8')
        logger.info('Telnet Password: %s', self.password)
    self.crawler.signals.connect(self.start_listening, signals.engine_started)
    self.crawler.signals.connect(self.stop_listening, signals.engine_stopped)

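For context, a hypothetical settings.py fragment exercising the settings this console reads; all values below are placeholders, not recommendations:

# settings.py (placeholder values)
TELNETCONSOLE_ENABLED = True
TELNETCONSOLE_PORT = [6023, 6073]   # port range to try binding
TELNETCONSOLE_HOST = '127.0.0.1'
TELNETCONSOLE_USERNAME = 'scrapy'
TELNETCONSOLE_PASSWORD = 'secret'   # left unset, a random password is generated and logged
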
Example #3
Source File: middleware.py From learn_python3_spider with MIT License

def from_settings(cls, settings, crawler=None):
    mwlist = cls._get_mwlist_from_settings(settings)
    middlewares = []
    enabled = []
    for clspath in mwlist:
        try:
            mwcls = load_object(clspath)
            mw = create_instance(mwcls, settings, crawler)
            middlewares.append(mw)
            enabled.append(clspath)
        except NotConfigured as e:
            if e.args:
                clsname = clspath.split('.')[-1]
                logger.warning("Disabled %(clsname)s: %(eargs)s",
                               {'clsname': clsname, 'eargs': e.args[0]},
                               extra={'crawler': crawler})

    logger.info("Enabled %(componentname)ss:\n%(enabledlist)s",
                {'componentname': cls.component_name,
                 'enabledlist': pprint.pformat(enabled)},
                extra={'crawler': crawler})
    return cls(*middlewares)

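The except NotConfigured branch above is what makes optional middlewares possible: if the exception carries a message, the manager logs a "Disabled ..." warning and moves on; a bare raise NotConfigured disables the component silently. A minimal sketch of a middleware relying on that behaviour, with a made-up OPTIONAL_MW_ENABLED setting:

from scrapy.exceptions import NotConfigured

class OptionalMiddleware:
    """Hypothetical middleware that opts out when not configured."""

    @classmethod
    def from_crawler(cls, crawler):
        if not crawler.settings.getbool('OPTIONAL_MW_ENABLED'):
            # The message lands in e.args[0] and is logged as
            # "Disabled OptionalMiddleware: ..." by the manager above.
            raise NotConfigured('OPTIONAL_MW_ENABLED is not set')
        return cls()
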
Example #4
Source File: feed.py From learn_python3_spider with MIT License

def parse(self, response):
    if not hasattr(self, 'parse_node'):
        raise NotConfigured('You must define parse_node method in order to scrape this XML feed')

    response = self.adapt_response(response)
    if self.iterator == 'iternodes':
        nodes = self._iternodes(response)
    elif self.iterator == 'xml':
        selector = Selector(response, type='xml')
        self._register_namespaces(selector)
        nodes = selector.xpath('//%s' % self.itertag)
    elif self.iterator == 'html':
        selector = Selector(response, type='html')
        self._register_namespaces(selector)
        nodes = selector.xpath('//%s' % self.itertag)
    else:
        raise NotSupported('Unsupported node iterator')

    return self.parse_nodes(response, nodes)

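To avoid the NotConfigured above, a subclass must implement parse_node(). A minimal sketch, with a made-up feed URL and item tag:

from scrapy.spiders import XMLFeedSpider

class ExampleXMLSpider(XMLFeedSpider):
    name = 'example_xml'
    start_urls = ['http://www.example.com/feed.xml']  # placeholder URL
    iterator = 'iternodes'
    itertag = 'item'

    def parse_node(self, response, node):
        # Without this method, parse() raises NotConfigured.
        yield {'title': node.xpath('title/text()').get()}
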
Example #5
Source File: __init__.py From learn_python3_spider with MIT License

def _load_handler(self, scheme, skip_lazy=False):
    path = self._schemes[scheme]
    try:
        dhcls = load_object(path)
        if skip_lazy and getattr(dhcls, 'lazy', True):
            return None
        dh = dhcls(self._crawler.settings)
    except NotConfigured as ex:
        self._notconfigured[scheme] = str(ex)
        return None
    except Exception as ex:
        logger.error('Loading "%(clspath)s" for scheme "%(scheme)s"',
                     {"clspath": path, "scheme": scheme},
                     exc_info=True, extra={'crawler': self._crawler})
        self._notconfigured[scheme] = str(ex)
        return None
    else:
        self._handlers[scheme] = dh
        return dh

Example #6
Source File: middlewares.py From scrapy-rotating-proxies with MIT License

def from_crawler(cls, crawler):
    s = crawler.settings
    proxy_path = s.get('ROTATING_PROXY_LIST_PATH', None)
    if proxy_path is not None:
        with codecs.open(proxy_path, 'r', encoding='utf8') as f:
            proxy_list = [line.strip() for line in f if line.strip()]
    else:
        proxy_list = s.getlist('ROTATING_PROXY_LIST')
    if not proxy_list:
        raise NotConfigured()
    mw = cls(
        proxy_list=proxy_list,
        logstats_interval=s.getfloat('ROTATING_PROXY_LOGSTATS_INTERVAL', 30),
        stop_if_no_proxies=s.getbool('ROTATING_PROXY_CLOSE_SPIDER', False),
        max_proxies_to_try=s.getint('ROTATING_PROXY_PAGE_RETRY_TIMES', 5),
        backoff_base=s.getfloat('ROTATING_PROXY_BACKOFF_BASE', 300),
        backoff_cap=s.getfloat('ROTATING_PROXY_BACKOFF_CAP', 3600),
        crawler=crawler,
    )
    crawler.signals.connect(mw.engine_started,
                            signal=signals.engine_started)
    crawler.signals.connect(mw.engine_stopped,
                            signal=signals.engine_stopped)
    return mw

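A hypothetical settings.py fragment to go with this middleware, based on the setting names read above; the proxy addresses and middleware orders are placeholders:

ROTATING_PROXY_LIST = [
    'proxy1.example.com:8000',
    'proxy2.example.com:8031',
]
# Alternatively, point ROTATING_PROXY_LIST_PATH at a file with one proxy per line.
DOWNLOADER_MIDDLEWARES = {
    'rotating_proxies.middlewares.RotatingProxyMiddleware': 610,
    'rotating_proxies.middlewares.BanDetectionMiddleware': 620,
}
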
Example #7
Source File: middlewares.py From scrapy-crawl-once with MIT License

def from_crawler(cls, crawler):
    s = crawler.settings
    if not s.getbool('CRAWL_ONCE_ENABLED', True):
        raise NotConfigured()
    path = data_path(s.get('CRAWL_ONCE_PATH', 'crawl_once'),
                     createdir=True)
    default = s.getbool('CRAWL_ONCE_DEFAULT', default=False)
    o = cls(path, crawler.stats, default)
    crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
    crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
    return o

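A hypothetical settings.py fragment for this middleware, using the setting names read above; the middleware paths and order values are assumptions, not taken from this example:

CRAWL_ONCE_ENABLED = True        # default; set False to raise NotConfigured
CRAWL_ONCE_PATH = 'crawl_once'   # where request fingerprints are stored
CRAWL_ONCE_DEFAULT = False
SPIDER_MIDDLEWARES = {
    'scrapy_crawl_once.CrawlOnceMiddleware': 100,
}
DOWNLOADER_MIDDLEWARES = {
    'scrapy_crawl_once.CrawlOnceMiddleware': 50,
}
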
Example #8
Source File: feed.py From learn_python3_spider with MIT License

def parse(self, response):
    if not hasattr(self, 'parse_row'):
        raise NotConfigured('You must define parse_row method in order to scrape this CSV feed')
    response = self.adapt_response(response)
    return self.parse_rows(response)

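Analogously to the XML case, the CSV check is satisfied by defining parse_row(). A minimal sketch with a placeholder URL and columns:

from scrapy.spiders import CSVFeedSpider

class ExampleCSVSpider(CSVFeedSpider):
    name = 'example_csv'
    start_urls = ['http://www.example.com/feed.csv']  # placeholder URL
    delimiter = ','
    headers = ['id', 'name']

    def parse_row(self, response, row):
        # Without this method, parse() raises NotConfigured.
        yield {'id': row['id'], 'name': row['name']}
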
Example #9
Source File: middleware.py From scrapy-autounit with BSD 3-Clause "New" or "Revised" License

def __init__(self, crawler):
    settings = crawler.settings
    spider = crawler.spider

    if not any(
        self.__class__.__name__ in s
        for s in settings.getwithbase('SPIDER_MIDDLEWARES').keys()
    ):
        raise ValueError(
            '%s must be in SPIDER_MIDDLEWARES' % (
                self.__class__.__name__,))
    if not settings.getbool('AUTOUNIT_ENABLED'):
        raise NotConfigured('scrapy-autounit is not enabled')
    if settings.getint('CONCURRENT_REQUESTS') > 1:
        logger.warn(
            'Recording with concurrency > 1! '
            'Data races in shared object modification may create broken '
            'tests.'
        )

    self.max_fixtures = settings.getint(
        'AUTOUNIT_MAX_FIXTURES_PER_CALLBACK',
        default=10
    )
    self.max_fixtures = self.max_fixtures if self.max_fixtures >= 10 else 10

    self.base_path = settings.get(
        'AUTOUNIT_BASE_PATH',
        default=os.path.join(get_project_dir(), 'autounit')
    )
    create_dir(self.base_path, exist_ok=True)
    clear_fixtures(self.base_path, sanitize_module_name(spider.name))

    self.fixture_counters = {}

Example #10
Source File: test_middleware.py From scrapy-crawl-once with MIT License

def test_not_configured():
    crawler = get_crawler(settings_dict={'CRAWL_ONCE_ENABLED': False})
    with pytest.raises(NotConfigured):
        CrawlOnceMiddleware.from_crawler(crawler)

Example #11
Source File: feedexport.py From learn_python3_spider with MIT License

def _storage_supported(self, uri):
    scheme = urlparse(uri).scheme
    if scheme in self.storages:
        try:
            self._get_storage(uri)
            return True
        except NotConfigured as e:
            logger.error("Disabled feed storage scheme: %(scheme)s. "
                         "Reason: %(reason)s",
                         {'scheme': scheme, 'reason': str(e)})
    else:
        logger.error("Unknown feed storage scheme: %(scheme)s",
                     {'scheme': scheme})

Example #12
Source File: feedexport.py From learn_python3_spider with MIT License

def _load_components(self, setting_prefix):
    conf = without_none_values(self.settings.getwithbase(setting_prefix))
    d = {}
    for k, v in conf.items():
        try:
            d[k] = load_object(v)
        except NotConfigured:
            pass
    return d

Example #13
Source File: sentry.py From tw-rental-house-data with MIT License

def from_crawler(cls, crawler):
    dsn = crawler.settings.get("SENTRY_DSN", None)
    if dsn is None:
        raise NotConfigured('No SENTRY_DSN configured')
    return cls(dsn=dsn)

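The corresponding configuration is a single setting; both the DSN value and the extension path below are placeholders invented for illustration:

SENTRY_DSN = 'https://examplePublicKey@o0.ingest.sentry.io/0'  # placeholder DSN
EXTENSIONS = {
    'myproject.sentry.SentryLoggingExtension': -1,  # hypothetical path
}
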
Example #14
Source File: test_pagestorage.py From scrapy-pagestorage with BSD 3-Clause "New" or "Revised" License

def test_from_crawler(self, mocked_hsref):
    crawler_mock = mock.Mock()
    crawler_mock.settings = Settings()
    self.assertRaises(NotConfigured,
                      PageStorageMiddleware.from_crawler,
                      crawler_mock)
    # test creating an instance for all other cases
    crawler_mock.settings = mock.Mock()
    mocked_values = [(True, False), (False, True), (True, True)]
    crawler_mock.settings.side_effect = mocked_values
    for _ in range(len(mocked_values)):
        assert isinstance(PageStorageMiddleware.from_crawler(crawler_mock),
                          PageStorageMiddleware)

Example #15
Source File: project.py From learn_python3_spider with MIT License

def project_data_dir(project='default'):
    """Return the current project data dir, creating it if it doesn't exist"""
    if not inside_project():
        raise NotConfigured("Not inside a project")
    cfg = get_config()
    if cfg.has_option(DATADIR_CFG_SECTION, project):
        d = cfg.get(DATADIR_CFG_SECTION, project)
    else:
        scrapy_cfg = closest_scrapy_cfg()
        if not scrapy_cfg:
            raise NotConfigured("Unable to find scrapy.cfg file to infer project data dir")
        d = abspath(join(dirname(scrapy_cfg), '.scrapy'))
    if not exists(d):
        os.makedirs(d)
    return d

Example #16
Source File: test.py From learn_python3_spider with MIT License

def skip_if_no_boto():
    try:
        is_botocore()
    except NotConfigured as e:
        raise SkipTest(e)

Example #17
Source File: pipelines.py From spidermon with BSD 3-Clause "New" or "Revised" License

def from_crawler(cls, crawler):
    spidermon_enabled = crawler.settings.getbool("SPIDERMON_ENABLED")
    if not spidermon_enabled:
        return PassThroughPipeline()

    validators = defaultdict(list)
    allowed_types = (list, tuple, dict)

    def set_validators(loader, schema):
        if type(schema) in (list, tuple):
            schema = {Item: schema}
        for obj, paths in schema.items():
            key = obj.__name__
            paths = paths if type(paths) in (list, tuple) else [paths]
            objects = [loader(v) for v in paths]
            validators[key].extend(objects)

    for loader, name in [
        (cls._load_jsonschema_validator, "SPIDERMON_VALIDATION_SCHEMAS"),
        (cls._load_schematics_validator, "SPIDERMON_VALIDATION_MODELS"),
    ]:
        res = crawler.settings.get(name)
        if not res:
            continue
        if type(res) not in allowed_types:
            raise NotConfigured(
                "Invalid <{}> type for <{}> settings, dict or list/tuple"
                "is required".format(type(res), name)
            )
        set_validators(loader, res)

    if not validators:
        raise NotConfigured("No validators were found")

    return cls(
        validators=validators,
        stats=crawler.stats,
        drop_items_with_errors=crawler.settings.getbool(
            "SPIDERMON_VALIDATION_DROP_ITEMS_WITH_ERRORS"
        ),
        add_errors_to_items=crawler.settings.getbool(
            "SPIDERMON_VALIDATION_ADD_ERRORS_TO_ITEMS"
        ),
        errors_field=crawler.settings.get("SPIDERMON_VALIDATION_ERRORS_FIELD"),
    )

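A hypothetical settings.py fragment that would reach the validator-loading code above; the pipeline path is an assumption based on spidermon's documentation, and the schema path is a placeholder:

SPIDERMON_ENABLED = True
ITEM_PIPELINES = {
    'spidermon.contrib.scrapy.pipelines.ItemValidationPipeline': 800,
}
# Either JSON schemas or schematics models can be configured:
SPIDERMON_VALIDATION_SCHEMAS = [
    '/path/to/schema.json',   # placeholder path
]
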
Example #18
Source File: pipelines.py From spidermon with BSD 3-Clause "New" or "Revised" License

def _load_schematics_validator(cls, model_path):
    model_class = load_object(model_path)
    if not issubclass(model_class, Model):
        raise NotConfigured(
            "Invalid model, models must subclass schematics.models.Model"
        )
    return SchematicsValidator(model_class)

Example #19
Source File: pipelines.py From spidermon with BSD 3-Clause "New" or "Revised" License

def _load_jsonschema_validator(cls, schema):
    if isinstance(schema, six.string_types):
        schema = get_schema_from(schema)
    if not isinstance(schema, dict):
        raise NotConfigured(
            "Invalid schema, jsonschemas must be defined as:\n"
            "- a python dict.\n"
            "- an object path to a python dict.\n"
            "- an object path to a JSON string.\n"
            "- a path to a JSON file."
        )
    return JSONSchemaValidator(schema)