Python scrapy.loader.processors.TakeFirst() Examples
The following are 7 code examples of scrapy.loader.processors.TakeFirst().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module scrapy.loader.processors, or try the search function.
Example #1
Source File: sitemap_spider.py From scrapy-templates with MIT License | 6 votes |
def scrape_product(self, response):
    """Load a ``MyItem`` from ``response`` and return it.

    The loader's default input processor is ``MapCompose(remove_tags)`` and
    its default output processor is ``TakeFirst()``. The only field
    populated here is ``my_field``, taken from the CSS selector
    ``"selector"``.
    """
    loader = ItemLoader(item=MyItem(), response=response)

    # Loader-wide default processors.
    loader.default_input_processor = MapCompose(remove_tags)
    loader.default_output_processor = TakeFirst()

    loader.add_css("my_field", "selector")
    return loader.load_item()
Example #2
Source File: 1scr.py From scrapy-templates with MIT License | 6 votes |
def parse(self, response):
    """Create a ``MyItem`` loader for ``response`` and return the loaded item.

    The loader's default input processor is ``MapCompose(remove_tags)`` and
    its default output processor is ``TakeFirst()``. No fields are added in
    this template; the commented lines below show where to add them.
    """
    loader = ItemLoader(item=MyItem(), response=response)
    loader.default_input_processor = MapCompose(remove_tags)
    loader.default_output_processor = TakeFirst()

    # Template placeholders -- uncomment and adapt to populate fields:
    # loader.add_css("my_field", "my_css")
    # loader.add_xpath("my_field", "my_xpath")

    return loader.load_item()
Example #3
Source File: price_crawler.py From scrapy-templates with MIT License | 6 votes |
def parse(self, response):
    """Load a ``PriceItem`` from ``response`` and return it.

    ``price`` and ``stock`` are read from the page via the spider's
    ``price_css`` / ``stock_css`` selectors. The remaining fields are set
    from the request meta, spider attributes, a freshly generated UUID and
    the current local time. ``TakeFirst()`` is the loader's default output
    processor.
    """
    loader = ItemLoader(item=PriceItem(), response=response)
    loader.default_output_processor = TakeFirst()

    # Values scraped from the page.
    loader.add_css("price", self.price_css)
    loader.add_css("stock", self.stock_css)

    # Values supplied by the request / spider rather than the page.
    add_value = loader.add_value
    add_value("product_id", response.meta.get("product_id"))
    add_value("cron_id", self.cron_id)
    add_value("shop_id", self.shop_id)
    add_value("item_id", str(uuid.uuid1()))
    add_value("updated", str(datetime.datetime.now()))
    add_value("url", response.url)

    return loader.load_item()
    # 2. Updating database by calling the backend API
Example #4
Source File: event.py From In2ItChicago with GNU General Public License v3.0 | 5 votes |
def numeric_field():
    """Return a ``scrapy.Field`` configured for HTML-stripped single values.

    Input processor: ``MapCompose(DataUtils.remove_html)``.
    Output processor: ``TakeFirst()``.
    """
    strip_html = MapCompose(DataUtils.remove_html)
    return scrapy.Field(
        input_processor=strip_html,
        output_processor=TakeFirst(),
    )
Example #5
Source File: event.py From In2ItChicago with GNU General Public License v3.0 | 5 votes |
def price_field():
    """Return a ``scrapy.Field`` that converts price values to ``float``.

    The input processor chain is, in order: strip ``'$'`` from string
    values, ``DataUtils.remove_html``, then ``float``. The output processor
    is ``TakeFirst()``.
    """

    def strip_dollar(value):
        """Remove every ``'$'`` from string values; pass others through."""
        # isinstance() instead of an exact ``type(value) == str`` comparison,
        # so str subclasses are stripped too rather than reaching ``float``
        # with the '$' still in place.
        return value.replace('$', '') if isinstance(value, str) else value

    return scrapy.Field(
        input_processor=MapCompose(strip_dollar, DataUtils.remove_html, float),
        output_processor=TakeFirst(),
    )
Example #6
Source File: event.py From In2ItChicago with GNU General Public License v3.0 | 5 votes |
def date_field():
    """Return a ``scrapy.Field`` that turns date data into timestamps.

    Input processor: ``MapCompose(DataUtils.remove_html, parse_date)``.
    Output processor: ``TakeFirst()``.
    """

    def parse_date(value):
        """Convert a date mapping into start/end timestamps.

        ``value`` must support ``value['date_format']`` and ``**`` unpacking.
        Its entries override the defaults from ``create_time_data()`` before
        the merged mapping is passed to ``TimeUtils.get_timestamps``.
        """
        converter = TimeUtils(date_format=value['date_format'])
        merged = {**create_time_data(), **value}
        start, end = converter.get_timestamps(merged)
        return dict(start_timestamp=start, end_timestamp=end)

    date_input = MapCompose(DataUtils.remove_html, parse_date)
    return scrapy.Field(
        input_processor=date_input,
        output_processor=TakeFirst(),
    )
Example #7
Source File: 1scr_pag.py From scrapy-templates with MIT License | 5 votes |
def populate_item(self, selector):
    """Create a ``MySpiderItem`` loader bound to ``selector`` and load it.

    The loader's default input processor is ``MapCompose(remove_tags)`` and
    its default output processor is ``TakeFirst()``. No fields are added in
    this template; the commented lines below show where to add them.
    """
    loader = ItemLoader(item=MySpiderItem(), selector=selector)
    loader.default_input_processor = MapCompose(remove_tags)
    loader.default_output_processor = TakeFirst()

    # Template placeholders -- uncomment and adapt to populate fields:
    # loader.add_css("my_field", "my_css")
    # loader.add_xpath("my_field", "my_xpath")

    return loader.load_item()