Python selenium.webdriver.common.keys.Keys.PAGE_DOWN Examples

The following are 15 code examples of selenium.webdriver.common.keys.Keys.PAGE_DOWN, drawn from open-source projects; the project and source file are noted above each example. Note that Keys.PAGE_DOWN is a string constant, not a callable: it is passed to an element's send_keys() method to simulate a Page Down keypress, which is the standard Selenium idiom for scrolling through pages that lazy-load their content. You may also want to check out the other key constants available on selenium.webdriver.common.keys.Keys.
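Before the project examples, here is a minimal, self-contained sketch of the idiom they all share. It uses the Selenium 4 find_element(By.TAG_NAME, ...) call; the older examples below use the equivalent find_element_by_tag_name. The URL is a placeholder.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time

driver = webdriver.Chrome()
driver.get("https://example.com")  # placeholder URL

# Keypresses must go to an element; <body> works for whole-page scrolling.
body = driver.find_element(By.TAG_NAME, "body")
for _ in range(5):
    body.send_keys(Keys.PAGE_DOWN)
    time.sleep(0.3)  # give lazy-loaded content time to appear

driver.quit()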
Example #1
Source File: Base_Page.py    From makemework with MIT License
def scroll_down(self,locator,wait_time=2):
        "Scroll down"
        try:
            element = self.get_element(locator)
            element.send_keys(Keys.PAGE_DOWN)
            self.wait(wait_time)
        except Exception as e:
            self.write(str(e),'debug')
            self.exceptions.append("An exception occurred when scrolling down")
            return None 
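A hedged usage sketch for this helper (SearchPage and the locator string are hypothetical; they assume a page object wired up with get_element, wait, and write):

page = SearchPage(driver)  # hypothetical subclass of Base_Page
page.scroll_down("xpath,//div[@id='results']", wait_time=2)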
Example #2
Source File: google_scraper.py    From youtube-video-face-swap with MIT License
def search(keyword):
    base_url = "https://www.google.com/search?q={}&source=lnms&tbm=isch&sa=X&ved=0ahUKEwiwoLXK1qLVAhWqwFQKHYMwBs8Q_AUICigB"

    url = base_url.format(keyword.lower().replace(" ", "+"))

    # Create a browser and resize for exact pinpoints
    browser = webdriver.Chrome()
    browser.set_window_size(1024, 768)
    print("\n===============================================\n")
    print("[%] Successfully launched Chrome Browser")

    # Open the link
    browser.get(url)
    time.sleep(1)
    print("[%] Successfully opened link.")

    element = browser.find_element_by_tag_name("body")

    print("[%] Scrolling down.")
    # Scroll down
    for i in range(30):
        element.send_keys(Keys.PAGE_DOWN)
        time.sleep(0.3)  # bot id protection

    browser.find_element_by_id("smb").click()
    print("[%] Successfully clicked 'Show More Button'.")

    for i in range(50):
        element.send_keys(Keys.PAGE_DOWN)
        time.sleep(0.3)  # bot id protection

    time.sleep(1)

    print("[%] Reached end of Page.")
    # Get page source and close the browser
    source = browser.page_source
    browser.close()
    print("[%] Closed Browser.")

    return source 
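The returned source is then parsed for image URLs. A minimal sketch with BeautifulSoup (the parsing step is an assumption here; the original project may extract URLs differently):

from bs4 import BeautifulSoup

source = search("cats")
soup = BeautifulSoup(source, "html.parser")
# Generic extraction: collect every <img src=...> on the results page.
urls = [img["src"] for img in soup.find_all("img", src=True)]
print("found {} image URLs".format(len(urls)))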
Example #3
Source File: Base_Page.py    From qxf2-page-object-model with MIT License
def scroll_down(self,locator,wait_time=2):
        "Scroll down"
        try:
            element = self.get_element(locator)
            element.send_keys(Keys.PAGE_DOWN)
            self.wait(wait_time)
        except Exception as e:
            self.write(str(e),'debug')
            self.exceptions.append("An exception occurred when scrolling down")
            return None 
Example #4
Source File: Get_Job_from_Web.py    From Data-Mining-on-Social-Media with MIT License
def scrape_url(url):
    '''
    get job info  from scrolling page
    '''
 
    # Load WebDriver and navigate to the page url.
    # This will open a browser window.
    driver = webdriver.Firefox()



    driver.get(url)

    try:
#         while ('No results' not in driver.find_element_by_class_name('empty-text').text):
        for i in range(0, 50):
            elem = driver.find_element_by_tag_name('li')
            elem.send_keys(Keys.PAGE_DOWN)

        # Once the whole table has loaded, grab all the visible links.
        visible_links = driver.find_elements_by_tag_name('li')
        for link in visible_links:
            if link.get_attribute('data-item-id') is not None:
                print link.get_attribute('data-item-id')

    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        print ''.join('!! ' + line for line in lines)  # Log it or whatever here
        logger.info(''.join('!! ' + line for line in lines))
        print url

         
    driver.quit() 
Example #5
Source File: ExtractHistoricTweetID.py    From Data-Mining-on-Social-Media with MIT License
def scrape_url(url):
    '''
    get tweet id from scrolling page
    '''
 
    # Load WebDriver and navigate to the page url.
    # This will open a browser window.
    driver = webdriver.Firefox()

    driver.get(url)
   
    tweetid_list = []
#     print driver.page_source.encode("utf-8")
 
    # First scroll to the end of the table by sending Page Down keypresses to
    # the browser window.
    try:
        for i in range(0, 50):
            elem = driver.find_element_by_tag_name('li')
            elem.send_keys(Keys.PAGE_DOWN)

        # Once the whole table has loaded, grab all the visible tweetid.    
        tweetids = driver.find_elements_by_tag_name('li')
        for tweetid in tweetids:
            if tweetid.get_attribute('data-item-id') is not None:
                
                tweetid_list.append(tweetid.get_attribute('data-item-id'))
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        print ''.join('!! ' + line for line in lines)  # Log it or whatever here
        logger.info(''.join('!! ' + line for line in lines) )
        print url

         
    driver.quit()
           
    return tweetid_list 
Example #6
Source File: custom_driver.py    From king-bot with MIT License
def scroll_down(self, element: webelement) -> None:
        element.send_keys(Keys.PAGE_DOWN) 
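Several examples on this page send a fixed number of PAGE_DOWN presses and hope the page has finished loading. A common alternative is to scroll until the document height stops growing; a minimal sketch, assuming a live driver instance (the pause length is a guess at the page's load latency):

import time

def scroll_to_bottom(driver, pause=1.0, max_rounds=50):
    """Scroll until document height stops growing, or max_rounds is hit."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    for _ in range(max_rounds):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)  # wait for lazy-loaded content to render
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break  # height stable: assume we reached the bottom
        last_height = new_height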
Example #7
Source File: bing_grabber.py    From py-image-dataset-generator with MIT License
def get_images_url(self, keyword: str, nb_images: int) -> List[GrabbedImage]:
        query = keyword.split()
        query = '+'.join(query)
        url = self.BING_URL % query

        print('> searching image on Bing : ' + url)

        options = webdriver.ChromeOptions()

        browser = webdriver.Chrome(chrome_options=options)

        browser.get(url)
        browser.maximize_window()
        time.sleep(2)

        elem = browser.find_element_by_tag_name("body")

        # scroll to fire the infinite scroll event and load more images
        no_of_pages_down = 20 if nb_images < 300 else 100
        while no_of_pages_down:
            elem.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.2)
            no_of_pages_down -= 1

        images_objects = []
        if self.full_image:
            images = browser.find_elements_by_class_name("iusc")
            for image in images:
                image_obj = GrabbedImage()
                image_obj.source = GrabSourceType.BING.value
                json_content = image.get_attribute('m')
                # links for Large original image
                image_obj.url = json.loads(json_content)["murl"]
                images_objects.append(image_obj)
        else:
            images = browser.find_elements_by_class_name("mimg")
            for image in images:
                image_obj = GrabbedImage()
                image_obj.source = GrabSourceType.BING.value
                src = image.get_attribute('src')
                if StringUtil.is_http_url(src):
                    image_obj.url = src
                else:
                    image_obj.base64 = src
                # links for small image
                images_objects.append(image_obj)

        browser.close()

        return images_objects 
Example #8
Source File: google_grabber.py    From py-image-dataset-generator with MIT License
def get_images_url(self, keyword: str, nb_images: int) -> List[GrabbedImage]:
        query = keyword.split()
        query = '+'.join(query)
        url = self.GOOGLE_URL % query

        print('> searching image on Google : ' + url)

        options = webdriver.ChromeOptions()

        browser = webdriver.Chrome(chrome_options=options)

        browser.get(url)
        time.sleep(2)

        elem = browser.find_element_by_tag_name("body")

        # scroll to fire the infinite scroll event and load more images
        no_of_pages_down = 20 if nb_images < 300 else 100
        while no_of_pages_down:
            elem.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.2)
            no_of_pages_down -= 1
            try:
                show_more_btn = browser.find_element_by_id("smb")
                if show_more_btn.is_displayed():
                    show_more_btn.click()
            except Exception:
                pass

        images_objects = []
        if self.full_image:
            images = browser.find_elements_by_class_name("rg_meta")
            for image in images:
                image_obj = GrabbedImage()
                image_obj.source = GrabSourceType.GOOGLE.value
                json_content = image.get_attribute('innerHTML')
                # links for Large original image
                image_obj.url = json.loads(json_content)["ou"]
                image_obj.extension = json.loads(json_content)["ity"]
                images_objects.append(image_obj)
        else:
            images = browser.find_elements_by_class_name("rg_ic")
            for image in images:
                image_obj = GrabbedImage()
                image_obj.source = GrabSourceType.GOOGLE.value
                src = image.get_attribute('src')
                if StringUtil.is_http_url(src):
                    image_obj.url = src
                else:
                    image_obj.base64 = src
                # links for small image
                images_objects.append(image_obj)

        browser.close()

        return images_objects 
Example #9
Source File: _google.py    From image_search with MIT License
def search(url):
    # Create a browser and resize depending on user preference

    chrome_options = Options()
    chrome_options.add_argument("--headless")

    browser = webdriver.Chrome(chrome_options=chrome_options)
    browser.set_window_size(1024, 768)
    print("\n===============================================\n")
    print("[%] Successfully launched ChromeDriver")

    # Open the link
    browser.get(url)
    time.sleep(1)
    print("[%] Successfully opened link.")

    element = browser.find_element_by_tag_name("body")

    print("[%] Scrolling down.")
    # Scroll down
    for i in range(30):
        element.send_keys(Keys.PAGE_DOWN)
        time.sleep(0.3)  # bot id protection

    try:
        browser.find_element_by_id("smb").click()
        print("[%] Successfully clicked 'Show More Button'.")
        for i in range(50):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)  # bot id protection
    except Exception:
        for i in range(10):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)  # bot id protection

    print("[%] Reached end of Page.")

    time.sleep(1)
    # Get page source and close the browser
    source = browser.page_source
    if sys.version_info[0] > 2:
        with open('{}/dataset/logs/google/source.html'.format(os.getcwd()), 'w+', encoding='utf-8', errors='replace') as f:
            f.write(source)
    else:
        with io.open('{}/dataset/logs/google/source.html'.format(os.getcwd()), 'w+', encoding='utf-8') as f:
            f.write(source)

    browser.close()
    print("[%] Closed ChromeDriver.")

    return source 
Example #10
Source File: google_images_download.py    From BotHub with Apache License 2.0
def download_extended_page(self,url,chromedriver):
        from selenium import webdriver
        from selenium.webdriver.common.keys import Keys
        if sys.version_info[0] < 3:
            reload(sys)
            sys.setdefaultencoding('utf8')
        options = webdriver.ChromeOptions()
        options.add_argument('--no-sandbox')
        options.add_argument("--headless")

        try:
            browser = webdriver.Chrome(chromedriver, chrome_options=options)
        except Exception as e:
            print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' "
                  "argument to specify the path to the executable.) or google chrome browser is not "
                  "installed on your machine (exception: %s)" % e)
            sys.exit()
        browser.set_window_size(1024, 768)

        # Open the link
        browser.get(url)
        time.sleep(1)
        print("Getting you a lot of images. This may take a few moments...")

        element = browser.find_element_by_tag_name("body")
        # Scroll down
        for i in range(30):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)

        try:
            browser.find_element_by_id("smb").click()
            for i in range(50):
                element.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.3)  # bot id protection
        except Exception:
            for i in range(10):
                element.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.3)  # bot id protection

        print("Reached end of Page.")
        time.sleep(0.5)

        source = browser.page_source #page source
        #close the browser
        browser.close()

        return source


Example #11
Source File: collect_links.py    From AutoCrawler with Apache License 2.0
def google(self, keyword, add_url=""):
        self.browser.get("https://www.google.com/search?q={}&source=lnms&tbm=isch{}".format(keyword, add_url))

        time.sleep(1)

        print('Scrolling down')

        elem = self.browser.find_element_by_tag_name("body")

        for i in range(60):
            elem.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.2)

        try:
            # You may need to change this, because Google Images' markup changes rapidly.
            # btn_more = self.browser.find_element(By.XPATH, '//input[@value="결과 더보기"]')
            # self.wait_and_click('//input[@id="smb"]')
            self.wait_and_click('//input[@type="button"]')

            for i in range(60):
                elem.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.2)

        except ElementNotVisibleException:
            pass

        photo_grid_boxes = self.browser.find_elements(By.XPATH, '//div[@class="bRMDJf islir"]')

        print('Scraping links')

        links = []

        for box in photo_grid_boxes:
            try:
                imgs = box.find_elements(By.TAG_NAME, 'img')

                for img in imgs:
                    # self.highlight(img)
                    src = img.get_attribute("src")

                    # Google seems to preload 20 images as base64
                    if str(src).startswith('data:'):
                        src = img.get_attribute("data-iurl")
                    links.append(src)

            except Exception as e:
                print('[Exception occurred while collecting links from google] {}'.format(e))

        links = self.remove_duplicates(links)

        print('Collect links done. Site: {}, Keyword: {}, Total: {}'.format('google', keyword, len(links)))
        self.browser.close()

        return links 
Example #12
Source File: collect_links.py    From AutoCrawler with Apache License 2.0
def naver(self, keyword, add_url=""):
        self.browser.get("https://search.naver.com/search.naver?where=image&sm=tab_jum&query={}{}".format(keyword, add_url))

        time.sleep(1)

        print('Scrolling down')

        elem = self.browser.find_element_by_tag_name("body")

        for i in range(60):
            elem.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.2)

        try:
            self.wait_and_click('//a[@class="btn_more _more"]')

            for i in range(60):
                elem.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.2)

        except ElementNotVisibleException:
            pass

        photo_grid_boxes = self.browser.find_elements(By.XPATH, '//div[@class="photo_grid _box"]')

        print('Scraping links')

        links = []

        for box in photo_grid_boxes:
            try:
                imgs = box.find_elements(By.CLASS_NAME, '_img')

                for img in imgs:
                    # self.highlight(img)
                    src = img.get_attribute("src")
                    # Skip inline base64 previews, whose src starts with 'data:'
                    if src[0] != 'd':
                        links.append(src)
            except Exception as e:
                print('[Exception occurred while collecting links from naver] {}'.format(e))

        links = self.remove_duplicates(links)

        print('Collect links done. Site: {}, Keyword: {}, Total: {}'.format('naver', keyword, len(links)))
        self.browser.close()

        return links 
Example #13
Source File: googol_images.py    From X-tra-Telegram with Apache License 2.0
def download_extended_page(self,url,chromedriver):
        from selenium import webdriver
        from selenium.webdriver.common.keys import Keys
        if sys.version_info[0] < 3:
            reload(sys)
            sys.setdefaultencoding('utf8')
        options = webdriver.ChromeOptions()
        options.add_argument('--no-sandbox')
        options.add_argument("--headless")

        try:
            browser = webdriver.Chrome(chromedriver, chrome_options=options)
        except Exception as e:
            print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' "
                  "argument to specify the path to the executable.) or google chrome browser is not "
                  "installed on your machine (exception: %s)" % e)
            sys.exit()
        browser.set_window_size(1024, 768)

        # Open the link
        browser.get(url)
        time.sleep(1)
        print("Getting you a lot of images. This may take a few moments...")

        element = browser.find_element_by_tag_name("body")
        # Scroll down
        for i in range(30):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)

        try:
            browser.find_element_by_id("smb").click()
            for i in range(50):
                element.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.3)  # bot id protection
        except Exception:
            for i in range(10):
                element.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.3)  # bot id protection

        print("Reached end of Page.")
        time.sleep(0.5)

        source = browser.page_source #page source
        #close the browser
        browser.close()

        return source


Example #14
Source File: instagramcrawler.py    From InstagramCrawler with MIT License
def scrape_followers_or_following(self, crawl_type, query, number):
        print("Scraping {}...".format(crawl_type))
        if crawl_type == "followers":
            FOLLOW_ELE = CSS_FOLLOWERS
            FOLLOW_PATH = FOLLOWER_PATH
        elif crawl_type == "following":
            FOLLOW_ELE = CSS_FOLLOWING
            FOLLOW_PATH = FOLLOWING_PATH

        # Locate follow list
        follow_ele = WebDriverWait(self._driver, 5).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, FOLLOW_ELE.format(query)))
        )

        # when no number defined, check the total items
        if number == 0:
            number = int(filter(str.isdigit, str(follow_ele.text)))
            print("getting all " + str(number) + " items")

        # open desired list
        follow_ele.click()

        title_ele = WebDriverWait(self._driver, 5).until(
            EC.presence_of_element_located(
                (By.XPATH, FOLLOW_PATH))
        )
        List = title_ele.find_element_by_xpath(
            '..').find_element_by_tag_name('ul')
        List.click()

        # Loop through list till target number is reached
        num_of_shown_follow = len(List.find_elements_by_xpath('*'))
        while len(List.find_elements_by_xpath('*')) < number:
            element = List.find_elements_by_xpath('*')[-1]
            # Work around for now => should use selenium's Expected Conditions!
            try:
                element.send_keys(Keys.PAGE_DOWN)
            except Exception as e:
                time.sleep(0.1)

        follow_items = []
        for ele in List.find_elements_by_xpath('*')[:number]:
            follow_items.append(ele.text.split('\n')[0])

        self.data[crawl_type] = follow_items 
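The "work around" comment above points at the fix: replace the busy-wait with an explicit wait. A hedged sketch of that idea (wait_for_list_growth is an illustrative helper, not part of the original crawler; it uses the same Selenium 3 element API as the example):

from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException

def wait_for_list_growth(driver, list_element, target, timeout=10):
    """Block until list_element has at least `target` children, or time out."""
    try:
        WebDriverWait(driver, timeout).until(
            lambda d: len(list_element.find_elements_by_xpath('*')) >= target
        )
        return True
    except TimeoutException:
        return False  # list stopped growing before reaching the target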
Example #15
Source File: google_images_download.py    From Skribbl.io-Bot with MIT License
def download_extended_page(self,url,chromedriver):
        from selenium import webdriver
        from selenium.webdriver.common.keys import Keys
        if sys.version_info[0] < 3:
            reload(sys)
            sys.setdefaultencoding('utf8')
        options = webdriver.ChromeOptions()
        options.add_argument('--no-sandbox')
        options.add_argument("--headless")

        try:
            browser = webdriver.Chrome(chromedriver, chrome_options=options)
        except Exception as e:
            print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' "
                  "argument to specify the path to the executable.) or google chrome browser is not "
                  "installed on your machine (exception: %s)" % e)
            sys.exit()
        browser.set_window_size(1024, 768)

        # Open the link
        browser.get(url)
        time.sleep(1)
        print("Getting you a lot of images. This may take a few moments...")

        element = browser.find_element_by_tag_name("body")
        # Scroll down
        for i in range(30):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)

        try:
            browser.find_element_by_id("smb").click()
            for i in range(50):
                element.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.3)  # bot id protection
        except Exception:
            for i in range(10):
                element.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.3)  # bot id protection

        print("Reached end of Page.")
        time.sleep(0.5)

        source = browser.page_source #page source
        #close the browser
        browser.close()

        return source

