Python selenium.webdriver.common.keys.Keys.PAGE_DOWN Examples

The following are 15 code examples of selenium.webdriver.common.keys.Keys.PAGE_DOWN, drawn from open-source projects; the project and source file are noted above each example. Note that Keys.PAGE_DOWN is a string constant, not a callable: it is passed to an element's send_keys() method to simulate a Page Down keypress, which is the standard Selenium idiom for scrolling through pages that lazy-load their content. You may also want to check out the other key constants available on selenium.webdriver.common.keys.Keys.
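Before the project examples, here is a minimal, self-contained sketch of the idiom they all share. It uses the Selenium 4 find_element(By.TAG_NAME, ...) call; the older examples below use the equivalent find_element_by_tag_name. The URL is a placeholder.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time

driver = webdriver.Chrome()
driver.get("https://example.com")  # placeholder URL

# Keypresses must go to an element; <body> works for whole-page scrolling.
body = driver.find_element(By.TAG_NAME, "body")
for _ in range(5):
    body.send_keys(Keys.PAGE_DOWN)
    time.sleep(0.3)  # give lazy-loaded content time to appear

driver.quit()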
Example #1
Source File: Base_Page.py    From makemework with MIT License
def scroll_down(self,locator,wait_time=2):
        "Scroll down"
        try:
            element = self.get_element(locator)
            element.send_keys(Keys.PAGE_DOWN)
            self.wait(wait_time)
        except Exception as e:
            self.write(str(e),'debug')
            self.exceptions.append("An exception occurred when scrolling down")
            return None 
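A hedged usage sketch for this helper (SearchPage and the locator string are hypothetical; they assume a page object wired up with get_element, wait, and write):

page = SearchPage(driver)  # hypothetical subclass of Base_Page
page.scroll_down("xpath,//div[@id='results']", wait_time=2)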
Example #2
Source File: google_scraper.py    From youtube-video-face-swap with MIT License
def search(keyword):
    base_url = "https://www.google.com/search?q={}&source=lnms&tbm=isch&sa=X&ved=0ahUKEwiwoLXK1qLVAhWqwFQKHYMwBs8Q_AUICigB"

    url = base_url.format(keyword.lower().replace(" ", "+"))

    # Create a browser and resize for exact pinpoints
    browser = webdriver.Chrome()
    browser.set_window_size(1024, 768)
    print("\n===============================================\n")
    print("[%] Successfully launched Chrome Browser")

    # Open the link
    browser.get(url)
    time.sleep(1)
    print("[%] Successfully opened link.")

    element = browser.find_element_by_tag_name("body")

    print("[%] Scrolling down.")
    # Scroll down
    for i in range(30):
        element.send_keys(Keys.PAGE_DOWN)
        time.sleep(0.3)  # bot id protection

    browser.find_element_by_id("smb").click()
    print("[%] Successfully clicked 'Show More Button'.")

    for i in range(50):
        element.send_keys(Keys.PAGE_DOWN)
        time.sleep(0.3)  # bot id protection

    time.sleep(1)

    print("[%] Reached end of Page.")
    # Get page source and close the browser
    source = browser.page_source
    browser.close()
    print("[%] Closed Browser.")

    return source 
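The returned source is then parsed for image URLs. A minimal sketch with BeautifulSoup (the parsing step is an assumption here; the original project may extract URLs differently):

from bs4 import BeautifulSoup

source = search("cats")
soup = BeautifulSoup(source, "html.parser")
# Generic extraction: collect every <img src=...> on the results page.
urls = [img["src"] for img in soup.find_all("img", src=True)]
print("found {} image URLs".format(len(urls)))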
Example #3
Source File: Base_Page.py    From qxf2-page-object-model with MIT License
def scroll_down(self,locator,wait_time=2):
        "Scroll down"
        try:
            element = self.get_element(locator)
            element.send_keys(Keys.PAGE_DOWN)
            self.wait(wait_time)
        except Exception as e:
            self.write(str(e),'debug')
            self.exceptions.append("An exception occurred when scrolling down")
            return None 
Example #4
Source File: Get_Job_from_Web.py    From Data-Mining-on-Social-Media with MIT License
def scrape_url(url):
    '''
    get job info  from scrolling page
    '''
 
    # Load WebDriver and navigate to the page url.
    # This will open a browser window.
    driver = webdriver.Firefox()



    driver.get(url)

    try:
#         while ('No results' not in driver.find_element_by_class_name('empty-text').text):
        for i in range(0, 50):
            elem = driver.find_element_by_tag_name('li')
            elem.send_keys(Keys.PAGE_DOWN)

        # Once the whole table has loaded, grab all the visible links.
        visible_links = driver.find_elements_by_tag_name('li')
        for link in visible_links:
            if link.get_attribute('data-item-id') is not None:
                print link.get_attribute('data-item-id')

    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        print ''.join('!! ' + line for line in lines)  # Log it or whatever here
        logger.info(''.join('!! ' + line for line in lines))
        print url

         
    driver.quit() 
Example #5
Source File: ExtractHistoricTweetID.py    From Data-Mining-on-Social-Media with MIT License
def scrape_url(url):
    '''
    get tweet id from scrolling page
    '''
 
    # Load WebDriver and navigate to the page url.
    # This will open a browser window.
    driver = webdriver.Firefox()

    driver.get(url)
   
    tweetid_list = []
#     print driver.page_source.encode("utf-8")
 
    # First scroll to the end of the table by sending Page Down keypresses to
    # the browser window.
    try:
        for i in range(0, 50):
            elem = driver.find_element_by_tag_name('li')
            elem.send_keys(Keys.PAGE_DOWN)

        # Once the whole table has loaded, grab all the visible tweetid.    
        tweetids = driver.find_elements_by_tag_name('li')
        for tweetid in tweetids:
            if tweetid.get_attribute('data-item-id') is not None:
                
                tweetid_list.append(tweetid.get_attribute('data-item-id'))
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
        print ''.join('!! ' + line for line in lines)  # Log it or whatever here
        logger.info(''.join('!! ' + line for line in lines) )
        print url

         
    driver.quit()
           
    return tweetid_list 
Example #6
Source File: custom_driver.py    From king-bot with MIT License
def scroll_down(self, element: webelement) -> None:
        element.send_keys(Keys.PAGE_DOWN) 
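Several examples on this page send a fixed number of PAGE_DOWN presses and hope the page has finished loading. A common alternative is to scroll until the document height stops growing; a minimal sketch, assuming a live driver instance (the pause length is a guess at the page's load latency):

import time

def scroll_to_bottom(driver, pause=1.0, max_rounds=50):
    """Scroll until document height stops growing, or max_rounds is hit."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    for _ in range(max_rounds):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)  # wait for lazy-loaded content to render
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break  # height stable: assume we reached the bottom
        last_height = new_height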
Example #7
Source File: bing_grabber.py    From py-image-dataset-generator with MIT License
def get_images_url(self, keyword: str, nb_images: int) -> List[GrabbedImage]:
        query = keyword.split()
        query = '+'.join(query)
        url = self.BING_URL % query

        print('> searching image on Bing : ' + url)

        options = webdriver.ChromeOptions()

        browser = webdriver.Chrome(chrome_options=options)

        browser.get(url)
        browser.maximize_window()
        time.sleep(2)

        elem = browser.find_element_by_tag_name("body")

        # scroll to fire the infinite scroll event and load more images
        no_of_pages_down = 20 if nb_images < 300 else 100
        while no_of_pages_down:
            elem.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.2)
            no_of_pages_down -= 1

        images_objects = []
        if self.full_image:
            images = browser.find_elements_by_class_name("iusc")
            for image in images:
                image_obj = GrabbedImage()
                image_obj.source = GrabSourceType.BING.value
                json_content = image.get_attribute('m')
                # links for Large original image
                image_obj.url = json.loads(json_content)["murl"]
                images_objects.append(image_obj)
        else:
            images = browser.find_elements_by_class_name("mimg")
            for image in images:
                image_obj = GrabbedImage()
                image_obj.source = GrabSourceType.BING.value
                src = image.get_attribute('src')
                if StringUtil.is_http_url(src):
                    image_obj.url = src
                else:
                    image_obj.base64 = src
                # links for small image
                images_objects.append(image_obj)

        browser.close()

        return images_objects 
Example #8
Source File: google_grabber.py    From py-image-dataset-generator with MIT License
def get_images_url(self, keyword: str, nb_images: int) -> List[GrabbedImage]:
        query = keyword.split()
        query = '+'.join(query)
        url = self.GOOGLE_URL % query

        print('> searching image on Google : ' + url)

        options = webdriver.ChromeOptions()

        browser = webdriver.Chrome(chrome_options=options)

        browser.get(url)
        time.sleep(2)

        elem = browser.find_element_by_tag_name("body")

        # scroll to fire the infinite scroll event and load more images
        no_of_pages_down = 20 if nb_images < 300 else 100
        while no_of_pages_down:
            elem.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.2)
            no_of_pages_down -= 1
            try:
                show_more_btn = browser.find_element_by_id("smb")
                if show_more_btn.is_displayed():
                    show_more_btn.click()
            except Exception:
                pass

        images_objects = []
        if self.full_image:
            images = browser.find_elements_by_class_name("rg_meta")
            for image in images:
                image_obj = GrabbedImage()
                image_obj.source = GrabSourceType.GOOGLE.value
                json_content = image.get_attribute('innerHTML')
                # links for Large original image
                image_obj.url = json.loads(json_content)["ou"]
                image_obj.extension = json.loads(json_content)["ity"]
                images_objects.append(image_obj)
        else:
            images = browser.find_elements_by_class_name("rg_ic")
            for image in images:
                image_obj = GrabbedImage()
                image_obj.source = GrabSourceType.GOOGLE.value
                src = image.get_attribute('src')
                if StringUtil.is_http_url(src):
                    image_obj.url = src
                else:
                    image_obj.base64 = src
                # links for small image
                images_objects.append(image_obj)

        browser.close()

        return images_objects 
Example #9
Source File: _google.py    From image_search with MIT License
def search(url):
    # Create a browser and resize depending on user preference

    chrome_options = Options()
    chrome_options.add_argument("--headless")

    browser = webdriver.Chrome(chrome_options=chrome_options)
    browser.set_window_size(1024, 768)
    print("\n===============================================\n")
    print("[%] Successfully launched ChromeDriver")

    # Open the link
    browser.get(url)
    time.sleep(1)
    print("[%] Successfully opened link.")

    element = browser.find_element_by_tag_name("body")

    print("[%] Scrolling down.")
    # Scroll down
    for i in range(30):
        element.send_keys(Keys.PAGE_DOWN)
        time.sleep(0.3)  # bot id protection

    try:
        browser.find_element_by_id("smb").click()
        print("[%] Successfully clicked 'Show More Button'.")
        for i in range(50):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)  # bot id protection
    except Exception:
        for i in range(10):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)  # bot id protection

    print("[%] Reached end of Page.")

    time.sleep(1)
    # Get page source and close the browser
    source = browser.page_source
    if sys.version_info[0] > 2:
        with open('{}/dataset/logs/google/source.html'.format(os.getcwd()), 'w+', encoding='utf-8', errors='replace') as f:
            f.write(source)
    else:
        with io.open('{}/dataset/logs/google/source.html'.format(os.getcwd()), 'w+', encoding='utf-8') as f:
            f.write(source)

    browser.close()
    print("[%] Closed ChromeDriver.")

    return source 
Example #10
Source File: google_images_download.py    From BotHub with Apache License 2.0
def download_extended_page(self,url,chromedriver):
        from selenium import webdriver
        from selenium.webdriver.common.keys import Keys
        if sys.version_info[0] < 3:
            reload(sys)
            sys.setdefaultencoding('utf8')
        options = webdriver.ChromeOptions()
        options.add_argument('--no-sandbox')
        options.add_argument("--headless")

        try:
            browser = webdriver.Chrome(chromedriver, chrome_options=options)
        except Exception as e:
            print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' "
                  "argument to specify the path to the executable.) or google chrome browser is not "
                  "installed on your machine (exception: %s)" % e)
            sys.exit()
        browser.set_window_size(1024, 768)

        # Open the link
        browser.get(url)
        time.sleep(1)
        print("Getting you a lot of images. This may take a few moments...")

        element = browser.find_element_by_tag_name("body")
        # Scroll down
        for i in range(30):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)

        try:
            browser.find_element_by_id("smb").click()
            for i in range(50):
                element.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.3)  # bot id protection
        except Exception:
            for i in range(10):
                element.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.3)  # bot id protection

        print("Reached end of Page.")
        time.sleep(0.5)

        source = browser.page_source #page source
        #close the browser
        browser.close()

        return source


Example #11
Source File: collect_links.py    From AutoCrawler with Apache License 2.0
def google(self, keyword, add_url=""):
        self.browser.get("https://www.google.com/search?q={}&source=lnms&tbm=isch{}".format(keyword, add_url))

        time.sleep(1)

        print('Scrolling down')

        elem = self.browser.find_element_by_tag_name("body")

        for i in range(60):
            elem.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.2)

        try:
            # You may need to change this, because Google Images' markup changes rapidly.
            # btn_more = self.browser.find_element(By.XPATH, '//input[@value="결과 더보기"]')
            # self.wait_and_click('//input[@id="smb"]')
            self.wait_and_click('//input[@type="button"]')

            for i in range(60):
                elem.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.2)

        except ElementNotVisibleException:
            pass

        photo_grid_boxes = self.browser.find_elements(By.XPATH, '//div[@class="bRMDJf islir"]')

        print('Scraping links')

        links = []

        for box in photo_grid_boxes:
            try:
                imgs = box.find_elements(By.TAG_NAME, 'img')

                for img in imgs:
                    # self.highlight(img)
                    src = img.get_attribute("src")

                    # Google seems to preload 20 images as base64
                    if str(src).startswith('data:'):
                        src = img.get_attribute("data-iurl")
                    links.append(src)

            except Exception as e:
                print('[Exception occurred while collecting links from google] {}'.format(e))

        links = self.remove_duplicates(links)

        print('Collect links done. Site: {}, Keyword: {}, Total: {}'.format('google', keyword, len(links)))
        self.browser.close()

        return links 
Example #12
Source File: collect_links.py    From AutoCrawler with Apache License 2.0
def naver(self, keyword, add_url=""):
        self.browser.get("https://search.naver.com/search.naver?where=image&sm=tab_jum&query={}{}".format(keyword, add_url))

        time.sleep(1)

        print('Scrolling down')

        elem = self.browser.find_element_by_tag_name("body")

        for i in range(60):
            elem.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.2)

        try:
            self.wait_and_click('//a[@class="btn_more _more"]')

            for i in range(60):
                elem.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.2)

        except ElementNotVisibleException:
            pass

        photo_grid_boxes = self.browser.find_elements(By.XPATH, '//div[@class="photo_grid _box"]')

        print('Scraping links')

        links = []

        for box in photo_grid_boxes:
            try:
                imgs = box.find_elements(By.CLASS_NAME, '_img')

                for img in imgs:
                    # self.highlight(img)
                    src = img.get_attribute("src")
                    # Skip inline base64 previews, whose src starts with 'data:'
                    if src[0] != 'd':
                        links.append(src)
            except Exception as e:
                print('[Exception occurred while collecting links from naver] {}'.format(e))

        links = self.remove_duplicates(links)

        print('Collect links done. Site: {}, Keyword: {}, Total: {}'.format('naver', keyword, len(links)))
        self.browser.close()

        return links 
Example #13
Source File: googol_images.py    From X-tra-Telegram with Apache License 2.0
def download_extended_page(self,url,chromedriver):
        from selenium import webdriver
        from selenium.webdriver.common.keys import Keys
        if sys.version_info[0] < 3:
            reload(sys)
            sys.setdefaultencoding('utf8')
        options = webdriver.ChromeOptions()
        options.add_argument('--no-sandbox')
        options.add_argument("--headless")

        try:
            browser = webdriver.Chrome(chromedriver, chrome_options=options)
        except Exception as e:
            print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' "
                  "argument to specify the path to the executable.) or google chrome browser is not "
                  "installed on your machine (exception: %s)" % e)
            sys.exit()
        browser.set_window_size(1024, 768)

        # Open the link
        browser.get(url)
        time.sleep(1)
        print("Getting you a lot of images. This may take a few moments...")

        element = browser.find_element_by_tag_name("body")
        # Scroll down
        for i in range(30):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)

        try:
            browser.find_element_by_id("smb").click()
            for i in range(50):
                element.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.3)  # bot id protection
        except Exception:
            for i in range(10):
                element.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.3)  # bot id protection

        print("Reached end of Page.")
        time.sleep(0.5)

        source = browser.page_source #page source
        #close the browser
        browser.close()

        return source


Example #14
Source File: instagramcrawler.py    From InstagramCrawler with MIT License
def scrape_followers_or_following(self, crawl_type, query, number):
        print("Scraping {}...".format(crawl_type))
        if crawl_type == "followers":
            FOLLOW_ELE = CSS_FOLLOWERS
            FOLLOW_PATH = FOLLOWER_PATH
        elif crawl_type == "following":
            FOLLOW_ELE = CSS_FOLLOWING
            FOLLOW_PATH = FOLLOWING_PATH

        # Locate follow list
        follow_ele = WebDriverWait(self._driver, 5).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, FOLLOW_ELE.format(query)))
        )

        # when no number defined, check the total items
        if number == 0:
            number = int(filter(str.isdigit, str(follow_ele.text)))
            print("getting all " + str(number) + " items")

        # open desired list
        follow_ele.click()

        title_ele = WebDriverWait(self._driver, 5).until(
            EC.presence_of_element_located(
                (By.XPATH, FOLLOW_PATH))
        )
        List = title_ele.find_element_by_xpath(
            '..').find_element_by_tag_name('ul')
        List.click()

        # Loop through list till target number is reached
        num_of_shown_follow = len(List.find_elements_by_xpath('*'))
        while len(List.find_elements_by_xpath('*')) < number:
            element = List.find_elements_by_xpath('*')[-1]
            # Work around for now => should use selenium's Expected Conditions!
            try:
                element.send_keys(Keys.PAGE_DOWN)
            except Exception as e:
                time.sleep(0.1)

        follow_items = []
        for ele in List.find_elements_by_xpath('*')[:number]:
            follow_items.append(ele.text.split('\n')[0])

        self.data[crawl_type] = follow_items 
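The "work around" comment above points at the fix: replace the busy-wait with an explicit wait. A hedged sketch of that idea (wait_for_list_growth is an illustrative helper, not part of the original crawler; it uses the same Selenium 3 element API as the example):

from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException

def wait_for_list_growth(driver, list_element, target, timeout=10):
    """Block until list_element has at least `target` children, or time out."""
    try:
        WebDriverWait(driver, timeout).until(
            lambda d: len(list_element.find_elements_by_xpath('*')) >= target
        )
        return True
    except TimeoutException:
        return False  # list stopped growing before reaching the target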
Example #15
Source File: google_images_download.py    From Skribbl.io-Bot with MIT License
def download_extended_page(self,url,chromedriver):
        from selenium import webdriver
        from selenium.webdriver.common.keys import Keys
        if sys.version_info[0] < 3:
            reload(sys)
            sys.setdefaultencoding('utf8')
        options = webdriver.ChromeOptions()
        options.add_argument('--no-sandbox')
        options.add_argument("--headless")

        try:
            browser = webdriver.Chrome(chromedriver, chrome_options=options)
        except Exception as e:
            print("Looks like we cannot locate the path the 'chromedriver' (use the '--chromedriver' "
                  "argument to specify the path to the executable.) or google chrome browser is not "
                  "installed on your machine (exception: %s)" % e)
            sys.exit()
        browser.set_window_size(1024, 768)

        # Open the link
        browser.get(url)
        time.sleep(1)
        print("Getting you a lot of images. This may take a few moments...")

        element = browser.find_element_by_tag_name("body")
        # Scroll down
        for i in range(30):
            element.send_keys(Keys.PAGE_DOWN)
            time.sleep(0.3)

        try:
            browser.find_element_by_id("smb").click()
            for i in range(50):
                element.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.3)  # bot id protection
        except Exception:
            for i in range(10):
                element.send_keys(Keys.PAGE_DOWN)
                time.sleep(0.3)  # bot id protection

        print("Reached end of Page.")
        time.sleep(0.5)

        source = browser.page_source #page source
        #close the browser
        browser.close()

        return source

