Python Examples of selenium.webdriver.PhantomJS

Source File: screenshot.py From AboveTustin with MIT License

8 votes

def loadmap(self):
        '''
        loadmap()
        Start a PhantomJS browser, open self.url and wait for the page menu.

        The window is sized from the module-level image width/height, the
        page is requested with a 15 second page-load timeout, and the call
        then blocks (up to g_request_timeout seconds) until the element with
        class 'vrsMenu' is clickable.

        The ready browser is stored on self.browser.
        '''
        phantom_caps = {'phantomjs.page.settings.resourceTimeout': '20000'}
        driver = webdriver.PhantomJS(desired_capabilities=phantom_caps)
        driver.set_window_size(abovetustin_image_width, abovetustin_image_height)

        print("getting web page {}".format(self.url))
        driver.set_page_load_timeout(15)
        driver.get(self.url)

        # Block until the page's 'vrsMenu' element becomes clickable.
        wait_seconds = g_request_timeout
        print ("waiting for page to load...")
        menu_locator = (By.CLASS_NAME, 'vrsMenu')
        WebDriverWait(driver, wait_seconds).until(
            EC.element_to_be_clickable(menu_locator))
        self.browser = driver

Source File: base.py From syncPlaylist with MIT License

8 votes

def __init__(self):
        """Initialise playlist bookkeeping and start the PhantomJS browser.

        Exports the Chrome and PhantomJS driver paths through ``os.environ``,
        creates a PhantomJS driver from ``phantomjs_driver_path``, and attaches
        a 5 second ``WebDriverWait`` plus a fresh ``Config`` to the instance.
        """
        # NOTE(review): ``browse`` (without the trailing "r") is kept exactly as
        # in the original; the driver itself lives on ``self.browser`` -- confirm
        # whether this attribute is a typo.
        self.browse = None
        self.source_playlist = None
        self.target_playlist_tag = None
        self.success_list = []
        self.failed_list = []

        os.environ["webdriver.chrome.driver"] = chrome_driver_path
        os.environ["webdriver.phantomjs.driver"] = phantomjs_driver_path

        # The options object is still built although the PhantomJS driver
        # below does not receive it (the Chrome variant is disabled).
        chrome_options = Options()
        chrome_options.add_argument("user-agent={}".format(headers["User-Agent"]))

        self.browser = webdriver.PhantomJS(phantomjs_driver_path)
        self.wait = ui.WebDriverWait(self.browser, 5)
        self.config = Config()

Source File: scrape_espncricinfo.py From Awesome-Scripts with MIT License

7 votes

def get_latest_wallpapers():
    """Print the link of every picture entry listed between yesterday and today.

    Loads the ESPNcricinfo image listing for the date range
    ``yesterday .. today`` in PhantomJS, waits (up to 10 seconds) for an
    element with class ``img-wrap`` to become visible, then parses the
    rendered HTML and prints the absolute URL of the first anchor found in
    each ``div.picture``.

    The browser is always shut down, even when loading or waiting fails, so
    no PhantomJS process is left behind.
    """
    browser = webdriver.PhantomJS(PHANTOMJS_PATH, service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
    try:
        today_date = time.strftime("%d+%b+%Y")
        yesterday = datetime.now() - timedelta(days=1)
        yesterday_date = yesterday.strftime('%d+%b+%Y')
        first_page_url = 'http://www.espncricinfo.com/ci/content/image/?datefrom='+yesterday_date+'&dateupto='+today_date+';'
        browser.get(first_page_url)
        wait = WebDriverWait(browser, 10)
        wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "img-wrap")))
        # Extra settle time after the first image wrapper became visible.
        time.sleep(2)
        # Parse the rendered page while the browser is still alive.
        soup = BeautifulSoup(browser.page_source, "html.parser")
    finally:
        browser.quit()

    for image in soup.find_all('div', class_='picture'):
        anchor = image.find('a')
        href = anchor.get('href') if anchor is not None else None
        if not href:
            # A picture block without a usable link cannot be turned into a URL.
            continue
        url = "http://www.espncricinfo.com/" + href
        print(url)

Source File: worker.py From alma-slipsomat with MIT License

6 votes

def get_driver(self):
        """Start a new browser and return its WebDriver.

        The browser is selected by the ``browser`` option of the ``selenium``
        config section; ``firefox``, ``chrome`` and ``phantomjs`` are
        supported.  The matching selenium class is imported only when needed.

        Raises:
            RuntimeError: if the configured browser is none of the above.
        """
        requested = self.config.get('selenium', 'browser')

        if requested not in ('firefox', 'chrome', 'phantomjs'):
            raise RuntimeError('Unsupported/unknown browser')

        if requested == 'firefox':
            from selenium.webdriver import Firefox as driver_class
        elif requested == 'chrome':
            from selenium.webdriver import Chrome as driver_class
        else:
            from selenium.webdriver import PhantomJS as driver_class

        return driver_class()

Source File: KDLHASpider.py From Pansidong with GNU General Public License v3.0

6 votes

def my_run(self, page):
        """Scrape one page of the kuaidaili proxy list.

        Args:
            page: page number substituted into the list URL.

        Returns:
            A list of dicts, one per table row after the header row, with the
            keys ``ip``, ``port``, ``type``, ``protocol``, ``location`` and
            ``time`` (``time`` keeps only digits and dots of the cell text).
        """
        raw_url = "http://www.kuaidaili.com/proxylist/{page}/"
        url = raw_url.replace("{page}", str(page))
        logger.debug(url)
        driver = webdriver.PhantomJS(executable_path=self.phantomjs_path)
        try:
            driver.get(url)
            raw_html = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
        finally:
            # Always stop the PhantomJS process; the original leaked one per call.
            driver.quit()

        soup = BeautifulSoup(raw_html, "html5lib")
        t_result = list()
        for tr in soup.find_all("tr")[1:]:
            each_item = {}
            td = tr.find_all("td")

            # Populate the record fields
            each_item['ip'] = td[0].get_text()
            each_item['port'] = td[1].get_text()
            each_item['type'] = td[2].get_text()
            each_item['protocol'] = td[3].get_text().replace(", ", "-")
            each_item['location'] = td[5].get_text()
            # Build a real string: filter() is lazy on Python 3 and iterating
            # encoded bytes there yields ints, which never match the digits.
            each_item['time'] = "".join(ch for ch in td[6].get_text() if ch in '0123456789.')
            t_result.append(each_item)
        return t_result

Source File: bookmark_tasks.py From crestify with BSD 3-Clause "New" or "Revised" License

6 votes

def fulltext_extract(bookmark):
    """Store the whitespace-normalised body text of a bookmark's page.

    Re-reads the bookmark by id, loads its ``main_url`` in PhantomJS (images
    disabled, SSL errors ignored), takes the text of the ``<body>`` element,
    collapses every run of whitespace to a single space, and saves the UTF-8
    encoded result in ``full_text`` before committing the session.

    Args:
        bookmark: object whose ``id`` identifies the ``Bookmark`` row.
    """
    browser = webdriver.PhantomJS(service_args=[
        "--ignore-ssl-errors=true",
        "--ssl-protocol=tlsv1",
        "--load-images=no"])
    try:
        fulltext_bookmark = Bookmark.query.get(bookmark.id)
        browser.get(fulltext_bookmark.main_url)
        body = browser.find_element_by_tag_name('body')
        bodytext = body.text
    finally:
        # Shut PhantomJS down even when the lookup or page load fails.
        browser.quit()
    soup = BeautifulSoup4(bodytext)
    # split()/join already removes newlines, so no separate replace is needed.
    full_text = " ".join(soup.text.split())
    full_text = full_text.encode('utf-8')
    fulltext_bookmark.full_text = full_text
    db.session.commit()

Source File: helpers.py From ODIN with BSD 3-Clause "New" or "Revised" License

6 votes

def setup_phantomjs():
    """Create and return a PhantomJS browser object.

    The browser is configured with placeholder HTTP Basic Auth credentials,
    relaxed SSL / web-security flags and a 1920x1080 window.

    Returns:
        The PhantomJS driver, or None if it could not be started (a message
        with the error details is printed in that case).
    """
    try:
        # Work on a copy: DesiredCapabilities.PHANTOMJS is a shared class-level
        # dict, and mutating it would leak these settings into every other
        # PhantomJS driver created in the process.
        phantomjs_capabilities = dict(DesiredCapabilities.PHANTOMJS)
        # Some basic creds to use against an HTTP Basic Auth prompt
        phantomjs_capabilities['phantomjs.page.settings.userName'] = 'none'
        phantomjs_capabilities['phantomjs.page.settings.password'] = 'none'
        # Flags to ignore SSL problems and get screenshots
        service_args = [
            '--ignore-ssl-errors=true',
            '--web-security=no',
            '--ssl-protocol=any',
        ]
        # Create the PhantomJS browser and set the window size
        browser = webdriver.PhantomJS(desired_capabilities=phantomjs_capabilities,service_args=service_args)
        browser.set_window_size(1920,1080)
    except Exception as error:
        click.secho("[!] Bad news: PhantomJS failed to load (not installed?), so activities "
                    "requiring a web browser will be skipped.",fg="red")
        click.secho("L.. Details: {}".format(error),fg="red")
        browser = None
    return browser

Source File: pytest_dallinger.py From Dallinger with MIT License

6 votes

def selenium_recruits(request, recruitment_loop):
    """Yield a generator of web drivers, one per recruitment URL.

    For each URL produced by ``recruitment_loop`` a driver of the class
    selected by ``request.param`` (PhantomJS when the parameter is not in
    ``DRIVER_MAP``) is started, pointed at the URL and yielded.  When the
    consumer advances or closes the generator the driver is quit and, for
    PhantomJS, its temporary local-storage directory is removed.
    """
    # Local import keeps this block self-contained; only needed for cleanup.
    import shutil

    def recruits():
        for url in recruitment_loop:
            kwargs = {}
            tmpdirname = None
            driver_class = DRIVER_MAP.get(request.param, webdriver.PhantomJS)
            if driver_class is webdriver.PhantomJS:
                # PhantomJS needs a new local storage for every run
                tmpdirname = tempfile.mkdtemp()
                kwargs = {
                    "service_args": ["--local-storage-path={}".format(tmpdirname)],
                }
            try:
                driver = driver_class(**kwargs)
                try:
                    driver.get(url)
                    yield driver
                finally:
                    # Best effort: a failing quit must not mask the test result.
                    try:
                        driver.quit()
                    except Exception:
                        pass
            finally:
                # The original never deleted the per-run storage directory.
                if tmpdirname is not None:
                    shutil.rmtree(tmpdirname, ignore_errors=True)

    yield recruits()

Source File: accounts.py From twitter-accounts-creator-bot with MIT License

6 votes

def getWebdriver(self, driverType):
		"""Return a web driver for the requested driver type.

		'headless' gives a PhantomJS driver and 'proxy' gives a Firefox driver
		whose profile routes traffic through the SOCKS proxy at 127.0.0.1:9150
		and applies the privacy preferences listed below.  Any other value
		gives a plain Firefox driver.
		"""
		if driverType == 'headless':
			return webdriver.PhantomJS()
		if driverType != 'proxy':
			return webdriver.Firefox()

		# Applied to the profile in exactly this order.
		proxy_preferences = (
			( "network.proxy.type", 1 ),
			( "network.proxy.socks", "127.0.0.1" ),
			( "network.proxy.socks_port", 9150 ),
			( "network.proxy.socks_remote_dns", True ),
			( "places.history.enabled", False ),
			( "privacy.clearOnShutdown.offlineApps", True ),
			( "privacy.clearOnShutdown.passwords", True ),
			( "privacy.clearOnShutdown.siteSettings", True ),
			( "privacy.sanitize.sanitizeOnShutdown", True ),
			( "signon.rememberSignons", False ),
			( "network.cookie.lifetimePolicy", 2 ),
			( "network.dns.disablePrefetch", True ),
			( "network.http.sendRefererHeader", 0 ),
			( "javascript.enabled", False ),
			( "permissions.default.image", 2 ),
		)
		firefox_profile = webdriver.FirefoxProfile()
		for pref_name, pref_value in proxy_preferences:
			firefox_profile.set_preference( pref_name, pref_value )
		return webdriver.Firefox(firefox_profile)

Source File: readcomic.py From ReadComicOnline-Downloader with MIT License

6 votes

def Single_Issue(url,Quality):
	"""Open a single issue page in PhantomJS and wait for it to load.

	Prints the requested quality (``Quality[0]``) and download order
	(``Quality[1]``), loads ``url`` with image loading disabled, and waits up
	to 10 seconds for the element with id ``stSegmentFrame``.  If the wait
	fails, a screenshot is saved to ``Single_exception.png`` and the error is
	printed; the function does not re-raise.

	Written in the syntax subset shared by Python 2 and 3; the printed text is
	unchanged from the Python 2 ``print`` statement form.
	"""
	# Two spaces after the colon reproduce "print 'label : ', value" output.
	print('Quality To Download :  %s' % (Quality[0],))
	print('Order To Download :  %s' % (Quality[1],))

	browser = webdriver.PhantomJS(service_args=['--load-images=no'])
	browser.get(url)
	try:
		WebDriverWait(browser, 10).until(
			EC.presence_of_element_located((By.ID, "stSegmentFrame"))
		)
	except Exception as e:
		# Keep a snapshot of whatever was rendered to help diagnose the failure.
		browser.save_screenshot('Single_exception.png')
		print(e)

Source File: main.py From SneakerBotTutorials with MIT License

6 votes

def createHeadlessBrowser(proxy=None, XResolution=1024, YResolution=768, timeout=20):
	"""Return a configured browser driver.

	Outside test mode (``TEST_MODE == False``) this is a PhantomJS driver with
	a desktop Chrome user-agent string, the given window size and page-load
	timeout, and -- when ``proxy`` is given -- proxy/SSL service arguments.
	In test mode a plain Firefox driver is returned without further setup.
	"""
	if TEST_MODE != False:
		return webdriver.Firefox()

	# Fake browser headers
	capabilities = dict(DesiredCapabilities.PHANTOMJS)
	capabilities["phantomjs.page.settings.userAgent"] = (
	    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.86 Safari/537.36')

	phantom_kwargs = {'desired_capabilities': capabilities}
	if proxy is not None:
		# The caller supplied a proxy, so route PhantomJS through it.
		phantom_kwargs['service_args'] = ['--proxy={}'.format(proxy),'--proxy-type=https','--ignore-ssl-errors=true', '--ssl-protocol=any', '--web-security=false',]
	driver = webdriver.PhantomJS(**phantom_kwargs)

	# Screen resolution; ideally dynamic based on the number of open browsers.
	driver.set_window_size(XResolution,YResolution)
	# Page-load timeout for the selenium window.
	driver.set_page_load_timeout(timeout)
	return driver

Source File: form.py From cmdbac with Apache License 2.0

6 votes

def __init__(self, *args, **kwargs):
        """Set up the form spider and its PhantomJS browser.

        Keyword arguments read here:
            start_url: the single entry of ``self.start_urls``.
            follow: links are followed only when this is the string 'true'.
            proxy: optional HTTP proxy passed to PhantomJS; when absent the
                browser is started without service arguments.
        """
        super(FormSpider, self).__init__(*args, **kwargs)

        self.start_urls = [kwargs.get('start_url')]

        follow = kwargs.get('follow') == 'true'
        self.rules = (
            Rule (SgmlLinkExtractor(allow=('')), callback='parse_form', follow=follow),
        )
        super(FormSpider, self)._compile_rules()

        # Explicit check instead of relying on a bare ``except`` to swallow the
        # TypeError raised by concatenating a string with None.
        proxy = kwargs.get('proxy')
        if proxy is None:
            service_args = None
        else:
            service_args = [
                '--proxy={}'.format(proxy),
                '--proxy-type=http',
            ]
        self.browser = webdriver.PhantomJS(service_args=service_args)

Source File: headless.py From darklight with Apache License 2.0

6 votes

def __init__(self, ini, load_image=False, tor_network=False):
        """Start the PhantomJS driver described by ``ini``.

        Args:
            ini: config reader; ``ini.read(section, key)`` supplies the
                PhantomJS path (HEADLESS/PATH) and, when needed, the proxy
                host and port (TOR/HOST, TOR/PORT).
            load_image: when false, PhantomJS is told not to load images.
            tor_network: when true, traffic goes through the configured
                SOCKS5 proxy.

        The service log is written to /dev/null.
        """
        # Skip image loading unless explicitly requested.
        phantom_flags = [] if load_image else ['--load-images=no']

        # Route selenium through the configured SOCKS5 proxy.
        if tor_network:
            tor_endpoint = '{}:{}'.format(
                ini.read('TOR', 'HOST'),
                ini.read('TOR', 'PORT'))
            phantom_flags.extend([
                '--proxy={}'.format(tor_endpoint),
                '--proxy-type=socks5',
            ])

        self.driver = webdriver.PhantomJS(
            executable_path=ini.read('HEADLESS', 'PATH'),
            service_args=phantom_flags,
            service_log_path='/dev/null'
        )

Source File: struts2scan.py From ZEROScan with MIT License

6 votes

def gethtml(url):
    """Fetch ``url`` and return its HTML, status code and URL as a dict.

    A plain HTTP GET (3 second timeout, redirects followed, browser-like
    headers with the URL itself as referer) is tried first.  If that raises
    any exception, the result of ``get_html_phantomJS(url)`` is returned
    instead.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0',
        'Accept-Language': 'en-us;q=0.5,en;q=0.3',
        'Referer': url,
    }
    try:
        response = requests.get(url, timeout=3, headers=request_headers, allow_redirects=True)
        return {"html": response.text, "code": response.status_code, "url": url}
    except Exception:
        # Fall back to the PhantomJS-based fetcher on any failure.
        return get_html_phantomJS(url)

# 使用PhantomJS获取网页源码

Source File: selenium.py From SerpScrap with MIT License

6 votes

def _get_webdriver(self):
        """Return a webdriver instance and set it up
        with the according profile/ proxies.
        Chrome is quite fast, but not as stealthy as PhantomJS.
        Returns:
            The appropriate webdriver mode according to self.browser_type.
            If no webdriver mode could be found, return False.
        """
        if self.browser_type == 'chrome':
            return self._get_Chrome()
        elif self.browser_type == 'firefox':
            return self._get_Firefox()
        elif self.browser_type == 'phantomjs':
            return self._get_PhantomJS()

        return False

Source File: environment.py From pixelated-user-agent with GNU Affero General Public License v3.0

6 votes

def _setup_webdriver(context):
    """Create ``context.browser`` from the 'webdriver' userdata option.

    The option defaults to 'chrome'; 'phantomjs', 'firefox' and 'chrome' are
    accepted.  The new browser gets a 1280x1024 window, the default implicit
    wait and a 60 second page-load timeout.

    Raises:
        UnsuportedWebDriverError: if a KeyError occurs while selecting and
            starting the driver (i.e. the option names no supported driver).
    """
    requested = context.config.userdata.get('webdriver', 'chrome')
    driver_factories = dict(
        phantomjs=webdriver.PhantomJS,
        firefox=webdriver.Firefox,
        chrome=webdriver.Chrome,
    )

    try:
        context.browser = driver_factories[requested]()
    except KeyError:
        raise UnsuportedWebDriverError('{} is not a supported webdriver'.format(requested))

    started = context.browser
    started.set_window_size(1280, 1024)
    started.implicitly_wait(DEFAULT_IMPLICIT_WAIT_TIMEOUT_IN_S)
    started.set_page_load_timeout(60)

Source File: isp_data_pollution.py From isp-data-pollution with MIT License

6 votes

def add_url_links(self,links,url=''):
        """Add http/https links that are not blacklisted, in random order.

        Links are visited in a random order; each one with an ``http`` or
        ``https`` scheme that ``self.blacklisted`` does not reject is offered
        to ``self.add_link``.  Iteration stops once the number of successful
        additions exceeds ``self.max_links_per_page``.

        When ``self.verbose`` or ``self.debug`` is set, progress is reported
        against the driver's current URL, falling back to ``url`` if that
        cannot be read.
        """
        added = 0
        shuffled = sorted(links, key=lambda _link: random.random())
        for link in shuffled:
            scheme = uprs.urlparse(link).scheme
            if scheme not in ('http', 'https') or self.blacklisted(link):
                continue
            if self.add_link(link):
                added += 1
            if added > self.max_links_per_page:
                break

        if not (self.verbose or self.debug):
            return

        current_url = url  # default
        try:
            @self.chromedriver_short_timeout
            def chromedriver_current_url(): return self.driver.current_url
            current_url = chromedriver_current_url()
            # the current_url method breaks on a lot of sites, e.g.
            # python3 -c 'from selenium import webdriver; driver = webdriver.PhantomJS(); driver.get("https://github.com"); print(driver.title); print(driver.current_url); driver.quit()'
        except Exception as e:
            if self.debug: print(f'.current_url exception:\n{e}')

        if self.debug:
            print("{}: {:d} links added, {:d} total, {:.1f} bits domain entropy".format(current_url,added,self.link_count(),self.domain_entropy()))
        elif self.verbose:
            self.print_progress(current_url,num_links=added)

Source File: Screenshot_to_pdf.py From crawlBaiduWenku with MIT License

5 votes

def parse_pdf(url,wenku_title):
    """Open ``url`` in PhantomJS and hand the page to ``screenshot``.

    After loading the page the window is maximised and given 3 seconds to
    settle.  ``con_read`` is attempted on a best-effort basis (its errors are
    ignored), then ``screenshot(browser, wenku_title)`` is called.  The
    browser is always shut down afterwards.

    Args:
        url: address of the document page to open.
        wenku_title: title forwarded to ``screenshot``.
    """
    print('此过程较慢请稍后')
    browser=webdriver.PhantomJS()
    try:
        browser.get(url)
        browser.maximize_window()
        time.sleep(3)
        try:
            con_read(browser)
        except Exception:
            # Best effort only; a bare ``except`` would also have swallowed
            # KeyboardInterrupt and SystemExit.
            pass
        screenshot(browser,wenku_title)
    finally:
        # The driver is local to this function, so release it here.
        browser.quit()

Source File: linkedinViewerBot.py From FunUtils with MIT License

5 votes

def Launch():
    """Check the working files, ask which browser to use and start it.

    Exits if the file 'config' is missing, creates an empty
    'visitedUsers.txt' if it does not exist yet, then prompts until the user
    enters a number from 1 to 5 and passes it to ``StartBrowser``.
    """
    # A configuration file is mandatory.
    if not os.path.isfile('config'):
        print ('Error! No configuration file.')
        sys.exit()
    # Make sure the visited-users file exists (created empty when missing).
    if not os.path.isfile('visitedUsers.txt'):
        with open('visitedUsers.txt', 'wb'):
            pass

    # Browser choice
    menu_lines = (
        'Choose your browser:',
        '[1] Chrome',
        '[2] Firefox/Iceweasel',
        '[3] Firefox/Iceweasel (light)',
        '[4] PhantomJS',
        '[5] PhantomJS (light)',
    )
    for menu_line in menu_lines:
        print (menu_line)

    while True:
        try:
            browserChoice = int(input('Choice? '))
        except ValueError:
            print ('Invalid choice.')
            continue
        if browserChoice in [1,2,3,4,5]:
            break
        print ('Invalid choice.')

    StartBrowser(browserChoice)

Source File: code_verification.py From weibo-keywords-crawler with MIT License

5 votes

def test():
    """Load the weibo pincode URL in PhantomJS and run ``verify_user`` on it."""
    pincode_url = 'http://s.weibo.com/ajax/pincode/pin?type=sass&ts=1405404856'
    phantom = webdriver.PhantomJS()
    phantom.get(pincode_url)
    verify_user(phantom)

Source File: 21 TaoBaoInfo.py From Python-Spider with Apache License 2.0

5 votes

def __init__(self):
        """Set the output directory name, start PhantomJS and define HTTP headers.

        The PhantomJS binary is taken from the relative path
        ./phantomjs-2.1.1-macosx/bin/phantomjs.
        """
        phantomjs_binary = './phantomjs-2.1.1-macosx/bin/phantomjs'
        self.dirName = 'MyTaoBaoInfo'
        self.driver = webdriver.PhantomJS(executable_path=phantomjs_binary)
        self.headers = dict([('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0)')])

    # 获取页面内容提取

Source File: screenshot.py From N4xD0rk with GNU General Public License v3.0

5 votes

def snapshot(url):
	"""Save a 1024x768 screenshot of ``url`` to ``<url>.png``.

	The page is requested over HTTPS first; if that raises, plain HTTP is
	tried instead.  Page loads time out after 15 seconds.  The PhantomJS
	process is always shut down before returning.

	Args:
		url: host (and optional path) without a scheme.
	"""
	driver = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true']) # or add to your PATH
	try:
		driver.set_page_load_timeout(15)
		driver.set_window_size(1024, 768) # optional
		try:
			driver.get('https://{0}'.format(url))
		except Exception:
			# HTTPS failed; retry over plain HTTP.  ``Exception`` (not a bare
			# except) lets Ctrl-C still interrupt the run.
			driver.get('http://{0}'.format(url))
		driver.save_screenshot(url+".png")
	finally:
		# The original never quit the driver, leaking one process per call.
		driver.quit()
	return

Source File: web_driver_pool.py From Sasila with Apache License 2.0

5 votes

def _get_base_driver():
    """Return a PhantomJS driver built from ``default_settings``.

    The executable path and the module-level ``dcap`` capabilities are always
    passed; ``service_args`` is added only when
    ``default_settings.PHANTOMJS_SERVICE`` is truthy.
    """
    driver_kwargs = {
        'executable_path': default_settings.PHANTOMJS_PATH,
        'desired_capabilities': dcap,
    }
    if default_settings.PHANTOMJS_SERVICE:
        driver_kwargs['service_args'] = default_settings.PHANTOMJS_SERVICE
    return webdriver.PhantomJS(**driver_kwargs)

Source File: 03 douYuUnittest.py From Python-Spider with Apache License 2.0

5 votes

def setUp(self):
        """Create the PhantomJS driver stored on ``self.driver``."""
        phantomjs_binary = "./phantomjs-2.1.1-macosx/bin/phantomjs"
        self.driver = webdriver.PhantomJS(executable_path=phantomjs_binary)

    #具体的测试用例方法，一定要以test开头

Source File: webdriver.py From expressvpn_leak_testing with MIT License

5 votes

def driver(self, browser):
        """Return a new local webdriver for the named browser.

        Supported names are 'firefox', 'chrome', 'safari', 'opera' and
        'phantom'.

        Raises:
            XVEx: if ``browser`` is not one of the supported names.
        """
        local_drivers = (
            ('firefox', 'Firefox'),
            ('chrome', 'Chrome'),
            ('safari', 'Safari'),
            ('opera', 'Opera'),
            ('phantom', 'PhantomJS'),
        )
        for browser_name, class_name in local_drivers:
            if browser == browser_name:
                # Resolve the class only for the requested browser.
                return getattr(webdriver, class_name)()
        raise XVEx("{} is not supported on {}".format(browser, self._device.os_name()))

Source File: phantom_js_spider.py From LagouJob with Apache License 2.0

5 votes

def download_phantom_js():
    """Download and unpack PhantomJS 2.1.1 on Windows if it is not present.

    On Windows, when ``phantomjs.exe`` is missing under ``./PhantomJS/``, the
    archive is streamed to ``./PhantomJS/`` with a progress bar and then
    extracted there.  When the executable already exists a PhantomJS driver
    is started instead.  On other platforms nothing is done.
    """
    download_url_map = {
        'Windows': 'https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-2.1.1-windows.zip',
        'Linux': 'https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-2.1.1-linux-i686.tar.bz2'
    }

    if platform.system() == 'Windows':
        archive_url = download_url_map[platform.system()]
        archive_name = archive_url.split('/')[-1]
        archive_path = './PhantomJS/%s' % archive_name
        if not os.path.exists(
                './PhantomJS/%s/bin/phantomjs.exe' % archive_name.replace('.zip', '')):
            print('Downloading PhantomJS, please wait...')
            mkdirs_if_not_exists('./PhantomJS/')
            response = requests.get(archive_url, stream=True, timeout=100)
            total_size = int(response.headers.get('content-length', 0))
            block_size = 1024
            wrote = 0

            # True division before ceil: the original floor-divided first, so
            # a final partial block was missing from the progress total.
            total_blocks = math.ceil(total_size / float(block_size))
            with open(archive_path, mode='wb') as f:
                for data in tqdm(response.iter_content(block_size), total=total_blocks,
                                 unit='KB', unit_scale=True):
                    wrote = wrote + len(data)
                    f.write(data)

            if total_size != 0 and wrote != total_size:
                print("ERROR, something went wrong")

            # Context manager closes the archive even if extraction fails.
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall('./PhantomJS')
        else:
            print('PhantomJS exists.')
            # NOTE(review): ``executable_path`` is not defined in this block;
            # presumably a module-level name -- confirm.
            driver = webdriver.PhantomJS(executable_path=executable_path)

Source File: middleware.py From daywatch with MIT License

5 votes

def __init__(self):
        """Start a PhantomJS driver with default settings and keep it on ``self.driver``."""
        phantom = webdriver.PhantomJS()
        self.driver = phantom

Source File: spider_selenium_phantomjs.py From SmallReptileTraining with MIT License

5 votes

def __init__(self, qq='', pwd=None):
        """Start a maximised PhantomJS driver and remember the credentials.

        Args:
            qq: account identifier stored on ``self.qq``.
            pwd: password stored on ``self.pwd``.
        """
        # Runs as-is on Ubuntu; on Windows executable_path must be set.
        phantom = webdriver.PhantomJS()
        self.driver = phantom
        phantom.maximize_window()
        self.qq, self.pwd = qq, pwd
        print('webdriver start init success!')

Source File: TraTicketBooker.py From TRA-Ticket-Booker with GNU General Public License v3.0

5 votes

def start_up_driver(self):
        """Create and return a new PhantomJS driver with default settings."""
        return webdriver.PhantomJS()

Source File: webdriver.py From expressvpn_leak_testing with MIT License

5 votes

def driver(self, browser):
        """Start the server and return a webdriver for the named browser.

        'firefox' and 'chrome' are driven through the remote hub at
        http://127.0.0.1:4444/wd/hub; 'phantom' uses a local PhantomJS driver.

        Raises:
            XVEx: if ``browser`` is none of the supported names.
        """
        self._start_server()
        hub_url = 'http://127.0.0.1:4444/wd/hub'

        if browser == 'phantom':
            return webdriver.PhantomJS()

        if browser == 'firefox':
            capabilities = DesiredCapabilities.FIREFOX
        elif browser == 'chrome':
            capabilities = DesiredCapabilities.CHROME
        else:
            raise XVEx("{} is not supported on {}".format(browser, self._device.os_name()))

        return webdriver.Remote(command_executor=hub_url,
                                desired_capabilities=capabilities)

Python selenium.webdriver.PhantomJS() Examples