Python selenium.webdriver.PhantomJS() Examples

The following are 30 code examples of selenium.webdriver.PhantomJS(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module selenium.webdriver, or try the search function.
Example #1
Source File: screenshot.py    From AboveTustin with MIT License 8 votes vote down vote up
def loadmap(self):
        '''
        loadmap()
        Creates a PhantomJS browser object, loads self.url and waits until
        the element with class 'vrsMenu' is clickable.

        On success the browser is stored on self.browser and returned.
        If any step raises (for example the wait times out), the PhantomJS
        process is shut down before the exception is re-raised, so a failed
        load does not leave a headless browser running.
        '''
        browser = webdriver.PhantomJS(desired_capabilities={'phantomjs.page.settings.resourceTimeout': '20000'})
        try:
            browser.set_window_size(abovetustin_image_width, abovetustin_image_height)

            print("getting web page {}".format(self.url))
            browser.set_page_load_timeout(15)
            browser.get(self.url)

            # Need to wait for the page to load
            print("waiting for page to load...")
            wait = WebDriverWait(browser, g_request_timeout)
            wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'vrsMenu')))
        except BaseException:
            # Clean up the half-initialised browser, then let the caller see
            # the original error (it is re-raised unchanged).
            browser.quit()
            raise
        self.browser = browser
        return browser
Example #2
Source File: base.py    From syncPlaylist with MIT License 8 votes vote down vote up
def __init__(self):
        """Prepare playlist bookkeeping and start a PhantomJS-backed browser.

        Publishes both driver paths through os.environ, launches PhantomJS
        from phantomjs_driver_path, and attaches a WebDriverWait (timeout 5)
        and a Config instance to the object.
        """
        self.browse = None
        self.source_playlist = None
        self.target_playlist_tag = None
        self.success_list = []
        self.failed_list = []

        os.environ["webdriver.chrome.driver"] = chrome_driver_path
        os.environ["webdriver.phantomjs.driver"] = phantomjs_driver_path

        # NOTE(review): these options are built but never handed to the
        # PhantomJS constructor below -- confirm whether the user agent was
        # meant to be applied to the browser.
        user_agent_options = Options()
        user_agent_options.add_argument("user-agent={}".format(headers["User-Agent"]))

        self.browser = webdriver.PhantomJS(phantomjs_driver_path)
        self.wait = ui.WebDriverWait(self.browser, 5)
        self.config = Config()
Example #3
Source File: scrape_espncricinfo.py    From Awesome-Scripts with MIT License 7 votes vote down vote up
def get_latest_wallpapers():
    """Print the links of ESPNcricinfo pictures dated yesterday to today.

    Opens the image listing for that date range in PhantomJS, waits (up to
    the WebDriverWait timeout of 10) for an element with class "img-wrap" to
    become visible, then parses the rendered page and prints the absolute
    URL built from the first link inside every ``div`` with class "picture".

    The browser is always shut down once the page source has been captured,
    even when loading fails, so no PhantomJS process is left behind.
    """
    browser = webdriver.PhantomJS(PHANTOMJS_PATH, service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
    try:
        today_date = time.strftime("%d+%b+%Y")
        yesterday = datetime.now() - timedelta(days=1)
        yesterday_date = yesterday.strftime('%d+%b+%Y')
        first_page_url = 'http://www.espncricinfo.com/ci/content/image/?datefrom='+yesterday_date+'&dateupto='+today_date+';'
        browser.get(first_page_url)
        wait = WebDriverWait(browser, 10)
        wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "img-wrap")))
        # Extra pause after the element is visible (kept from the original;
        # presumably lets the remaining content finish rendering).
        time.sleep(2)
        page_source = browser.page_source
    finally:
        browser.quit()

    # let's parse our html
    soup = BeautifulSoup(page_source, "html.parser")
    images = soup.find_all('div', class_='picture')
    for image in images:
        url = "http://www.espncricinfo.com/" + image.find('a').get('href')
        print(url)
Example #4
Source File: worker.py    From alma-slipsomat with MIT License 6 votes vote down vote up
def get_driver(self):
        """Start a new browser and return its WebDriver.

        The browser is chosen by the ('selenium', 'browser') entry of
        self.config; 'firefox', 'chrome' and 'phantomjs' are recognised.
        Any other value raises RuntimeError.
        """
        wanted = self.config.get('selenium', 'browser')

        # Config value -> name of the driver class inside selenium.webdriver.
        known_browsers = (
            ('firefox', 'Firefox'),
            ('chrome', 'Chrome'),
            ('phantomjs', 'PhantomJS'),
        )
        for config_value, class_name in known_browsers:
            if wanted == config_value:
                # Imported lazily: selenium is only touched for a known browser.
                import selenium.webdriver

                return getattr(selenium.webdriver, class_name)()

        raise RuntimeError('Unsupported/unknown browser')
Example #5
Source File: KDLHASpider.py    From Pansidong with GNU General Public License v3.0 6 votes vote down vote up
def my_run(self, page):
        """Scrape one page of the kuaidaili proxy list.

        Args:
            page: page number substituted into the list URL.

        Returns:
            A list of dicts, one per table row after the first (header) row,
            with the keys 'ip', 'port', 'type', 'protocol', 'location' and
            'time'. 'time' keeps only the digits and dots of the cell text.
        """
        raw_url = "http://www.kuaidaili.com/proxylist/{page}/"
        url = raw_url.replace("{page}", str(page))
        logger.debug(url)
        driver = webdriver.PhantomJS(executable_path=self.phantomjs_path)
        try:
            driver.get(url)
            raw_html = driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
        finally:
            # Every call starts its own PhantomJS process; always shut it down.
            driver.quit()

        soup = BeautifulSoup(raw_html, "html5lib")
        t_result = []
        for tr in soup.find_all("tr")[1:]:
            td = tr.find_all("td")

            # Fill in the record for this row.
            t_result.append({
                'ip': td[0].get_text(),
                'port': td[1].get_text(),
                'type': td[2].get_text(),
                'protocol': td[3].get_text().replace(", ", "-"),
                'location': td[5].get_text(),
                # Built as text so it is a real string on Python 3 as well;
                # filter() over encoded bytes only produced a str on Python 2.
                'time': "".join(ch for ch in td[6].get_text() if ch in '0123456789.'),
            })
        return t_result
Example #6
Source File: bookmark_tasks.py    From crestify with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def fulltext_extract(bookmark):
    """Store the visible text of a bookmark's page on the bookmark record.

    Loads the bookmark's main_url in PhantomJS (SSL errors ignored, images
    disabled), takes the text of the <body> element, collapses every run of
    whitespace to a single space, and commits the UTF-8 encoded result as
    ``full_text``.

    The browser is quit as soon as the text has been read, including when
    loading the page raises.
    """
    browser = webdriver.PhantomJS(service_args=[
        "--ignore-ssl-errors=true",
        "--ssl-protocol=tlsv1",
        "--load-images=no"])
    try:
        fulltext_bookmark = Bookmark.query.get(bookmark.id)
        browser.get(fulltext_bookmark.main_url)
        body = browser.find_element_by_tag_name('body')
        bodytext = body.text
    finally:
        browser.quit()

    soup = BeautifulSoup4(bodytext)
    # split() with no arguments splits on every whitespace character,
    # newlines included, so no separate newline removal is needed.
    full_text = " ".join(soup.text.split())
    full_text = full_text.encode('utf-8')
    fulltext_bookmark.full_text = full_text
    db.session.commit()
Example #7
Source File: helpers.py    From ODIN with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def setup_phantomjs():
    """Create and return a PhantomJS browser object.

    Returns the browser (window set to 1920x1080), or None after printing a
    red warning when PhantomJS could not be started.
    """
    try:
        # Setup capabilities for the PhantomJS browser.
        # Work on a copy: DesiredCapabilities.PHANTOMJS is a shared
        # class-level dict, and editing it in place would leak these
        # settings into every other PhantomJS session in the process.
        phantomjs_capabilities = dict(DesiredCapabilities.PHANTOMJS)
        # Some basic creds to use against an HTTP Basic Auth prompt
        phantomjs_capabilities['phantomjs.page.settings.userName'] = 'none'
        phantomjs_capabilities['phantomjs.page.settings.password'] = 'none'
        # Flags to ignore SSL problems and get screenshots
        service_args = [
            '--ignore-ssl-errors=true',
            '--web-security=no',
            '--ssl-protocol=any',
        ]
        # Create the PhantomJS browser and set the window size
        browser = webdriver.PhantomJS(desired_capabilities=phantomjs_capabilities,service_args=service_args)
        browser.set_window_size(1920,1080)
    except Exception as error:
        click.secho("[!] Bad news: PhantomJS failed to load (not installed?), so activities \
requiring a web browser will be skipped.",fg="red")
        click.secho("L.. Details: {}".format(error),fg="red")
        browser = None
    return browser
Example #8
Source File: pytest_dallinger.py    From Dallinger with MIT License 6 votes vote down vote up
def selenium_recruits(request, recruitment_loop):
    """Yield a generator that opens each recruitment URL in a fresh browser.

    The inner generator walks ``recruitment_loop``; for every URL it starts
    the driver class selected by ``request.param`` in DRIVER_MAP (PhantomJS
    when there is no match), loads the URL and yields the driver. When the
    consumer moves on, the driver is quit (errors from quit are ignored) and
    the temporary local-storage directory created for PhantomJS is removed.
    """
    def recruits():
        import shutil

        for url in recruitment_loop:
            kwargs = {}
            tmpdirname = None
            driver_class = DRIVER_MAP.get(request.param, webdriver.PhantomJS)
            if driver_class is webdriver.PhantomJS:
                # PhantomJS needs a new local storage for every run
                tmpdirname = tempfile.mkdtemp()
                kwargs = {
                    "service_args": ["--local-storage-path={}".format(tmpdirname)],
                }
            driver = None
            try:
                driver = driver_class(**kwargs)
                # Loading happens inside the try so a failed page load still
                # shuts the browser down.
                driver.get(url)
                yield driver
            finally:
                if driver is not None:
                    try:
                        driver.quit()
                    except Exception:
                        # Best effort: the browser may already be gone.
                        pass
                if tmpdirname is not None:
                    # mkdtemp() never cleans up after itself.
                    shutil.rmtree(tmpdirname, ignore_errors=True)

    yield recruits()
Example #9
Source File: accounts.py    From twitter-accounts-creator-bot with MIT License 6 votes vote down vote up
def getWebdriver(self, driverType):
		"""Return a WebDriver matching driverType.

		'proxy' gives Firefox with a profile that routes traffic through the
		SOCKS proxy at 127.0.0.1:9150 and applies the privacy-related
		preferences listed below; 'headless' gives PhantomJS; any other value
		gives a plain Firefox.
		"""
		if driverType == 'headless':
			return webdriver.PhantomJS()
		if driverType != 'proxy':
			return webdriver.Firefox()

		# Applied to the profile in exactly this order.
		proxy_preferences = (
			("network.proxy.type", 1),
			("network.proxy.socks", "127.0.0.1"),
			("network.proxy.socks_port", 9150),
			("network.proxy.socks_remote_dns", True),
			("places.history.enabled", False),
			("privacy.clearOnShutdown.offlineApps", True),
			("privacy.clearOnShutdown.passwords", True),
			("privacy.clearOnShutdown.siteSettings", True),
			("privacy.sanitize.sanitizeOnShutdown", True),
			("signon.rememberSignons", False),
			("network.cookie.lifetimePolicy", 2),
			("network.dns.disablePrefetch", True),
			("network.http.sendRefererHeader", 0),
			("javascript.enabled", False),
			("permissions.default.image", 2),
		)
		profile = webdriver.FirefoxProfile()
		for pref_name, pref_value in proxy_preferences:
			profile.set_preference(pref_name, pref_value)
		return webdriver.Firefox(profile)
Example #10
Source File: readcomic.py    From ReadComicOnline-Downloader with MIT License 6 votes vote down vote up
def Single_Issue(url,Quality):
	"""Open a single issue page in PhantomJS and wait for it to load.

	Prints Quality[0] (quality to download) and Quality[1] (download
	order), loads ``url`` with images disabled and waits up to the
	WebDriverWait timeout of 10 for the element with id "stSegmentFrame".
	If the wait fails, a screenshot is saved to 'Single_exception.png' and
	the error is printed.

	Written so that it parses on both Python 2 and Python 3 (single-argument
	print calls, ``except ... as``); the printed text is unchanged.

	NOTE(review): the browser is still open when the visible code ends --
	confirm where it is meant to be closed.
	"""
	print('Quality To Download :  %s' % (Quality[0],))
	print('Order To Download :  %s' % (Quality[1],))

	browser = webdriver.PhantomJS(service_args=['--load-images=no'])
	browser.get(url)
	try:
		WebDriverWait(browser, 10).until(
			EC.presence_of_element_located((By.ID, "stSegmentFrame"))
		)
	except Exception as e:
		browser.save_screenshot('Single_exception.png')
		print(e)
Example #11
Source File: main.py    From SneakerBotTutorials with MIT License 6 votes vote down vote up
def createHeadlessBrowser(proxy=None, XResolution=1024, YResolution=768, timeout=20):
	"""Return a browser driver for the bot.

	When TEST_MODE is not False a plain Firefox driver is returned as-is.
	Otherwise a PhantomJS driver is created with a desktop Chrome user-agent
	string, optionally behind ``proxy``, sized to XResolution x YResolution
	and with its page-load timeout set to ``timeout``.
	"""
	if TEST_MODE != False:
		# Test mode: regular Firefox, no window size or timeout applied.
		return webdriver.Firefox()

	# Fake browser headers
	capabilities = dict(DesiredCapabilities.PHANTOMJS)
	capabilities["phantomjs.page.settings.userAgent"] = (
	    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.86 Safari/537.36')

	phantom_kwargs = {}
	if proxy is not None:
		# The user supplied a proxy: route PhantomJS through it.
		phantom_kwargs['service_args'] = ['--proxy={}'.format(proxy),'--proxy-type=https','--ignore-ssl-errors=true', '--ssl-protocol=any', '--web-security=false',]
	phantom_kwargs['desired_capabilities'] = capabilities
	driver = webdriver.PhantomJS(**phantom_kwargs)

	# Screen resolution; ideally this would be dynamic based on the number
	# of browsers open.
	driver.set_window_size(XResolution,YResolution)
	# Timeout for page loads in this selenium window.
	driver.set_page_load_timeout(timeout)
	return driver
Example #12
Source File: form.py    From cmdbac with Apache License 2.0 6 votes vote down vote up
def __init__(self, *args, **kwargs):
        """Configure the spider from keyword arguments and start PhantomJS.

        Recognised keyword arguments:
            start_url: the single entry of self.start_urls.
            follow: link following is enabled only for the string 'true'.
            proxy: optional; when it can be appended to '--proxy=' PhantomJS
                is started with that HTTP proxy, otherwise without one.
        """
        super(FormSpider, self).__init__(*args, **kwargs)

        self.start_urls = [kwargs.get('start_url')]

        follow = kwargs.get('follow') == 'true'
        self.rules = (
            Rule (SgmlLinkExtractor(allow=('')), callback='parse_form', follow=follow),
        )
        super(FormSpider, self)._compile_rules()

        proxy = kwargs.get('proxy')
        try:
            service_args = [
                '--proxy=' + proxy,
                '--proxy-type=http',
            ]
        except TypeError:
            # No proxy (None) or a non-string value: run without a proxy.
            # Only TypeError is caught so that unrelated failures and
            # KeyboardInterrupt are no longer silently swallowed.
            service_args = None
        self.browser = webdriver.PhantomJS(service_args=service_args)
Example #13
Source File: headless.py    From darklight with Apache License 2.0 6 votes vote down vote up
def __init__(self, ini, load_image=False, tor_network=False):
        """Start the PhantomJS driver described by the ``ini`` settings.

        Args:
            ini: settings reader; ``ini.read(section, key)`` supplies
                TOR/HOST, TOR/PORT and HEADLESS/PATH.
            load_image: when false, PhantomJS is told not to load images.
            tor_network: when true, traffic goes through the SOCKS5 proxy at
                TOR/HOST:TOR/PORT.

        The PhantomJS service log is discarded.
        """
        import os  # local import: only needed for the null-device path

        service_args = []

        # Skip image downloads unless explicitly requested.
        if not load_image:
            service_args.append('--load-images=no')

        # Route selenium traffic through the tor proxy.
        if tor_network:
            proxy = '{}:{}'.format(
                ini.read('TOR', 'HOST'),
                ini.read('TOR', 'PORT'))

            service_args.append('--proxy={}'.format(proxy))
            service_args.append('--proxy-type=socks5')

        self.driver = webdriver.PhantomJS(
            executable_path=ini.read('HEADLESS', 'PATH'),
            service_args=service_args,
            # os.devnull is '/dev/null' on POSIX and the equivalent null
            # device elsewhere, so the log is discarded on every platform.
            service_log_path=os.devnull
        )
Example #14
Source File: struts2scan.py    From ZEROScan with MIT License 6 votes vote down vote up
def gethtml(url):
    """Fetch ``url`` and return its body, status code and URL.

    A plain HTTP GET (3 second timeout, redirects followed, Firefox-like
    headers with the URL itself as Referer) is tried first and reported as
    ``{"html": ..., "code": ..., "url": ...}``. If that raises, the result of
    ``get_html_phantomJS(url)`` is returned instead.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0',
        'Accept-Language': 'en-us;q=0.5,en;q=0.3',
        'Referer': url,
    }
    try:
        response = requests.get(url, timeout=3, headers=request_headers, allow_redirects=True)
        return {"html": response.text, "code": response.status_code, "url": url}
    except Exception:
        # Any failure of the direct request falls back to PhantomJS.
        return get_html_phantomJS(url)

# Fetch the page source using PhantomJS
Example #15
Source File: selenium.py    From SerpScrap with MIT License 6 votes vote down vote up
def _get_webdriver(self):
        """Return a webdriver instance set up with the matching
        profile/proxies.

        Chrome is quite fast, but not as stealthy as PhantomJS.

        Returns:
            The webdriver built by the factory method that corresponds to
            self.browser_type ('chrome', 'firefox' or 'phantomjs').
            False when self.browser_type is none of these.
        """
        # browser_type value -> name of the factory method on this object.
        factories = (
            ('chrome', '_get_Chrome'),
            ('firefox', '_get_Firefox'),
            ('phantomjs', '_get_PhantomJS'),
        )
        for browser_type, factory_name in factories:
            if self.browser_type == browser_type:
                return getattr(self, factory_name)()

        return False
Example #16
Source File: environment.py    From pixelated-user-agent with GNU Affero General Public License v3.0 6 votes vote down vote up
def _setup_webdriver(context):
    """Create the browser named in the run's userdata and store it on context.

    The 'webdriver' userdata entry selects 'phantomjs', 'firefox' or
    'chrome' (the default). The new browser gets a 1280x1024 window, the
    default implicit wait and a page-load timeout of 60.

    Raises:
        UnsuportedWebDriverError: the configured name is not one of the
            supported webdrivers.
    """
    browser = context.config.userdata.get('webdriver', 'chrome')
    supported_webdrivers = {
        'phantomjs': webdriver.PhantomJS,
        'firefox': webdriver.Firefox,
        'chrome': webdriver.Chrome,
    }

    # Only the lookup is guarded: a KeyError raised while the driver itself
    # starts up must not be reported as "unsupported webdriver".
    try:
        driver_factory = supported_webdrivers[browser]
    except KeyError:
        raise UnsuportedWebDriverError('{} is not a supported webdriver'.format(browser))

    context.browser = driver_factory()
    context.browser.set_window_size(1280, 1024)
    context.browser.implicitly_wait(DEFAULT_IMPLICIT_WAIT_TIMEOUT_IN_S)
    context.browser.set_page_load_timeout(60)
Example #17
Source File: isp_data_pollution.py    From isp-data-pollution with MIT License 6 votes vote down vote up
def add_url_links(self,links,url=''):
        """Add http(s) links that are not blacklisted, then report progress.

        Links are visited in random order. Each accepted link is passed to
        self.add_link; the loop stops once the number of successful additions
        exceeds self.max_links_per_page. When self.debug or self.verbose is
        set, the driver's current URL (``url`` if it cannot be read) is
        reported together with the number of links added.
        """
        added = 0
        # Sorting on a fresh random key per element yields a random order.
        for link in sorted(links, key=lambda _link: random.random()):
            parsed = uprs.urlparse(link)
            if parsed.scheme not in ('http', 'https') or self.blacklisted(link):
                continue
            if self.add_link(link):
                added += 1
            if added > self.max_links_per_page:
                break

        if not (self.verbose or self.debug):
            return

        current_url = url  # default
        try:
            @self.chromedriver_short_timeout
            def chromedriver_current_url(): return self.driver.current_url
            current_url = chromedriver_current_url()
            # the current_url method breaks on a lot of sites, e.g.
            # python3 -c 'from selenium import webdriver; driver = webdriver.PhantomJS(); driver.get("https://github.com"); print(driver.title); print(driver.current_url); driver.quit()'
        except Exception as e:
            if self.debug:
                print(f'.current_url exception:\n{e}')

        if self.debug:
            print("{}: {:d} links added, {:d} total, {:.1f} bits domain entropy".format(current_url,added,self.link_count(),self.domain_entropy()))
        elif self.verbose:
            self.print_progress(current_url,num_links=added)
Example #18
Source File: Screenshot_to_pdf.py    From crawlBaiduWenku with MIT License 5 votes vote down vote up
def parse_pdf(url,wenku_title):
    """Open ``url`` in PhantomJS and hand the browser to ``screenshot``.

    Prints a notice (in Chinese) that this step is slow, loads the page,
    maximizes the window, pauses 3 seconds, then calls ``con_read(browser)``
    on a best-effort basis before ``screenshot(browser, wenku_title)``.
    """
    print('此过程较慢请稍后')
    browser=webdriver.PhantomJS()
    browser.get(url)
    browser.maximize_window()
    time.sleep(3)
    try:
        con_read(browser)
    except Exception:
        # Best effort: a failure here must not prevent the screenshot.
        # Narrowed from a bare except so Ctrl-C / SystemExit still work.
        pass
    screenshot(browser,wenku_title)
Example #19
Source File: linkedinViewerBot.py    From FunUtils with MIT License 5 votes vote down vote up
def Launch():
    """Check the required files, ask which browser to use and start it.

    Exits when the file 'config' is missing, creates an empty
    'visitedUsers.txt' when that file is missing, then keeps prompting until
    the user enters a number from 1 to 5 and passes it to StartBrowser.
    """
    # The configuration file is mandatory.
    if not os.path.isfile('config'):
        print ('Error! No configuration file.')
        sys.exit()
    # The visited-users file is created empty on first run.
    if not os.path.isfile('visitedUsers.txt'):
        with open('visitedUsers.txt', 'wb'):
            pass

    # Browser choice
    menu = (
        'Choose your browser:',
        '[1] Chrome',
        '[2] Firefox/Iceweasel',
        '[3] Firefox/Iceweasel (light)',
        '[4] PhantomJS',
        '[5] PhantomJS (light)',
    )
    for menu_line in menu:
        print (menu_line)

    while True:
        try:
            browserChoice = int(input('Choice? '))
        except ValueError:
            browserChoice = None
        if browserChoice in (1, 2, 3, 4, 5):
            break
        print ('Invalid choice.')

    StartBrowser(browserChoice)
Example #20
Source File: code_verification.py    From weibo-keywords-crawler with MIT License 5 votes vote down vote up
def test():
    """Open the Weibo pincode URL in PhantomJS and run verify_user on it."""
    pincode_url = 'http://s.weibo.com/ajax/pincode/pin?type=sass&ts=1405404856'
    phantom = webdriver.PhantomJS()
    phantom.get(pincode_url)
    verify_user(phantom)
Example #21
Source File: 21 TaoBaoInfo.py    From Python-Spider with Apache License 2.0 5 votes vote down vote up
def __init__(self):
        """Set the output directory name, start PhantomJS and set headers.

        PhantomJS is launched from the bundled macOS binary path below.
        """
        phantomjs_binary = './phantomjs-2.1.1-macosx/bin/phantomjs'
        user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0)'

        self.dirName = 'MyTaoBaoInfo'
        self.driver = webdriver.PhantomJS(executable_path=phantomjs_binary)
        self.headers = {'User-Agent': user_agent}

    # Fetch the page content and extract data from it
Example #22
Source File: screenshot.py    From N4xD0rk with GNU General Public License v3.0 5 votes vote down vote up
def snapshot(url):
	"""Save a screenshot of ``url`` to '<url>.png'.

	``url`` is given without a scheme. HTTPS is tried first and, if that
	load raises, plain HTTP is used. The PhantomJS process is always shut
	down afterwards, so repeated calls do not accumulate browsers.
	"""
	driver = webdriver.PhantomJS(service_args=['--ignore-ssl-errors=true']) # or add to your PATH
	try:
		driver.set_page_load_timeout(15)
		driver.set_window_size(1024, 768) # optional
		try:
			driver.get('https://{0}'.format(url))
		except Exception:
			# HTTPS failed; retry over plain HTTP. Narrowed from a bare
			# except so Ctrl-C is not mistaken for a load failure.
			driver.get('http://{0}'.format(url))
		driver.save_screenshot(url+".png")
	finally:
		driver.quit()
Example #23
Source File: web_driver_pool.py    From Sasila with Apache License 2.0 5 votes vote down vote up
def _get_base_driver():
    """Build a PhantomJS driver from the default settings.

    Uses default_settings.PHANTOMJS_PATH as the executable and ``dcap`` as
    the desired capabilities; default_settings.PHANTOMJS_SERVICE is passed
    as service_args only when it is truthy.
    """
    phantom_kwargs = {}
    service_args = default_settings.PHANTOMJS_SERVICE
    if service_args:
        phantom_kwargs['service_args'] = service_args
    phantom_kwargs['executable_path'] = default_settings.PHANTOMJS_PATH
    phantom_kwargs['desired_capabilities'] = dcap
    return webdriver.PhantomJS(**phantom_kwargs)
Example #24
Source File: 03 douYuUnittest.py    From Python-Spider with Apache License 2.0 5 votes vote down vote up
def setUp(self):
        """Start a PhantomJS driver from the bundled macOS binary."""
        phantomjs_binary = "./phantomjs-2.1.1-macosx/bin/phantomjs"
        self.driver = webdriver.PhantomJS(executable_path=phantomjs_binary)

    # Concrete test-case methods; their names must start with "test"
Example #25
Source File: webdriver.py    From expressvpn_leak_testing with MIT License 5 votes vote down vote up
def driver(self, browser):
        """Return a new local WebDriver for ``browser``.

        Recognised names are 'firefox', 'chrome', 'safari', 'opera' and
        'phantom'; any other name raises XVEx mentioning the device's OS.
        """
        # Browser name -> driver class name inside the webdriver module.
        driver_classes = (
            ('firefox', 'Firefox'),
            ('chrome', 'Chrome'),
            ('safari', 'Safari'),
            ('opera', 'Opera'),
            ('phantom', 'PhantomJS'),
        )
        for browser_name, class_name in driver_classes:
            if browser == browser_name:
                return getattr(webdriver, class_name)()
        raise XVEx("{} is not supported on {}".format(browser, self._device.os_name()))
Example #26
Source File: phantom_js_spider.py    From LagouJob with Apache License 2.0 5 votes vote down vote up
def download_phantom_js():
    """Download and unpack PhantomJS 2.1.1 on Windows if it is missing.

    Nothing happens on other platforms. On Windows, when
    './PhantomJS/<archive name without .zip>/bin/phantomjs.exe' does not
    exist, the archive is streamed to './PhantomJS/' with a progress bar and
    extracted there; otherwise 'PhantomJS exists.' is printed and a PhantomJS
    driver is started.
    """
    download_url_map = {
        'Windows': 'https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-2.1.1-windows.zip',
        'Linux': 'https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-2.1.1-linux-i686.tar.bz2'
    }

    if platform.system() == 'Windows':
        download_url = download_url_map[platform.system()]
        archive_name = download_url.split('/')[-1]
        archive_path = './PhantomJS/%s' % archive_name
        if not os.path.exists(
                './PhantomJS/%s/bin/phantomjs.exe' % archive_name.replace('.zip', '')):
            print('Downloading PhantomJS, please wait...')
            mkdirs_if_not_exists('./PhantomJS/')
            response = requests.get(download_url, stream=True, timeout=100)
            total_size = int(response.headers.get('content-length', 0))
            block_size = 1024
            wrote = 0
            # Ceiling division: a trailing partial block is one more chunk.
            # (The previous floor division dropped it from the total.)
            total_blocks = -(-total_size // block_size)

            with open(archive_path, mode='wb') as f:
                for data in tqdm(response.iter_content(block_size), total=total_blocks,
                                 unit='KB', unit_scale=True):
                    wrote = wrote + len(data)
                    f.write(data)

            if total_size != 0 and wrote != total_size:
                print("ERROR, something went wrong")

            # The context manager closes the archive even if extraction fails.
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall('./PhantomJS')
        else:
            print('PhantomJS exists.')
            # NOTE(review): executable_path is not defined in this function;
            # presumably a module-level name -- confirm. The driver is not
            # used or returned by the visible code.
            driver = webdriver.PhantomJS(executable_path=executable_path)
Example #27
Source File: middleware.py    From daywatch with MIT License 5 votes vote down vote up
def __init__(self):
        """Start a PhantomJS browser and keep it on self.driver."""
        phantom = webdriver.PhantomJS()
        self.driver = phantom
Example #28
Source File: spider_selenium_phantomjs.py    From SmallReptileTraining with MIT License 5 votes vote down vote up
def __init__(self, qq='', pwd=None):
        """Start a maximized PhantomJS browser and remember the credentials.

        Args:
            qq: stored on self.qq.
            pwd: stored on self.pwd.
        """
        # Runs as-is on Ubuntu; on Windows executable_path needs to be set.
        self.driver = phantom = webdriver.PhantomJS()
        phantom.maximize_window()
        self.qq, self.pwd = qq, pwd
        print('webdriver start init success!')
Example #29
Source File: TraTicketBooker.py    From TRA-Ticket-Booker with GNU General Public License v3.0 5 votes vote down vote up
def start_up_driver(self):
        """Create and return a new PhantomJS WebDriver."""
        return webdriver.PhantomJS()
Example #30
Source File: webdriver.py    From expressvpn_leak_testing with MIT License 5 votes vote down vote up
def driver(self, browser):
        """Start the server and return a WebDriver for ``browser``.

        'firefox' and 'chrome' are driven through a Remote WebDriver at
        http://127.0.0.1:4444/wd/hub with the matching desired capabilities;
        'phantom' uses a local PhantomJS driver. Any other name raises XVEx
        mentioning the device's OS.
        """
        self._start_server()
        command_executor = 'http://127.0.0.1:4444/wd/hub'

        # Browser name -> attribute of DesiredCapabilities used remotely.
        remote_capabilities = (
            ('firefox', 'FIREFOX'),
            ('chrome', 'CHROME'),
        )
        for browser_name, capability_attr in remote_capabilities:
            if browser == browser_name:
                return webdriver.Remote(command_executor=command_executor,
                                        desired_capabilities=getattr(DesiredCapabilities, capability_attr))

        if browser == 'phantom':
            return webdriver.PhantomJS()
        raise XVEx("{} is not supported on {}".format(browser, self._device.os_name()))