Python retrying.RetryError() Examples

The following are 22 code examples of retrying.RetryError(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module retrying, or try the search function.
Example #1
Source File: test_api.py    From dicomweb-client with MIT License 6 votes vote down vote up
def test_store_instance_error_with_retries(httpserver, client, cache_dir):
    """Storing instances against a server that always answers with a request
    timeout must raise ``RetryError`` after exactly ``max_attempts`` requests.
    """
    max_attempts = 2

    dataset = load_json_dataset({})
    dataset.is_little_endian = True
    dataset.is_implicit_VR = True

    retry_params = dict(
        retry=True,
        max_attempts=max_attempts,
        wait_exponential_multiplier=10,
    )
    client.set_http_retry_params(**retry_params)

    # Every request is answered with REQUEST_TIMEOUT, so each attempt fails.
    httpserver.serve_content(
        content='', code=HTTPStatus.REQUEST_TIMEOUT, headers=''
    )

    with pytest.raises(RetryError):
        client.store_instances([dataset])

    assert len(httpserver.requests) == max_attempts

    # The first recorded request must carry the DICOM multipart content type.
    first_request = httpserver.requests[0]
    expected_prefix = 'multipart/related; type="application/dicom"'
    assert first_request.headers['Content-Type'].startswith(expected_prefix)
Example #2
Source File: server.py    From presto-admin with Apache License 2.0 6 votes vote down vote up
def check_server_status():
    """
    Check whether the server on env.host reports itself as running.

    The status is obtained from query_server_for_status() through a client
    connected to the first configured coordinator. A RetryError raised by
    that call is treated as "server not running".

    Returns:
        The result of query_server_for_status(), or False when no
        coordinator is configured or the status query raised RetryError.
    """
    coordinators = get_coordinator_role()
    if not coordinators:
        warn('No coordinator defined.  Cannot verify server status.')
        # Nothing to connect to: indexing the empty role list below would
        # raise IndexError, so report the status as "not running" instead.
        return False
    with closing(PrestoClient(coordinators[0], env.user)) as client:
        node_id = lookup_string_config(
            'node.id',
            os.path.join(constants.REMOTE_CONF_DIR, 'node.properties'),
            env.host)

        try:
            return query_server_for_status(client, node_id)
        except RetryError:
            return False
Example #3
Source File: test_api.py    From dicomweb-client with MIT License 6 votes vote down vote up
def test_search_for_studies_with_retries(httpserver, client, cache_dir):
    """Searching for studies against a server that always answers with a
    request timeout must raise ``RetryError`` after ``max_attempts`` requests.
    """
    max_attempts = 3
    client.set_http_retry_params(
        retry=True,
        max_attempts=max_attempts,
        wait_exponential_multiplier=10,
    )

    # Serve an empty timeout response for every request.
    response_headers = {'content-type': 'application/dicom+json'}
    httpserver.serve_content(
        content='',
        code=HTTPStatus.REQUEST_TIMEOUT,
        headers=response_headers,
    )

    with pytest.raises(RetryError):
        client.search_for_studies()

    request_count = len(httpserver.requests)
    assert request_count == max_attempts
Example #4
Source File: sender.py    From AdslProxy with MIT License 5 votes vote down vote up
def run(self):
        """
        Main dialing routine.

        Removes the current proxy, runs the dial command and extracts the
        new IP. When an IP is obtained, the proxy string is built, tested
        and, if valid, stored before sleeping for DIAL_CYCLE. When no IP can
        be extracted, the whole sequence is repeated.

        The re-dial is a loop rather than a recursive ``self.run()`` call, so
        a long streak of failed dials cannot exhaust the recursion limit.

        :return: None
        """
        while True:
            logger.info('Dial started, remove proxy')
            try:
                self.remove_proxy()
            except RetryError:
                # Best effort: keep dialing even if the old proxy could not
                # be removed.
                logger.error('Retried for max times, continue')
            # Dial
            status, _ = subprocess.getstatusoutput(DIAL_BASH)
            if status != 0:
                logger.error('Dial failed')
            # Get the IP obtained by dialing
            ip = self.extract_ip()
            if not ip:
                # Failed to get an IP, dial again
                logger.error('Get IP failed, re-dialing')
                continue
            logger.info(f'Get new IP {ip}')
            if PROXY_USERNAME and PROXY_PASSWORD:
                proxy = f'{PROXY_USERNAME}:{PROXY_PASSWORD}@{ip}:{PROXY_PORT}'
            else:
                proxy = f'{ip}:{PROXY_PORT}'
            time.sleep(10)
            if self.test_proxy(proxy):
                logger.info(f'Valid proxy {proxy}')
                # Put the proxy into the database
                self.set_proxy(proxy)
                time.sleep(DIAL_CYCLE)
            else:
                logger.error(f'Proxy invalid {proxy}')
            return
Example #5
Source File: test_bucket_mover_service.py    From professional-services with Apache License 2.0 5 votes vote down vote up
def test_create_bucket_api_call_retry(self, mock_write_spinner_and_log,
                                          mock_bucket):
        """Tests the method is retried 5 times when the exception happens."""
        mock_bucket.create.side_effect = exceptions.ServiceUnavailable('503')
        with self.assertRaises(RetryError):
            bucket_mover_service._create_bucket_api_call(
                mock.MagicMock(), mock.MagicMock(), mock_bucket)
        # These checks must sit outside the ``with`` block: the call above
        # raises, so any statement placed after it inside the block is never
        # executed and would verify nothing.
        self.assertEqual(5, mock_bucket.create.call_count)
        self.assertEqual(5, mock_write_spinner_and_log.call_count)
Example #6
Source File: test_mgmt.py    From nova-powervm with Apache License 2.0 5 votes vote down vote up
def test_discover_vscsi_disk_not_one_result(self, mock_write, mock_glob,
                                                mock_retry):
        """discover_vscsi_disk raises when zero or several disks are found."""
        def check_retry_kwargs(retry_kwargs):
            # The retry decorator must be configured with these settings.
            self.assertIn('retry_on_result', retry_kwargs)
            self.assertEqual(250, retry_kwargs['wait_fixed'])
            self.assertEqual(300000, retry_kwargs['stop_max_delay'])

        def always_times_out(unused):
            raise retrying.RetryError(mock.Mock(attempt_number=123))

        def passthrough_decorator(**kwargs):
            # Stand-in for retrying.retry that leaves the poller untouched.
            check_retry_kwargs(kwargs)
            return lambda _poll_for_dev: _poll_for_dev

        def timeout_decorator(**kwargs):
            # Stand-in for retrying.retry that swaps the poller for a
            # function which immediately raises RetryError.
            check_retry_kwargs(kwargs)
            return lambda _poll_for_dev: always_times_out

        mapping = mock.Mock()
        mapping.client_adapter.lpar_slot_num = 5
        mapping.backing_storage.udid = (
            '275b5d5f88fa5611e48be9000098be9400'
            '13fb2aa55a2d7b8d150cb1b7b6bc04d6')

        # Case 1: no disks found
        mock_retry.side_effect = timeout_decorator
        mock_glob.side_effect = lambda path: []
        self.assertRaises(
            npvmex.NoDiskDiscoveryException,
            mgmt.discover_vscsi_disk, mapping)

        # Case 2: multiple disks found
        mock_retry.side_effect = passthrough_decorator
        mock_glob.side_effect = [['path'], ['/dev/sde', '/dev/sdf']]
        self.assertRaises(
            npvmex.UniqueDiskDiscoveryException,
            mgmt.discover_vscsi_disk, mapping)
Example #7
Source File: test_mgmt.py    From nova-powervm with Apache License 2.0 5 votes vote down vote up
def test_remove_block_dev_timeout(self, mock_dacw, mock_stat,
                                      mock_realpath, mock_retry):
        """remove_block_dev raises DeviceDeletionException on poll timeout."""
        link = '/dev/link/foo'
        realpath = '/dev/sde'
        delpath = '/sys/block/sde/device/delete'

        def check_retry_kwargs(retry_kwargs):
            # The retry decorator must be configured with these settings.
            self.assertIn('retry_on_result', retry_kwargs)
            self.assertEqual(250, retry_kwargs['wait_fixed'])
            self.assertEqual(10000, retry_kwargs['stop_max_delay'])

        def always_times_out(unused):
            raise retrying.RetryError(mock.Mock(attempt_number=123))

        def timeout_decorator(**kwargs):
            # Stand-in for retrying.retry that swaps the poller for a
            # function which immediately raises RetryError.
            check_retry_kwargs(kwargs)
            return lambda _poll_for_del: always_times_out

        # Deletion is attempted, but the device never goes away: stat keeps
        # succeeding and the (mocked) retry reports a timeout.
        mock_realpath.return_value = realpath
        mock_stat.side_effect = lambda path: 1
        mock_retry.side_effect = timeout_decorator

        self.assertRaises(
            npvmex.DeviceDeletionException, mgmt.remove_block_dev, link)
        mock_realpath.assert_called_once_with(link)
        mock_dacw.assert_called_with(delpath, 'a', '1')
Example #8
Source File: test_helpers.py    From dcos with Apache License 2.0 5 votes vote down vote up
def docker_pull_image(image: str) -> bool:
    """Pull ``image`` via ``sudo docker pull``.

    Returns True when the pull command completes, and False when a
    ``retrying.RetryError`` is caught.

    NOTE(review): nothing visible in this body raises ``RetryError``
    (``subprocess.run(check=True)`` raises ``CalledProcessError``), so the
    False branch looks unreachable from here - confirm against any decorator
    applied to this function.
    """
    log.info("\n Ensure docker image availability ahead of tests.")
    pull_command = ["sudo", "docker", "pull", image]
    try:
        subprocess.run(pull_command, check=True)
    except retrying.RetryError:
        return False
    else:
        return True
Example #9
Source File: _wait_for_dcos.py    From dcos-e2e with Apache License 2.0 5 votes vote down vote up
def _test_utils_wait_for_dcos(
    session: Union[DcosApiSession, EnterpriseApiSession],
) -> None:
    """
    Wait for DC/OS using DC/OS Test Utils.

    DC/OS Test Utils raises its own timeout, a ``retrying.RetryError``.
    We want to ignore this error and use our own timeouts, so we wrap this in
    our own retried function.

    NOTE(review): no retry decorator is visible on this function in this
    view, so as shown a ``RetryError`` from ``session.wait_for_dcos()`` would
    propagate to the caller - confirm the decorator is applied where this
    function is defined.

    Args:
        session: The API session whose ``wait_for_dcos`` method is called.
    """
    # A single delegating call; any retry policy has to come from outside.
    session.wait_for_dcos()  # type: ignore 
Example #10
Source File: marathon.py    From dcos-e2e with Apache License 2.0 5 votes vote down vote up
def deploy_app(self, app_definition, check_health=True, ignore_failed_tasks=False, timeout=180):
        """Deploy an app to marathon

        This function deploys an application and then waits for marathon to
        acknowledge its successful creation or fails the test.

        The wait for application is immediately aborted if Marathon returns
        nonempty 'lastTaskFailure' field (if ignore_failed_tasks is set to False). Otherwise it waits until all the
        instances reach tasksRunning and then tasksHealthy state.

        Args:
            app_definition: a dict with application definition as specified in
                            Marathon API (https://mesosphere.github.io/marathon/docs/rest-api.html#post-v2-apps)
            check_health: wait until Marathon reports tasks as healthy before
                          returning
            ignore_failed_tasks: forwarded to wait_for_app_deployment; when
                                 False, a nonempty 'lastTaskFailure' aborts
                                 the wait (see above)
            timeout: seconds to wait for the deployment before failing

        Returns:
            A list of named tuples which represent service points of deployed
            applications. I.E:
                [Endpoint(host='172.17.10.202', port=10464), Endpoint(host='172.17.10.201', port=1630)]

        Raises:
            Exception: if the deployment did not complete within `timeout`
                       seconds; the underlying retrying.RetryError is
                       attached as the cause.
        """
        r = self.post('/v2/apps', json=app_definition)
        log.info('Response from marathon: {}'.format(repr(r.json())))
        r.raise_for_status()

        try:
            return self.wait_for_app_deployment(
                    app_definition['id'],
                    app_definition['instances'],
                    check_health, ignore_failed_tasks, timeout)
        except retrying.RetryError as ex:
            # Chain the RetryError explicitly, as the pod helpers do, so the
            # traceback shows it as the direct cause of the timeout.
            raise Exception("Application deployment failed - operation was not "
                            "completed in {} seconds.".format(timeout)) from ex
Example #11
Source File: activity_retrying.py    From botoflow with Apache License 2.0 5 votes vote down vote up
def call(self, fn, *args, **kwargs):
        """Invoke ``fn`` repeatedly until an attempt is accepted or the stop
        condition is reached.

        This is a generator: it yields the result of ``fn(*args, **kwargs)``
        and, between attempts, yields ``workflow_time.sleep(...)``.

        Each outcome (value or exception info) is wrapped in a
        ``retrying.Attempt``. When ``self.should_reject`` does not reject the
        attempt, its value is handed to ``return_``. Otherwise, once
        ``self.stop`` says to stop, either the attempt's own exception is
        raised (when ``self._wrap_exception`` is falsy and the attempt has an
        exception) or a ``RetryError`` wrapping the attempt is raised.
        """
        # Start time in milliseconds, taken from the workflow clock.
        start_time = int(round(workflow_time.time() * 1000))
        attempt_number = 1
        while True:
            try:
                val = yield fn(*args, **kwargs)
                attempt = retrying.Attempt(val, attempt_number, False)
            except Exception:
                # Record the failure as an Attempt carrying the exc_info tuple.
                val = sys.exc_info()
                attempt = retrying.Attempt(val, attempt_number, True)

            if not self.should_reject(attempt):
                # NOTE(review): presumably return_ ends the coroutine with
                # this value - confirm against its definition.
                return_(attempt.get(self._wrap_exception))

            delay_since_first_attempt_ms = int(round(workflow_time.time() * 1000)) - start_time
            if self.stop(attempt_number, delay_since_first_attempt_ms):
                if not self._wrap_exception and attempt.has_exception:
                    # get() on an attempt with an exception should cause it to be raised, but raise just in case
                    raise attempt.get()
                else:
                    raise RetryError(attempt)
            else:
                # use ceil since SWF timer resolution is in seconds
                sleep = self.wait(attempt_number, delay_since_first_attempt_ms)
                yield workflow_time.sleep(ceil(sleep / 1000.0))

            attempt_number += 1 
Example #12
Source File: marathon.py    From dcos-e2e with Apache License 2.0 5 votes vote down vote up
def destroy_pod(self, pod_id, timeout=300):
        """Delete a marathon pod and wait until its deployment is gone.

        The test is aborted if the removal does not finish in time.

        Args:
            pod_id: id of the pod to remove
            timeout: seconds to wait for destruction before failing test

        Raises:
            Exception: if the pod's deployment is still listed after
                       `timeout` seconds.
        """
        @retrying.retry(wait_fixed=5000, stop_max_delay=timeout * 1000,
                        retry_on_result=lambda ret: not ret,
                        retry_on_exception=lambda x: False)
        def _destroy_pod_complete(deployment_id):
            response = self.get('/v2/deployments')
            assert response.ok, 'status_code: {} content: {}'.format(
                response.status_code, response.content)

            # The pod is considered destroyed once its deployment id no
            # longer appears among the listed deployments.
            still_deploying = any(
                deployment_id == deployment.get('id')
                for deployment in response.json())
            if still_deploying:
                log.info('Waiting for pod to be destroyed')
                return False
            log.info('Pod destroyed')
            return True

        delete_response = self.delete('/v2/pods' + pod_id, params=FORCE_PARAMS)
        assert delete_response.ok, 'status_code: {} content: {}'.format(
            delete_response.status_code, delete_response.content)

        deployment_id = delete_response.headers['Marathon-Deployment-Id']
        try:
            _destroy_pod_complete(deployment_id)
        except retrying.RetryError as ex:
            raise Exception("Pod destroy failed - operation was not "
                            "completed in {} seconds.".format(timeout)) from ex
Example #13
Source File: marathon.py    From dcos-e2e with Apache License 2.0 5 votes vote down vote up
def destroy_pod(self, pod_id, timeout=300):
        """Delete a marathon pod and wait until its deployment is gone.

        The test is aborted if the removal does not finish in time.

        Args:
            pod_id: id of the pod to remove
            timeout: seconds to wait for destruction before failing test

        Raises:
            Exception: if the pod's deployment is still listed after
                       `timeout` seconds.
        """
        @retrying.retry(wait_fixed=5000, stop_max_delay=timeout * 1000,
                        retry_on_result=lambda ret: not ret,
                        retry_on_exception=lambda x: False)
        def _destroy_pod_complete(deployment_id):
            response = self.get('/v2/deployments')
            assert response.ok, 'status_code: {} content: {}'.format(
                response.status_code, response.content)

            # The pod is considered destroyed once its deployment id no
            # longer appears among the listed deployments.
            still_deploying = any(
                deployment_id == deployment.get('id')
                for deployment in response.json())
            if still_deploying:
                log.info('Waiting for pod to be destroyed')
                return False
            log.info('Pod destroyed')
            return True

        delete_response = self.delete('/v2/pods' + pod_id, params=FORCE_PARAMS)
        assert delete_response.ok, 'status_code: {} content: {}'.format(
            delete_response.status_code, delete_response.content)

        deployment_id = delete_response.headers['Marathon-Deployment-Id']
        try:
            _destroy_pod_complete(deployment_id)
        except retrying.RetryError as ex:
            raise Exception("Pod destroy failed - operation was not "
                            "completed in {} seconds.".format(timeout)) from ex
Example #14
Source File: marathon.py    From dcos-e2e with Apache License 2.0 5 votes vote down vote up
def deploy_app(self, app_definition, check_health=True, ignore_failed_tasks=False, timeout=180):
        """Deploy an app to marathon

        This function deploys an application and then waits for marathon to
        acknowledge its successful creation or fails the test.

        The wait for application is immediately aborted if Marathon returns
        nonempty 'lastTaskFailure' field (if ignore_failed_tasks is set to False). Otherwise it waits until all the
        instances reach tasksRunning and then tasksHealthy state.

        Args:
            app_definition: a dict with application definition as specified in
                            Marathon API (https://mesosphere.github.io/marathon/docs/rest-api.html#post-v2-apps)
            check_health: wait until Marathon reports tasks as healthy before
                          returning
            ignore_failed_tasks: forwarded to wait_for_app_deployment; when
                                 False, a nonempty 'lastTaskFailure' aborts
                                 the wait (see above)
            timeout: seconds to wait for the deployment before failing

        Returns:
            A list of named tuples which represent service points of deployed
            applications. I.E:
                [Endpoint(host='172.17.10.202', port=10464), Endpoint(host='172.17.10.201', port=1630)]

        Raises:
            Exception: if the deployment did not complete within `timeout`
                       seconds; the underlying retrying.RetryError is
                       attached as the cause.
        """
        r = self.post('/v2/apps', json=app_definition)
        log.info('Response from marathon: {}'.format(repr(r.json())))
        r.raise_for_status()

        try:
            return self.wait_for_app_deployment(
                    app_definition['id'],
                    app_definition['instances'],
                    check_health, ignore_failed_tasks, timeout)
        except retrying.RetryError as ex:
            # Chain the RetryError explicitly, as the pod helpers do, so the
            # traceback shows it as the direct cause of the timeout.
            raise Exception("Application deployment failed - operation was not "
                            "completed in {} seconds.".format(timeout)) from ex
Example #15
Source File: marathon.py    From dcos-e2e with Apache License 2.0 4 votes vote down vote up
def deploy_pod(self, pod_definition, timeout=180):
        """Deploy a pod to marathon and wait for it to become STABLE.

        The pod definition is posted to Marathon, then the pod status is
        polled until it reports 'STABLE' or the timeout expires, in which
        case the test fails.

        Args:
            pod_definition: a dict with pod definition as specified in
                            Marathon API
            timeout: seconds to wait for deployment to finish
        Returns:
            Pod data JSON
        Raises:
            Exception: if the pod did not become STABLE within `timeout`
                       seconds.
        """
        post_response = self.post('/v2/pods', json=pod_definition)
        assert post_response.ok, 'status_code: {} content: {}'.format(
            post_response.status_code, post_response.content)
        log.info('Response from marathon: {}'.format(repr(post_response.json())))

        @retrying.retry(wait_fixed=5000, stop_max_delay=timeout * 1000,
                        retry_on_result=lambda ret: ret is False,
                        retry_on_exception=lambda x: False)
        def _wait_for_pod_deployment(pod_id):
            # Polling the pod status until it is STABLE is enough for
            # `deploy_pod`. Should test pod deployments grow more complex,
            # switch to Marathon's event bus and listen for specific events.
            # See DCOS_OSS-1056.
            status_response = self.get('/v2/pods' + pod_id + '::status')
            status_response.raise_for_status()
            data = status_response.json()
            is_stable = 'status' in data and data['status'] == 'STABLE'
            if not is_stable:
                log.info('Waiting for pod to be deployed %r', data)
                return False
            # deployment complete
            return data

        try:
            return _wait_for_pod_deployment(pod_definition['id'])
        except retrying.RetryError as ex:
            raise Exception("Pod deployment failed - operation was not "
                            "completed in {} seconds.".format(timeout)) from ex
Example #16
Source File: marathon.py    From dcos-e2e with Apache License 2.0 4 votes vote down vote up
def deploy_pod(self, pod_definition, timeout=180):
        """Deploy a pod to marathon and wait for it to become STABLE.

        The pod definition is posted to Marathon, then the pod status is
        polled until it reports 'STABLE' or the timeout expires, in which
        case the test fails.

        Args:
            pod_definition: a dict with pod definition as specified in
                            Marathon API
            timeout: seconds to wait for deployment to finish
        Returns:
            Pod data JSON
        Raises:
            Exception: if the pod did not become STABLE within `timeout`
                       seconds.
        """
        post_response = self.post('/v2/pods', json=pod_definition)
        assert post_response.ok, 'status_code: {} content: {}'.format(
            post_response.status_code, post_response.content)
        log.info('Response from marathon: {}'.format(repr(post_response.json())))

        @retrying.retry(wait_fixed=5000, stop_max_delay=timeout * 1000,
                        retry_on_result=lambda ret: ret is False,
                        retry_on_exception=lambda x: False)
        def _wait_for_pod_deployment(pod_id):
            # Polling the pod status until it is STABLE is enough for
            # `deploy_pod`. Should test pod deployments grow more complex,
            # switch to Marathon's event bus and listen for specific events.
            # See DCOS_OSS-1056.
            status_response = self.get('/v2/pods' + pod_id + '::status')
            status_response.raise_for_status()
            data = status_response.json()
            is_stable = 'status' in data and data['status'] == 'STABLE'
            if not is_stable:
                log.info('Waiting for pod to be deployed %r', data)
                return False
            # deployment complete
            return data

        try:
            return _wait_for_pod_deployment(pod_definition['id'])
        except retrying.RetryError as ex:
            raise Exception("Pod deployment failed - operation was not "
                            "completed in {} seconds.".format(timeout)) from ex
Example #17
Source File: jobs.py    From dcos-e2e with Apache License 2.0 4 votes vote down vote up
def wait_for_run(self, job_id: str, run_id: str, timeout=600):
        """Block until the given job run has completed, or fail once
        `timeout` seconds have elapsed.

        :param job_id: Job ID
        :type job_id: str
        :param run_id: Run ID
        :type run_id: str
        :param timeout: Time in seconds to wait before giving up
        :type timeout: int
        :return: None
        :raises Exception: if the run did not complete within `timeout`
            seconds

        """

        @retrying.retry(wait_fixed=1000, stop_max_delay=timeout * 1000,
                        retry_on_result=lambda ret: ret is False,
                        retry_on_exception=lambda x: False)
        def _wait_for_run_completion(j_id: str, r_id: str) -> bool:
            try:
                self.run_details(job_id=j_id, run_id=r_id)
            except requests.HTTPError as http_error:
                error_response = http_error.response
            else:
                # No HTTP error (200): the run is still in progress.
                log.info('Waiting on job run {} to finish.'.format(r_id))
                return False

            # Only a 404 signals that the run is over; any other status is
            # unexpected and is re-raised as an HTTP error.
            if error_response.status_code != 404:
                raise requests.HTTPError(
                    'Waiting for job run {} to be finished, but getting HTTP status code {}'
                    .format(r_id, error_response.status_code), response=error_response)

            if self._is_history_available(j_id, r_id):
                log.info('Job run {} finished.'.format(r_id))
                return True

            log.warning(
                'Waiting for job run {} to be finished, but history for that job run is not available'
                .format(r_id))
            return False

        try:
            # Poll until the run has completed; nothing is returned.
            _wait_for_run_completion(job_id, run_id)
        except retrying.RetryError as ex:
            raise Exception("Job run failed - operation was not "
                            "completed in {} seconds.".format(timeout)) from ex
Example #18
Source File: arm.py    From dcos-e2e with Apache License 2.0 4 votes vote down vote up
def wait_for_deployment(self, timeout=60 * 60):
        """
        Poll the Azure deployment state until it is 'Succeeded'.

        Azure will not register a template instantly after deployment, so a
        CloudError raised while polling is retried, as is any state other
        than 'Succeeded' or 'Failed'. If the state is 'Failed', or the retry
        gives up with a RetryError, the failed deployment operations are
        written to the log and DeploymentError is raised.
        """
        log.info('Waiting for deployment to finish')

        def azure_failure_report():
            # Log status code and message of every failed operation.
            operations = self.azure_wrapper.rmc.deployment_operations.list(
                self.group_name, DEPLOYMENT_NAME.format(self.group_name))
            failed = []
            for op in operations:
                if op.properties.provisioning_state == 'Failed':
                    failed.append(
                        (op.properties.status_code, op.properties.status_message))
            for status_code, status_message in failed:
                log.error('Deployment operation failed! {}: {}'.format(
                    status_code, status_message))

        @retrying.retry(
            wait_fixed=60 * 1000, stop_max_delay=timeout * 1000,
            retry_on_result=lambda res: res is False,
            retry_on_exception=lambda ex: isinstance(ex, CloudError))
        def check_deployment_operations():
            deploy_state = self.get_deployment_state()

            if deploy_state == 'Succeeded':
                return True

            if deploy_state != 'Failed':
                # Still in progress: returning False triggers another poll.
                log.info('Waiting for deployment. Current state: {}. It should either be Succeeded/Failed.'.format(
                    deploy_state))
                return False

            log.info('Deployment failed. Checking deployment operations.')
            azure_failure_report()
            raise DeploymentError('Azure Deployment Failed!')

        try:
            check_deployment_operations()
        except retrying.RetryError:
            log.info('Deployment failed. Checking deployment operations.')
            azure_failure_report()
            raise DeploymentError("Azure Deployment Failed!")
Example #19
Source File: jobs.py    From dcos-e2e with Apache License 2.0 4 votes vote down vote up
def wait_for_run(self, job_id: str, run_id: str, timeout=600):
        """Block until the given job run has completed, or fail once
        `timeout` seconds have elapsed.

        :param job_id: Job ID
        :type job_id: str
        :param run_id: Run ID
        :type run_id: str
        :param timeout: Time in seconds to wait before giving up
        :type timeout: int
        :return: None
        :raises Exception: if the run did not complete within `timeout`
            seconds

        """

        @retrying.retry(wait_fixed=1000, stop_max_delay=timeout * 1000,
                        retry_on_result=lambda ret: ret is False,
                        retry_on_exception=lambda x: False)
        def _wait_for_run_completion(j_id: str, r_id: str) -> bool:
            try:
                self.run_details(job_id=j_id, run_id=r_id)
            except requests.HTTPError as http_error:
                error_response = http_error.response
            else:
                # No HTTP error (200): the run is still in progress.
                log.info('Waiting on job run {} to finish.'.format(r_id))
                return False

            # Only a 404 signals that the run is over; any other status is
            # unexpected and is re-raised as an HTTP error.
            if error_response.status_code != 404:
                raise requests.HTTPError(
                    'Waiting for job run {} to be finished, but getting HTTP status code {}'
                    .format(r_id, error_response.status_code), response=error_response)

            if self._is_history_available(j_id, r_id):
                log.info('Job run {} finished.'.format(r_id))
                return True

            log.warning(
                'Waiting for job run {} to be finished, but history for that job run is not available'
                .format(r_id))
            return False

        try:
            # Poll until the run has completed; nothing is returned.
            _wait_for_run_completion(job_id, run_id)
        except retrying.RetryError as ex:
            raise Exception("Job run failed - operation was not "
                            "completed in {} seconds.".format(timeout)) from ex
Example #20
Source File: arm.py    From dcos-e2e with Apache License 2.0 4 votes vote down vote up
def wait_for_deployment(self, timeout=60 * 60):
        """
        Poll the Azure deployment state until it is 'Succeeded'.

        Azure will not register a template instantly after deployment, so a
        CloudError raised while polling is retried, as is any state other
        than 'Succeeded' or 'Failed'. If the state is 'Failed', or the retry
        gives up with a RetryError, the failed deployment operations are
        written to the log and DeploymentError is raised.
        """
        log.info('Waiting for deployment to finish')

        def azure_failure_report():
            # Log status code and message of every failed operation.
            operations = self.azure_wrapper.rmc.deployment_operations.list(
                self.group_name, DEPLOYMENT_NAME.format(self.group_name))
            failed = []
            for op in operations:
                if op.properties.provisioning_state == 'Failed':
                    failed.append(
                        (op.properties.status_code, op.properties.status_message))
            for status_code, status_message in failed:
                log.error('Deployment operation failed! {}: {}'.format(
                    status_code, status_message))

        @retrying.retry(
            wait_fixed=60 * 1000, stop_max_delay=timeout * 1000,
            retry_on_result=lambda res: res is False,
            retry_on_exception=lambda ex: isinstance(ex, CloudError))
        def check_deployment_operations():
            deploy_state = self.get_deployment_state()

            if deploy_state == 'Succeeded':
                return True

            if deploy_state != 'Failed':
                # Still in progress: returning False triggers another poll.
                log.info('Waiting for deployment. Current state: {}. It should either be Succeeded/Failed.'.format(
                    deploy_state))
                return False

            log.info('Deployment failed. Checking deployment operations.')
            azure_failure_report()
            raise DeploymentError('Azure Deployment Failed!')

        try:
            check_deployment_operations()
        except retrying.RetryError:
            log.info('Deployment failed. Checking deployment operations.')
            azure_failure_report()
            raise DeploymentError("Azure Deployment Failed!")
Example #21
Source File: mgmt.py    From nova-powervm with Apache License 2.0 4 votes vote down vote up
def remove_block_dev(devpath, scan_timeout=10):
    """Delete a block device from the management partition.

    The device's sysfs 'delete' special file is written to, then the device
    path is polled until it disappears or the timeout expires.

    :param devpath: Any path to the block special file associated with the
                    device to be removed.
    :param scan_timeout: The maximum number of seconds after scanning to wait
                         for the specified device to disappear.
    :raise InvalidDevicePath: If the specified device or its 'delete' special
                              file cannot be found.
    :raise DeviceDeletionException: If the deletion was attempted, but the
                                    device special file is still present
                                    afterward.
    """
    # Follow symlinks, if any, down to the /dev/sdX path
    resolved = os.path.realpath(devpath)
    delpath = '/sys/block/%s/device/delete' % resolved.rpartition('/')[2]

    # Both the device and its 'delete' special file must exist up front.
    for required_path in (resolved, delpath):
        try:
            os.stat(required_path)
        except OSError:
            raise exception.InvalidDevicePath(path=required_path)

    LOG.debug("Deleting block device %(devpath)s from the management "
              "partition via special file %(delpath)s.",
              {'devpath': resolved, 'delpath': delpath})
    # Writing '1' to this sysfs file deletes the block device and rescans.
    nova.privsep.path.writefile(delpath, 'a', '1')

    # The bus scan is asynchronous, so poll for the device to go away.  A
    # stat that raises OSError (dev file not found) means success; hitting
    # the timeout means failure.  Polls are a quarter second apart.
    @retrying.retry(retry_on_result=lambda result: result, wait_fixed=250,
                    stop_max_delay=scan_timeout * 1000)
    def _poll_for_del(statpath):
        try:
            os.stat(statpath)
        except OSError:
            # Device special file is absent, as expected
            return False
        return True

    try:
        _poll_for_del(resolved)
    except retrying.RetryError as retry_err:
        # stat kept succeeding, i.e. the dev file continued to exist.
        raise npvmex.DeviceDeletionException(
            devpath=resolved, polls=retry_err.last_attempt.attempt_number,
            timeout=scan_timeout)
    # Otherwise stat raised - the device disappeared - all done.
Example #22
Source File: arm.py    From dcos-launch with Apache License 2.0 4 votes vote down vote up
def wait_for_deployment(self, timeout=60 * 60):
        """
        Poll the Azure deployment state until it is 'Succeeded'.

        Azure will not register a template instantly after deployment, so a
        CloudError raised while polling is retried, as is any state other
        than 'Succeeded' or 'Failed'. If the state is 'Failed', or the retry
        gives up with a RetryError, the failed deployment operations are
        written to the log and DeploymentError is raised.
        """
        log.info('Waiting for deployment to finish')

        def azure_failure_report():
            # Log status code and message of every failed operation.
            operations = self.azure_wrapper.rmc.deployment_operations.list(
                self.group_name, DEPLOYMENT_NAME.format(self.group_name))
            failed = []
            for op in operations:
                if op.properties.provisioning_state == 'Failed':
                    failed.append(
                        (op.properties.status_code, op.properties.status_message))
            for status_code, status_message in failed:
                log.error('Deployment operation failed! {}: {}'.format(
                    status_code, status_message))

        @retrying.retry(
            wait_fixed=60 * 1000, stop_max_delay=timeout * 1000,
            retry_on_result=lambda res: res is False,
            retry_on_exception=lambda ex: isinstance(ex, CloudError))
        def check_deployment_operations():
            deploy_state = self.get_deployment_state()

            if deploy_state == 'Succeeded':
                return True

            if deploy_state != 'Failed':
                # Still in progress: returning False triggers another poll.
                log.info('Waiting for deployment. Current state: {}. It should either be Succeeded/Failed.'.format(
                    deploy_state))
                return False

            log.info('Deployment failed. Checking deployment operations.')
            azure_failure_report()
            raise DeploymentError('Azure Deployment Failed!')

        try:
            check_deployment_operations()
        except retrying.RetryError:
            log.info('Deployment failed. Checking deployment operations.')
            azure_failure_report()
            raise DeploymentError("Azure Deployment Failed!")