Python Examples of luigi.Task

Source File: test_dependencies.py From sciluigi with MIT License

6 votes

def test_methods(self):
        """Check that output() and requires() hand back luigi objects in lists."""
        workflow = sl.WorkflowTask()
        multi_out_a = workflow.new_task('tout', MultiOutTask, an_id='a')
        multi_out_b = workflow.new_task('tout', MultiOutTask, an_id='b')
        multi_out_c = workflow.new_task('tout', MultiOutTask, an_id='c')
        multi_in = workflow.new_task('tout', MultiInTask)

        # The input mixes a bare callable with a dict that holds both a
        # callable and the result of calling one.
        multi_in.in_multi = [
            multi_out_a.out_multi,
            {'a': multi_out_b.out_multi, 'b': multi_out_c.out_multi()},
        ]

        # output() must be a list whose members are all luigi targets
        produced = multi_out_a.output()
        self.assertIsInstance(produced, list)
        for target in produced:
            self.assertIsInstance(target, luigi.Target)

        # requires() must be a list whose members are all luigi tasks
        upstream = multi_in.requires()
        self.assertIsInstance(upstream, list)
        for dependency in upstream:
            self.assertIsInstance(dependency, luigi.Task)

Source File: nmap.py From recon-pipeline with MIT License

6 votes

def requires(self):
        """ Declare ParseMasscanOutput as the upstream task of ThreadedNmap.

        Every parameter is forwarded unchanged from this task:
        TargetList expects target_file, results_dir, and db_location;
        Masscan expects rate, target_file, interface, and either ports or top_ports.

        Returns:
            luigi.Task - ParseMasscanOutput
        """
        return ParseMasscanOutput(
            results_dir=self.results_dir,
            rate=self.rate,
            target_file=self.target_file,
            top_ports=self.top_ports,
            interface=self.interface,
            ports=self.ports,
            db_location=self.db_location,
        )

Source File: nmap.py From recon-pipeline with MIT License

6 votes

def requires(self):
        """ Declare ThreadedNmapScan as the upstream task of Searchsploit.

        Every parameter is forwarded unchanged from this task:
        TargetList expects target_file, results_dir, and db_location;
        Masscan expects rate, target_file, interface, and either ports or top_ports;
        ThreadedNmap expects threads.

        Returns:
            luigi.Task - ThreadedNmap
        """
        return ThreadedNmapScan(
            rate=self.rate,
            ports=self.ports,
            threads=self.threads,
            top_ports=self.top_ports,
            interface=self.interface,
            target_file=self.target_file,
            results_dir=self.results_dir,
            db_location=self.db_location,
        )

Source File: masscan.py From recon-pipeline with MIT License

6 votes

def requires(self):
        """ Declare MasscanScan as the upstream task of ParseMasscanOutput.

        Every parameter is forwarded unchanged from this task; Masscan expects
        rate, target_file, interface, and either ports or top_ports.

        Returns:
            luigi.Task - Masscan
        """
        return MasscanScan(
            results_dir=self.results_dir,
            rate=self.rate,
            target_file=self.target_file,
            top_ports=self.top_ports,
            interface=self.interface,
            ports=self.ports,
            db_location=self.db_location,
        )

Source File: subdomain_takeover.py From recon-pipeline with MIT License

6 votes

def requires(self):
        """ Declare GatherWebTargets as the upstream task of SubjackScan.

        Every parameter is forwarded unchanged from this task; GatherWebTargets
        accepts exempt_list and expects rate, target_file, interface, and either
        ports or top_ports.

        Returns:
            luigi.Task - GatherWebTargets
        """
        return GatherWebTargets(
            results_dir=self.results_dir,
            rate=self.rate,
            target_file=self.target_file,
            top_ports=self.top_ports,
            interface=self.interface,
            ports=self.ports,
            exempt_list=self.exempt_list,
            db_location=self.db_location,
        )

Source File: subdomain_takeover.py From recon-pipeline with MIT License

6 votes

def requires(self):
        """ Declare GatherWebTargets as the upstream task of TKOSubsScan.

        Every parameter is forwarded unchanged from this task; GatherWebTargets
        accepts exempt_list and expects rate, target_file, interface, and either
        ports or top_ports.

        Returns:
            luigi.Task - GatherWebTargets
        """
        return GatherWebTargets(
            results_dir=self.results_dir,
            rate=self.rate,
            target_file=self.target_file,
            top_ports=self.top_ports,
            interface=self.interface,
            ports=self.ports,
            exempt_list=self.exempt_list,
            db_location=self.db_location,
        )

Source File: aquatone.py From recon-pipeline with MIT License

6 votes

def requires(self):
        """ Declare GatherWebTargets as the upstream task of AquatoneScan.

        Every parameter is forwarded unchanged from this task; GatherWebTargets
        accepts exempt_list and expects rate, target_file, interface, and either
        ports or top_ports.

        Returns:
            luigi.Task - GatherWebTargets
        """
        return GatherWebTargets(
            results_dir=self.results_dir,
            rate=self.rate,
            target_file=self.target_file,
            top_ports=self.top_ports,
            interface=self.interface,
            ports=self.ports,
            exempt_list=self.exempt_list,
            db_location=self.db_location,
        )

Source File: waybackurls.py From recon-pipeline with MIT License

6 votes

def requires(self):
        """ Declare GatherWebTargets as the upstream task of WaybackurlsScan.

        Every parameter is forwarded unchanged from this task; GatherWebTargets
        accepts exempt_list and expects rate, target_file, interface, and either
        ports or top_ports.

        Returns:
            luigi.Task - GatherWebTargets
        """
        return GatherWebTargets(
            results_dir=self.results_dir,
            rate=self.rate,
            target_file=self.target_file,
            top_ports=self.top_ports,
            interface=self.interface,
            ports=self.ports,
            exempt_list=self.exempt_list,
            db_location=self.db_location,
        )

Source File: webanalyze.py From recon-pipeline with MIT License

6 votes

def requires(self):
        """ Declare GatherWebTargets as the upstream task of WebanalyzeScan.

        Every parameter is forwarded unchanged from this task; GatherWebTargets
        accepts exempt_list and expects rate, target_file, interface, and either
        ports or top_ports.

        Returns:
            luigi.Task - GatherWebTargets
        """
        return GatherWebTargets(
            results_dir=self.results_dir,
            rate=self.rate,
            target_file=self.target_file,
            top_ports=self.top_ports,
            interface=self.interface,
            ports=self.ports,
            exempt_list=self.exempt_list,
            db_location=self.db_location,
        )

Source File: core.py From GASpy with GNU Lesser General Public License v3.0

6 votes

def make_task_output_object(task):
    '''
    Build the `luigi.LocalTarget` that a task's `output` method should return,
    pointing at the standardized location for that task.

    Arg:
        task    Instance of a luigi.Task object
    Returns:
        target  An instance of a luigi.LocalTarget object created from the
                path that `make_task_output_location` reports for `task`
    '''
    return luigi.LocalTarget(make_task_output_location(task))

Source File: __init__.py From d6tflow with MIT License

6 votes

def save(self, data, **kwargs):
        """
        Write data to this task's output target(s).

        When `self.persist` is exactly ['data'], `data` is handed straight to
        the single output target. Otherwise `data` must be a dict whose keys
        match the keys of `self.output()`, and each value is saved to the
        target stored under the same key.

        Args:
            data: object to save, or a dict of objects keyed like the outputs
            **kwargs: forwarded unchanged to every target's `save` call

        Raises:
            ValueError: if the keys of `data` differ from the output keys

        """
        if self.persist == ['data']:
            # single-output shortcut: no dict wrapping required
            self.output().save(data, **kwargs)
            return

        targets = self.output()
        if set(data.keys()) != set(targets.keys()):
            raise ValueError('Save dictionary needs to consistent with Task.persist')
        for name, payload in data.items():
            targets[name].save(payload, **kwargs)

Source File: calculation_finders_test.py From GASpy with GNU Lesser General Public License v3.0

6 votes

def test_FindCalculation():
    '''
    Light smoke test of the parent class; the child classes and methods carry
    the heavier testing.
    '''
    finder = FindCalculation()
    assert isinstance(finder, luigi.Task)
    for attribute in ('run', 'output', 'max_fizzles'):
        assert hasattr(finder, attribute)

    # Exercise the max_fizzles feature through one of the child tasks.
    bulk_finder = FindBulk(mpid='mp-120',
                           vasp_settings=BULK_SETTINGS['vasp'],
                           max_fizzles=0)
    try:
        with pytest.raises(ValueError, match='Since we have fizzled'):
            _ = list(bulk_finder.run(_testing=True))     # noqa: F841
    finally:
        # Always clean up, whether or not the expected error was raised
        clean_up_tasks()

Source File: utils.py From GASpy with GNU Lesser General Public License v3.0

6 votes

def run_task_locally(task):
    '''
    Build a single task with Luigi's local scheduler. This is similar to the
    `gaspy.tasks.core.run_tasks` function, but you should really only be using
    it for debugging and/or testing purposes.

    Arg:
        task    Instance of a `luigi.Task` object that you want to run
    '''
    # Luigi emits this warning for the `task_process_context` parameter;
    # silence it for the duration of the build only.
    ignored_message = ('Parameter "task_process_context" with value "None" '
                       'is not of type string.')
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', message=ignored_message)
        luigi.build([task], local_scheduler=True)

Source File: test_api.py From luigi-slack with MIT License

5 votes

def test_event_not_implemented(self):
        """Test that the processing time event is not implemented yet.

        `assertRaises` receives the callable and its arguments separately so
        that the call happens inside the assertion. Passing the result of an
        already-made call would execute `trigger_event` before `assertRaises`
        gets a chance to watch for the exception.
        """
        bot = SlackBot(self.token, events=[PROCESSING_TIME], channels=self.channels)
        bot.set_handlers()
        task = luigi.Task()
        # NOTE(review): this relies on trigger_event letting the handler's
        # NotImplementedError propagate -- confirm against the luigi version in use.
        self.assertRaises(
            NotImplementedError,
            task.trigger_event,
            luigi.event.Event.PROCESSING_TIME,
            task,
        )

Source File: audit.py From sciluigi with MIT License

5 votes

def save_start_time(self):
        '''
        Write an info-level log line announcing that this task has started.

        Nothing is logged when the task has no `workflow_task` attribute or
        when that attribute is None.
        '''
        if not hasattr(self, 'workflow_task') or self.workflow_task is None:
            return
        instance_name = self.get_instance_name()
        log.info('Task {task} started'.format(task=instance_name))

Source File: audit.py From sciluigi with MIT License

5 votes

def save_end_time(self, task_exectime_sec):
        '''
        Log that this task finished and record its audit information.

        The execution time (formatted to three decimals) and every entry of
        `self.param_kwargs` except 'workflow_task' are passed to
        `add_auditinfo`. Nothing happens when the task has no `workflow_task`
        attribute or when that attribute is None.
        '''
        if not hasattr(self, 'workflow_task') or self.workflow_task is None:
            return

        instance_name = self.get_instance_name()
        log.info('Task {task} finished after {proctime:.3f}s'.format(
            task=instance_name, proctime=task_exectime_sec))

        self.add_auditinfo('task_exectime_sec', '%.3f' % task_exectime_sec)
        for name, value in iteritems(self.param_kwargs):
            if name != 'workflow_task':
                self.add_auditinfo(name, value)

Source File: audit.py From sciluigi with MIT License

5 votes

def save_start_time(self):
        '''
        Write an info-level log line announcing that this task has started.

        Nothing is logged when the task has no `workflow_task` attribute or
        when that attribute is None.
        '''
        if not hasattr(self, 'workflow_task') or self.workflow_task is None:
            return
        instance_name = self.get_instance_name()
        log.info('Task {task} started'.format(task=instance_name))

Source File: test_api.py From luigi-slack with MIT License

5 votes

def test_success(self):
        """Test that a successful task is queued"""
        slack_bot = SlackBot(self.token, events=[SUCCESS], channels=self.channels)
        slack_bot.set_handlers()
        finished_task = luigi.Task()

        # nothing is queued before the event fires
        queued_before = slack_bot.event_queue.get(SUCCESS, [])
        self.assertEqual(len(queued_before), 0)

        finished_task.trigger_event(luigi.event.Event.SUCCESS, finished_task)

        queued_after = slack_bot.event_queue.get(SUCCESS)
        self.assertEqual(len(queued_after), 1)

Source File: audit.py From sciluigi with MIT License

5 votes

def save_end_time(self, task_exectime_sec):
        '''
        Log that this task finished and record its audit information.

        The execution time (formatted to three decimals) and every entry of
        `self.param_kwargs` except 'workflow_task' are passed to
        `add_auditinfo`. Nothing happens when the task has no `workflow_task`
        attribute or when that attribute is None.
        '''
        if not hasattr(self, 'workflow_task') or self.workflow_task is None:
            return

        instance_name = self.get_instance_name()
        log.info('Task {task} finished after {proctime:.3f}s'.format(
            task=instance_name, proctime=task_exectime_sec))

        self.add_auditinfo('task_exectime_sec', '%.3f' % task_exectime_sec)
        for name, value in iteritems(self.param_kwargs):
            if name != 'workflow_task':
                self.add_auditinfo(name, value)

Source File: surfaces.py From GASpy with GNU Lesser General Public License v3.0

5 votes

def __run_calculate_surface_energy_task(task):
    '''
    Run one surface energy task on a best-effort basis. Expected failures are
    ignored silently; any other error is printed and reported as a warning so
    that the caller can carry on with other tasks. Nothing is returned.

    Arg:
        task    A `luigi.Task` object, preferably a
                `gaspy.tasks.metadata_calculators.CalculateSurfaceEnergy`.
                Its `mpid`, `miller_indices`, `shift` and `vasp_settings`
                attributes are read when an unexpected error is reported.
    '''
    # Run the task again in case the relaxations are all done, but we just
    # haven't calculated the surface energy yet
    try:
        run_task(task)

    # If a task has failed and not produced an output, we don't want that to
    # stop us from updating the successful runs.
    except FileNotFoundError:
        pass

    # If the output already exists, then move on
    except luigi.target.FileAlreadyExists:
        pass

    # If some other error pops up, then we want to report it. But we also want
    # to move on so that we can still update other things. We catch
    # `Exception` rather than everything so that KeyboardInterrupt and
    # SystemExit still stop the program.
    except Exception:
        traceback.print_exc()
        warnings.warn('We caught the exception reported just above and '
                      'moved on without updating the collection. Here is '
                      'the offending surface energy calculation information: '
                      ' (%s, %s, %s, %s)'
                      % (task.mpid, task.miller_indices, task.shift,
                         unfreeze_dict(task.vasp_settings)))

Source File: core.py From GASpy with GNU Lesser General Public License v3.0

5 votes

def schedule_tasks(tasks, workers=1, local_scheduler=False):
    '''
    Light wrapper around `luigi.build` that executes the given tasks, by
    default through the Luigi host that is listed in the `.gaspyrc.json` file.

    Arg:
        tasks               An iterable of `luigi.Task` instances
        workers             An integer indicating how many processes/workers
                            you want executing the tasks and prerequisite
                            tasks.
        local_scheduler     A Boolean indicating whether or not you want to
                            use a local scheduler. Only the literal `False`
                            selects the Luigi host; any other value selects
                            the local scheduler. Local scheduling is meant
                            for getting something done quickly but dirtily;
                            going through the Luigi daemon should be the
                            status quo.
    '''
    luigi_host = utils.read_rc('luigi_host')

    # Decide which scheduler `luigi.build` should talk to
    if local_scheduler is False:
        scheduler_kwargs = {'scheduler_host': luigi_host}
    else:
        scheduler_kwargs = {'local_scheduler': True}

    # Luigi emits this warning for the `task_process_context` parameter;
    # silence it for the duration of the build only.
    ignored_message = ('Parameter "task_process_context" with value "None" '
                       'is not of type string.')
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', message=ignored_message)
        luigi.build(tasks, workers=workers, **scheduler_kwargs)

Source File: core.py From GASpy with GNU Lesser General Public License v3.0

5 votes

def make_task_output_location(task):
    '''
    Work out the standard location where the output of a task is stored: a
    `.pkl` file named after the task's ID, inside a folder named after the
    task's class, under `TASKS_CACHE_LOCATION`.

    Arg:
        task    Instance of a luigi.Task that you want to find the output location for
    Output:
        file_name   String indicating the full path of where the output is
    '''
    relative_path = '{}/{}.pkl'.format(type(task).__name__, task.task_id)
    return TASKS_CACHE_LOCATION + relative_path

Source File: make_fireworks_test.py From GASpy with GNU Lesser General Public License v3.0

5 votes

def test_FireworkMaker():
    '''
    FireworkMaker must be a Luigi task, and a fresh instance must report that
    it is not complete.
    '''
    assert issubclass(FireworkMaker, luigi.Task)
    maker = FireworkMaker()
    assert maker.complete() is False

Source File: test_api.py From luigi-slack with MIT License

5 votes

def test_failure(self):
        """Test that a failure event adds the task to the queue"""
        slack_bot = SlackBot(self.token, events=[FAILURE], channels=self.channels)
        slack_bot.set_handlers()
        failed_task = luigi.Task()

        # nothing is queued before the event fires
        queued_before = slack_bot.event_queue.get(FAILURE, [])
        self.assertEqual(len(queued_before), 0)

        failed_task.trigger_event(luigi.event.Event.FAILURE, failed_task, Exception())

        queued_after = slack_bot.event_queue.get(FAILURE)
        self.assertEqual(len(queued_after), 1)

Source File: test_api.py From luigi-slack with MIT License

5 votes

def test_start(self):
        """Test that a start event adds the task to the queue"""
        slack_bot = SlackBot(self.token, events=[START], channels=self.channels)
        slack_bot.set_handlers()
        started_task = luigi.Task()

        # nothing is queued before the event fires
        queued_before = slack_bot.event_queue.get(START, [])
        self.assertEqual(len(queued_before), 0)

        started_task.trigger_event(luigi.event.Event.START, started_task)

        queued_after = slack_bot.event_queue.get(START)
        self.assertEqual(len(queued_after), 1)

Source File: test_api.py From luigi-slack with MIT License

5 votes

def test_different_task_doesnt_empty_queue(self):
        """Test that a success from one task leaves another task's failure queued"""
        class CustomTask(luigi.Task):
            pass

        slack_bot = SlackBot(self.token, events=[SUCCESS, FAILURE], channels=self.channels)
        slack_bot.set_handlers()

        # the plain task and the custom task are meant to have different task_ids
        succeeding_task = luigi.Task()
        failing_task = CustomTask()

        def failures_queued(*default):
            # number of entries currently held under FAILURE
            return len(slack_bot.event_queue.get(FAILURE, *default))

        self.assertEqual(failures_queued([]), 0)

        failing_task.trigger_event(luigi.event.Event.FAILURE, failing_task, Exception())
        self.assertEqual(failures_queued(), 1)

        # a success from the other task must leave the failure in place
        succeeding_task.trigger_event(luigi.event.Event.SUCCESS, succeeding_task)
        self.assertEqual(failures_queued(), 1)

Source File: test_api.py From luigi-slack with MIT License

5 votes

def test_success_empties_queue(self):
        """Test that a success event clears the matching entry from the failure queue"""
        slack_bot = SlackBot(self.token, events=[SUCCESS, FAILURE], channels=self.channels)
        slack_bot.set_handlers()

        # two plain luigi.Task instances are meant to share the same task_id
        succeeding_task = luigi.Task()
        failing_task = luigi.Task()

        def failures_queued(*default):
            # number of entries currently held under FAILURE
            return len(slack_bot.event_queue.get(FAILURE, *default))

        self.assertEqual(failures_queued([]), 0)

        failing_task.trigger_event(luigi.event.Event.FAILURE, failing_task, Exception())
        self.assertEqual(failures_queued(), 1)

        # the success is expected to remove the earlier failure
        succeeding_task.trigger_event(luigi.event.Event.SUCCESS, succeeding_task)
        self.assertEqual(failures_queued(), 0)

Source File: test_task.py From luigi-td with Apache License 2.0

5 votes

def test_with_dependency(self):
        """Run a plain luigi task whose requirement is a query task."""
        # The class names are part of the formatted output paths below.
        class DependencyTestQuery(TestQuery):
            def output(self):
                job_path = test_config.get_tmp_path('{}.job'.format(self))
                return ResultTarget(job_path)

        class DependencyTestResult(luigi.Task):
            def requires(self):
                return DependencyTestQuery()

            def output(self):
                csv_path = test_config.get_tmp_path('{}.csv'.format(self))
                return LocalTarget(csv_path)

        result_task = DependencyTestResult()
        result_task.run()

Source File: base.py From law with BSD 3-Clause "New" or "Revised" License

5 votes

def cli_args(self, exclude=None, replace=None):
        """
        Delegate to the parent's cli_args with interactive parameters excluded.

        *exclude* may be None or anything accepted by make_list; the names in
        self.interactive_params are always added to it. *replace* is passed
        through unchanged.
        """
        if exclude is None:
            excluded = set()
        else:
            excluded = set(make_list(exclude))

        # interactive parameters are excluded regardless of the caller's choice
        excluded.update(self.interactive_params)

        return super(Task, self).cli_args(exclude=excluded, replace=replace)

Source File: base.py From law with BSD 3-Clause "New" or "Revised" License

5 votes

def __init__(self, *args, **kwargs):
        """Initialize the parent task, then reset the scheduler-publishing caches."""
        super(Task, self).__init__(*args, **kwargs)

        # last progress value published to the scheduler; None until one is cached
        self._last_progress_percentage = None

        # messages published to the scheduler so far
        self._message_cache = []

Python luigi.Task() Examples