Python airflow.DAG Examples
The following are 30 code examples of airflow.DAG(), drawn from open-source projects. The source file, project, and license are noted above each example. You may also want to check out all available functions and classes of the airflow module.
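Most of the examples below follow the same basic pattern: instantiate a DAG, create operators bound to it, and chain them with >> (shorthand for set_downstream). A minimal sketch of that pattern, assuming the Airflow 1.10-era import paths used by most of these projects (the dag_id, dates, and task_ids are illustrative, not taken from any example):

from datetime import datetime

from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator  # Airflow 1.10-era path

dag = DAG(
    dag_id='minimal_example',          # illustrative name
    start_date=datetime(2020, 1, 1),
    schedule_interval='@daily',
    catchup=False,                     # do not backfill past schedule intervals
)

with dag:
    start = DummyOperator(task_id='start')
    end = DummyOperator(task_id='end')
    start >> end                       # equivalent to start.set_downstream(end)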
Example #1
Source File: rawpixel_workflow.py From cccatalog with MIT License
def create_dag():
    dag = DAG(
        dag_id=DAG_ID,
        default_args=DAG_DEFAULT_ARGS,
        start_date=datetime(2020, 1, 15),
        schedule_interval="@monthly",
        catchup=False
    )

    with dag:
        start_task = get_log_operator(dag, DAG_ID, "Starting")
        run_task = get_runner_operator(dag)
        end_task = get_log_operator(dag, DAG_ID, "Finished")

        start_task >> run_task >> end_task

    return dag
Example #2
Source File: dbt_example.py From Example-Airflow-DAGs with Apache License 2.0
def dbt_dag(start_date, schedule_interval, default_args):
    temp_dag = DAG(
        'gospel_.dbt_sub_dag',
        start_date=start_date,
        schedule_interval=schedule_interval,
        default_args=default_args
    )
    G = nx.read_gpickle('/home/airflowuser/project/graph.gpickle')

    def make_dbt_task(model_name):
        simple_model_name = model_name.split('.')[-1]
        dbt_task = BashOperator(
            task_id=model_name,
            bash_command='cd ~/gospel && dbt run --profile=warehouse --target=prod --non-destructive --models {simple_model_name}'.format(simple_model_name=simple_model_name),
            dag=temp_dag
        )
        return dbt_task

    dbt_tasks = {}
    for node_name in set(G.nodes()):
        dbt_task = make_dbt_task(node_name)
        dbt_tasks[node_name] = dbt_task

    for edge in G.edges():
        dbt_tasks[edge[0]].set_downstream(dbt_tasks[edge[1]])

    return temp_dag
Example #3
Source File: phylopic_workflow.py From cccatalog with MIT License
def create_dag():
    dag = DAG(
        dag_id=DAG_ID,
        default_args=DAG_DEFAULT_ARGS,
        concurrency=1,
        max_active_runs=1,
        start_date=datetime(2011, 1, 1),
        schedule_interval='@daily',
        catchup=False,
    )

    with dag:
        start_task = get_log_operator(dag, DAG_ID, 'Starting')
        run_task = get_runner_operator(dag)
        end_task = get_log_operator(dag, DAG_ID, 'Finished')

        start_task >> run_task >> end_task

    return dag
Example #4
Source File: factory.py From starthinker with Apache License 2.0
def print_commandline(self):
    print('')
    print('DAG: %s' % self.dag_name)
    print('')

    instances = {}
    for task in self.recipe['tasks']:
        function = next(iter(task.keys()))

        # count instance per task
        instances.setdefault(function, 0)
        instances[function] += 1

        print('airflow test "%s" %s_%d %s' % (self.dag_name, function, instances[function], str(date.today())))

    print('')
Example #5
Source File: test_operator_util.py From cccatalog with MIT License
def test_get_dated_main_runner_handles_zero_shift():
    dag = DAG(
        dag_id='test_dag',
        start_date=datetime.strptime('2019-01-01', '%Y-%m-%d')
    )
    execution_date = datetime.strptime(
        '2019-01-01', '%Y-%m-%d'
    ).replace(tzinfo=timezone.utc)
    main_func = PickleMock()
    runner = op_util.get_dated_main_runner_operator(
        dag, main_func, timedelta(minutes=1)
    )
    ti = TaskInstance(runner, execution_date)
    ti.run(ignore_task_deps=True, ignore_ti_state=True, test_mode=True)
    main_func.assert_called_with('2019-01-01')
Example #6
Source File: test_serialized_dag.py From airflow with Apache License 2.0
def test_remove_stale_dags(self):
    example_dags_list = list(self._write_example_dags().values())
    # Remove SubDags from the list as they are not stored in DB in a separate row
    # and are directly added in Json blob of the main DAG
    filtered_example_dags_list = [dag for dag in example_dags_list if not dag.is_subdag]

    # Tests removing a stale DAG
    stale_dag = SDM(filtered_example_dags_list[0])
    fresh_dag = SDM(filtered_example_dags_list[1])

    # Overwrite stale_dag's last_updated to be 10 minutes ago
    stale_dag.last_updated = timezone.utcnow() - timezone.dt.timedelta(seconds=600)
    with create_session() as session:
        session.merge(stale_dag)
        session.commit()

    # Remove any stale DAGs older than 5 minutes
    SDM.remove_stale_dags(timezone.utcnow() - timezone.dt.timedelta(seconds=300))

    self.assertFalse(SDM.has_dag(stale_dag.dag_id))
    self.assertTrue(SDM.has_dag(fresh_dag.dag_id))
Example #7
Source File: test_datafusion.py From airflow with Apache License 2.0
def test_execute(self, mock_hook):
    mock_hook.return_value.get_instance.return_value = {"apiEndpoint": INSTANCE_URL}

    op = CloudDataFusionStartPipelineOperator(
        task_id="test_task",
        pipeline_name=PIPELINE_NAME,
        instance_name=INSTANCE_NAME,
        namespace=NAMESPACE,
        location=LOCATION,
        project_id=PROJECT_ID,
        runtime_args=RUNTIME_ARGS
    )
    op.dag = mock.MagicMock(spec=DAG, task_dict={}, dag_id="test")
    op.execute({})

    mock_hook.return_value.get_instance.assert_called_once_with(
        instance_name=INSTANCE_NAME, location=LOCATION, project_id=PROJECT_ID
    )

    mock_hook.return_value.start_pipeline.assert_called_once_with(
        instance_url=INSTANCE_URL,
        pipeline_name=PIPELINE_NAME,
        namespace=NAMESPACE,
        runtime_args=RUNTIME_ARGS,
    )
Example #8
Source File: test_fixtures.py From dagster with Apache License 2.0
def execute_tasks_in_dag(dag, tasks, run_id, execution_date):
    assert isinstance(dag, DAG)

    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(logging.Formatter(LOG_FORMAT))
    root = logging.getLogger('airflow.task.operators')
    root.setLevel(logging.DEBUG)
    root.addHandler(handler)

    dag_run = dag.create_dagrun(run_id=run_id, state='success', execution_date=execution_date)

    results = {}
    for task in tasks:
        ti = TaskInstance(task=task, execution_date=execution_date)
        context = ti.get_template_context()
        context['dag_run'] = dag_run

        try:
            results[ti] = task.execute(context)
        except AirflowSkipException as exc:
            results[ti] = exc

    return results
Example #9
Source File: dag_cycle_tester.py From airflow with Apache License 2.0
def test_cycle_arbitrary_loop(self):
    # test arbitrary loop
    dag = DAG(
        'dag',
        start_date=DEFAULT_DATE,
        default_args={'owner': 'owner1'})

    # E -> A -> B -> F -> A
    #      A -> C -> F
    with dag:
        op1 = DummyOperator(task_id='A')
        op2 = DummyOperator(task_id='B')
        op3 = DummyOperator(task_id='C')
        op4 = DummyOperator(task_id='E')
        op5 = DummyOperator(task_id='F')
        op1.set_downstream(op2)
        op1.set_downstream(op3)
        op4.set_downstream(op1)
        op3.set_downstream(op5)
        op2.set_downstream(op5)
        op5.set_downstream(op1)

    with self.assertRaises(AirflowDagCycleException):
        self.assertFalse(test_cycle(dag))
Example #10
Source File: dag_cycle_tester.py From airflow with Apache License 2.0
def test_cycle_large_loop(self):
    # large loop
    dag = DAG(
        'dag',
        start_date=DEFAULT_DATE,
        default_args={'owner': 'owner1'})

    # A -> B -> C -> D -> E -> A
    with dag:
        op1 = DummyOperator(task_id='A')
        op2 = DummyOperator(task_id='B')
        op3 = DummyOperator(task_id='C')
        op4 = DummyOperator(task_id='D')
        op5 = DummyOperator(task_id='E')
        op1.set_downstream(op2)
        op2.set_downstream(op3)
        op3.set_downstream(op4)
        op4.set_downstream(op5)
        op5.set_downstream(op1)

    with self.assertRaises(AirflowDagCycleException):
        self.assertFalse(test_cycle(dag))
Example #11
Source File: dag_cycle_tester.py From airflow with Apache License 2.0
def test_cycle_downstream_loop(self):
    # test downstream self loop
    dag = DAG(
        'dag',
        start_date=DEFAULT_DATE,
        default_args={'owner': 'owner1'})

    # A -> B -> C -> D -> E -> E
    with dag:
        op1 = DummyOperator(task_id='A')
        op2 = DummyOperator(task_id='B')
        op3 = DummyOperator(task_id='C')
        op4 = DummyOperator(task_id='D')
        op5 = DummyOperator(task_id='E')
        op1.set_downstream(op2)
        op2.set_downstream(op3)
        op3.set_downstream(op4)
        op4.set_downstream(op5)
        op5.set_downstream(op5)

    with self.assertRaises(AirflowDagCycleException):
        self.assertFalse(test_cycle(dag))
Example #12
Source File: _airflow_op.py From pipelines with Apache License 2.0
def _run_airflow_op(Op, *op_args, **op_kwargs):
    from airflow.utils import db
    db.initdb()

    from datetime import datetime
    from airflow import DAG, settings
    from airflow.models import TaskInstance, Variable, XCom

    dag = DAG(dag_id='anydag', start_date=datetime.now())
    task = Op(*op_args, **op_kwargs, dag=dag, task_id='anytask')
    ti = TaskInstance(task=task, execution_date=datetime.now())
    result = task.execute(ti.get_template_context())

    variables = {var.id: var.val for var in settings.Session().query(Variable).all()}
    xcoms = {msg.key: msg.value for msg in settings.Session().query(XCom).all()}

    return (result, variables, xcoms)
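A hypothetical invocation of the helper above, assuming an Airflow 1.x environment and a plain BashOperator; the command string is illustrative, not from the source project:

from airflow.operators.bash_operator import BashOperator  # Airflow 1.x import path

# Runs a single operator without a scheduler; returns the execute() result
# plus snapshots of any Variables and XComs written during the run.
result, variables, xcoms = _run_airflow_op(BashOperator, bash_command='echo hello')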
Example #13
Source File: cleveland_museum_workflow.py From cccatalog with MIT License
def create_dag():
    dag = DAG(
        dag_id=DAG_ID,
        default_args=DAG_DEFAULT_ARGS,
        start_date=datetime(2020, 1, 15),
        schedule_interval="@monthly",
        catchup=False
    )

    with dag:
        start_task = get_log_operator(dag, DAG_ID, "Starting")
        run_task = get_runner_operator(dag)
        end_task = get_log_operator(dag, DAG_ID, "Finished")

        start_task >> run_task >> end_task

    return dag
Example #14
Source File: wikimedia_workflow.py From cccatalog with MIT License
def create_dag():
    dag = DAG(
        dag_id=DAG_ID,
        default_args=DAG_DEFAULT_ARGS,
        concurrency=3,
        max_active_runs=3,
        start_date=datetime(2003, 7, 1),
        schedule_interval='@daily',
        catchup=False,
    )

    with dag:
        start_task = get_log_operator(dag, DAG_ID, 'Starting')
        run_task = get_runner_operator(dag)
        end_task = get_log_operator(dag, DAG_ID, 'Finished')

        start_task >> run_task >> end_task

    return dag
Example #15
Source File: test_qa_check_discovery.py From FlowKit with Mozilla Public License 2.0
def test_additional_checks_collected_in_subdirs(tmpdir):
    from airflow import DAG
    from flowetl.util import get_qa_checks

    Path(tmpdir / "qa_checks" / "calls").mkdir(parents=True)
    Path(tmpdir / "qa_checks" / "calls" / "DUMMY_CHECK.sql").touch()
    check_operators = get_qa_checks(
        dag=DAG("DUMMY_DAG", start_date=datetime.now(), template_searchpath=str(tmpdir))
    )
    assert len(check_operators) == len(qa_checks)

    check_operators = get_qa_checks(
        dag=DAG(
            "DUMMY_DAG",
            start_date=datetime.now(),
            template_searchpath=str(tmpdir),
            params=dict(cdr_type="calls"),
        ),
    )
    assert len(check_operators) > len(qa_checks)
Example #16
Source File: metropolitan_museum_workflow.py From cccatalog with MIT License
def create_dag():
    dag = DAG(
        dag_id=DAG_ID,
        default_args=DAG_DEFAULT_ARGS,
        concurrency=1,
        max_active_runs=1,
        start_date=datetime(2020, 1, 1),
        schedule_interval='@daily',
        catchup=False,
    )

    with dag:
        start_task = get_log_operator(dag, DAG_ID, 'Starting')
        run_task = get_runner_operator(dag)
        end_task = get_log_operator(dag, DAG_ID, 'Finished')

        start_task >> run_task >> end_task

    return dag
Example #17
Source File: sync_commoncrawl_workflow.py From cccatalog with MIT License
def create_dag():
    dag = DAG(
        dag_id=DAG_ID,
        default_args=DAG_DEFAULT_ARGS,
        start_date=datetime(2020, 1, 15),
        schedule_interval="0 16 15 * *",
        catchup=False
    )

    with dag:
        start_task = get_log_operator(dag, DAG_ID, "Starting")
        run_task = get_runner_operator(dag)
        end_task = get_log_operator(dag, DAG_ID, "Finished")

        start_task >> run_task >> end_task

    return dag
Example #18
Source File: subdag.py From airflow with Apache License 2.0
def subdag(parent_dag_name, child_dag_name, args):
    """
    Generate a DAG to be used as a subdag.

    :param str parent_dag_name: Id of the parent DAG
    :param str child_dag_name: Id of the child DAG
    :param dict args: Default arguments to provide to the subdag
    :return: DAG to use as a subdag
    :rtype: airflow.models.DAG
    """
    dag_subdag = DAG(
        dag_id='%s.%s' % (parent_dag_name, child_dag_name),
        default_args=args,
        schedule_interval="@daily",
    )

    for i in range(5):
        DummyOperator(
            task_id='%s-task-%s' % (child_dag_name, i + 1),
            default_args=args,
            dag=dag_subdag,
        )

    return dag_subdag
# [END subdag]
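A factory like this is typically wired into the parent DAG through a SubDagOperator. A minimal sketch, assuming the Airflow 1.10-era import path; the parent dag_id, dates, and default arguments are illustrative:

from datetime import datetime

from airflow import DAG
from airflow.operators.subdag_operator import SubDagOperator  # Airflow 1.10 path

args = {'owner': 'airflow', 'start_date': datetime(2020, 1, 1)}  # illustrative defaults

with DAG('parent_dag', default_args=args, schedule_interval='@daily') as parent:
    # The task_id must match child_dag_name so the ids line up as 'parent_dag.section-1'.
    SubDagOperator(
        task_id='section-1',
        subdag=subdag('parent_dag', 'section-1', args),
    )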
Example #19
Source File: fileflow_example.py From fileflow with Apache License 2.0
def run(self, *args, **kwargs):
    # This is how you read the output of a previous task
    # The argument to read_upstream_file is based on the DAG configuration
    input_string = self.read_upstream_file("something")

    # An example bit of 'logic'
    output_string = self.output_template.format(
        input_string,
        self.get_input_filename("something"),
        self.get_output_filename()
    )

    # And write out the results of the logic to the correct file
    self.write_file(output_string)
    logging.info(output_string)

# Now let's define a DAG
Example #20
Source File: example_skip_dag.py From airflow with Apache License 2.0
def create_test_pipeline(suffix, trigger_rule, dag_):
    """
    Instantiate a number of operators for the given DAG.

    :param str suffix: Suffix to append to the operator task_ids
    :param str trigger_rule: TriggerRule for the join task
    :param DAG dag_: The DAG to run the operators on
    """
    skip_operator = DummySkipOperator(task_id='skip_operator_{}'.format(suffix), dag=dag_)
    always_true = DummyOperator(task_id='always_true_{}'.format(suffix), dag=dag_)
    join = DummyOperator(task_id=trigger_rule, dag=dag_, trigger_rule=trigger_rule)
    final = DummyOperator(task_id='final_{}'.format(suffix), dag=dag_)

    skip_operator >> join
    always_true >> join
    join >> final
Example #21
Source File: example_dingding.py From airflow with Apache License 2.0
def failure_callback(context):
    """
    The function that will be executed on failure.

    :param context: The context of the executed task.
    :type context: dict
    """
    message = 'AIRFLOW TASK FAILURE TIPS:\n' \
              'DAG: {}\n' \
              'TASKS: {}\n' \
              'Reason: {}\n' \
        .format(context['task_instance'].dag_id,
                context['task_instance'].task_id,
                context['exception'])
    return DingdingOperator(
        task_id='dingding_success_callback',
        dingding_conn_id='dingding_default',
        message_type='text',
        message=message,
        at_all=True,
    ).execute(context)
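A callback like this is hooked up through the on_failure_callback operator argument, commonly set once in the DAG's default_args. A minimal sketch; the dag_id, dates, and schedule are illustrative:

from datetime import datetime

from airflow import DAG

args = {
    'start_date': datetime(2020, 1, 1),
    'on_failure_callback': failure_callback,  # invoked with the task context on failure
}

dag = DAG('example_dingding_callback', default_args=args, schedule_interval='@once')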
Example #22
Source File: common_api_workflows.py From cccatalog with MIT License
def create_dag(
        source,
        script_location,
        dag_id,
        crontab_str=None,
        default_args=DAG_DEFAULT_ARGS):

    dag = DAG(
        dag_id=dag_id,
        default_args=default_args,
        schedule_interval=crontab_str,
        catchup=False
    )

    with dag:
        start_task = get_log_operator(dag, source, 'starting')
        run_task = get_runner_operator(dag, source, script_location)
        end_task = get_log_operator(dag, source, 'finished')

        start_task >> run_task >> end_task

    return dag
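For Airflow's DAG discovery to pick up DAGs produced by a factory like this, each returned object must be reachable at module scope. A common pattern is to bind the results into globals(); the workflow tuple below is hypothetical, not from the source file:

# Hypothetical workflow definitions: (source, script_location, dag_id, crontab_str)
WORKFLOWS = [
    ('example_source', '/path/to/script.py', 'example_source_workflow', '0 0 * * *'),
]

for source, script_location, dag_id, crontab_str in WORKFLOWS:
    # Airflow only registers DAG objects it can find at module level.
    globals()[dag_id] = create_dag(source, script_location, dag_id, crontab_str)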
Example #23
Source File: test_operator_util.py From cccatalog with MIT License
def test_get_runner_operator_creates_valid_string():
    dag = DAG(
        dag_id='test_dag',
        start_date=datetime.strptime('2019-01-01', '%Y-%m-%d')
    )
    runner = op_util.get_runner_operator(
        dag, 'test_source', '/test/script/location.py'
    )
    expected_command = 'python /test/script/location.py --mode default'
    assert runner.bash_command == expected_command
Example #24
Source File: popularity_workflow.py From cccatalog with MIT License
def create_dag():
    dag = DAG(
        dag_id=DAG_ID,
        default_args=DAG_DEFAULT_ARGS,
        start_date=datetime(2020, 1, 1),
        schedule_interval='@monthly',
        catchup=False
    )

    with dag:
        start_task = get_log_operator(dag, DAG_ID, 'Starting')
        run_task = get_runner_operator(dag)
        end_task = get_log_operator(dag, DAG_ID, 'Finished')

        start_task >> run_task >> end_task

    return dag
Example #25
Source File: S1_GRD_1SDV.py From evo-odas with MIT License
def prepare_band_paths(get_inputs_from, *args, **kwargs):
    """Get Product / Band files path Dictionary from ZipInspector and
    extract the list of band files
    """
    task_instance = kwargs['ti']

    # band number from task name
    task_id = task_instance.task_id
    band_number = int(task_id.split('_')[-1])

    log.info("Getting inputs from: " + get_inputs_from)
    product_bands_dict = task_instance.xcom_pull(task_ids=get_inputs_from, key=XCOM_RETURN_KEY)
    if product_bands_dict is None:
        log.info("No input from ZipInspector. Nothing to do")
        return None

    log.info("Product Band Dictionary: {}".format(pprint.pformat(product_bands_dict)))

    files_path = []
    for k in product_bands_dict:
        files_path += product_bands_dict[k]

    # Push one of the band paths to XCom
    file_path = files_path[band_number - 1]

    return [file_path]

# DAG definition
Example #26
Source File: test_qa_check_discovery.py From FlowKit with Mozilla Public License 2.0
def test_additional_checks_collected(tmpdir):
    from airflow import DAG
    from flowetl.util import get_qa_checks

    Path(tmpdir / "qa_checks").mkdir()
    Path(tmpdir / "qa_checks" / "DUMMY_CHECK.sql").touch()
    check_operators = get_qa_checks(
        dag=DAG("DUMMY_DAG", start_date=datetime.now(), template_searchpath=str(tmpdir))
    )
    assert len(check_operators) > len(qa_checks)
Example #27
Source File: test_good_dags.py From airflow-declarative with Apache License 2.0
def test_good_dags(path):
    dags = airflow_declarative.from_path(path)
    assert isinstance(dags, list)
    assert all(isinstance(dag, airflow.DAG) for dag in dags)
Example #28
Source File: test_qa_check_discovery.py From FlowKit with Mozilla Public License 2.0
def test_default_qa_checks_found():
    from airflow import DAG
    from flowetl.util import get_qa_checks

    dag = DAG("DUMMY_DAG", start_date=datetime.now())
    check_operators = get_qa_checks(dag=dag)
    assert {op.task_id: op.sql for op in check_operators} == qa_checks
Example #29
Source File: test_qa_check_discovery.py From FlowKit with Mozilla Public License 2.0
def test_name_suffix_added(tmpdir):
    from airflow import DAG
    from flowetl.util import get_qa_checks

    Path(tmpdir / "qa_checks" / "calls").mkdir(parents=True)
    Path(tmpdir / "qa_checks" / "calls" / "DUMMY_CHECK.sql").touch()
    check_operators = get_qa_checks(
        dag=DAG(
            "DUMMY_DAG",
            start_date=datetime.now(),
            template_searchpath=str(tmpdir),
            params=dict(cdr_type="calls"),
        )
    )
    assert any(op for op in check_operators if op.task_id == "DUMMY_CHECK.calls")
Example #30
Source File: test_integer_callback_arg.py From airflow-declarative with Apache License 2.0
def test_integer_callback_arg(good_dag_path):
    path = good_dag_path("integer_callback_arg")
    dags = airflow_declarative.from_path(path)
    assert len(dags) == 1

    yml_dag = dags[0]
    assert isinstance(yml_dag, DAG)

    myoperator = yml_dag.task_dict["myoperator"]
    param = myoperator._callback_args["param"]
    assert isinstance(param, int)