Python os.setpgid() Examples

The following are 20 code examples of os.setpgid(), drawn from open-source projects; the source file, project, and license are noted above each example. You may also want to check out the other available functions and classes of the os module.
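
Almost every example below follows the same pattern: a freshly started child calls os.setpgid(0, 0) to become the leader of a new process group, so that a single os.killpg() later reaches the child and everything it spawned. A minimal, illustrative sketch of that pattern (not taken from any of the projects below):

import os
import signal
import time

pid = os.fork()
if pid == 0:                      # child
    os.setpgid(0, 0)              # become leader of a new process group
    time.sleep(60)                # stand-in for work that may spawn grandchildren
    os._exit(0)

# parent: the child's pid doubles as the id of its new process group
time.sleep(1)                     # crude; Example #1 shows how to handle the setpgid race properly
os.killpg(pid, signal.SIGTERM)    # signals the child and all of its descendants
os.waitpid(pid, 0)
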
Example #1
Source File: pipeline.py    From Comparative-Annotation-Toolkit with Apache License 2.0 (6 votes)
def _setPgid(pid, pgid):
    """set pgid of a process, ignored exception caused by race condition
    that occurs if already set by parent or child has already existed"""
    # Should just ignore EACCES, to handle the race condition between parent
    # and child.  However some Linux kernels (seen in 2.6.18-53) report ESRCH
    # or EPERM.  To handle this in a straightforward way, just check that the
    # change has been made.  However, in some cases the change didn't take;
    # retrying seems to make the problem go away.
    for i in range(0,5):
        try:
            os.setpgid(pid, pgid)
            return
        except OSError:
            if os.getpgid(pid) == pgid:
                return
            time.sleep(0.25) # sleep for retry
    # last try, let it return an error
    os.setpgid(pid, pgid)

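
The retry-and-verify loop above exists because setpgid is usually called on both sides of a fork: the child calls it on itself before exec, and the parent calls it on the child, so whichever runs first wins. If the child has already exec'd, the parent's call can fail (EACCES, or ESRCH/EPERM on some kernels) even though the group is already correct, which is why the example checks os.getpgid() before treating the error as fatal. A minimal sketch of that double-call pattern, with illustrative names:

import os

pid = os.fork()
if pid == 0:
    os.setpgid(0, 0)              # child: create and join its own group before exec
    os.execvp("sleep", ["sleep", "10"])
else:
    try:
        os.setpgid(pid, pid)      # parent: same group; harmless if the child got there first
    except OSError:
        if os.getpgid(pid) != pid:    # only a real failure if the group is still wrong
            raise
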
Example #2
Source File: dag_processing.py    From airflow with Apache License 2.0 (6 votes)
def start(self):
        """
        Use multiple processes to parse and generate tasks for the
        DAGs in parallel. By processing them in separate processes,
        we can get parallelism and isolation from potentially harmful
        user code.
        """

        self.register_exit_signals()

        # Start a new process group
        os.setpgid(0, 0)

        self.log.info("Processing files using up to %s processes at a time ", self._parallelism)
        self.log.info("Process each file at most once every %s seconds", self._file_process_interval)
        self.log.info(
            "Checking for new files in %s every %s seconds", self._dag_directory, self.dag_dir_list_interval
        )

        return self._run_parsing_loop() 
Example #3
Source File: ClusterSimulation.py    From ufora with Apache License 2.0 (6 votes)
def createGlobalSimulator(useUniqueFakeAwsDir=True):
        # Mark our process as the process group leader
        os.setpgid(0, 0)

        if not os.path.exists(Setup.config().fakeAwsBaseDir):
            os.makedirs(Setup.config().fakeAwsBaseDir)
        Simulator._originalFakeAwsDir = Setup.config().fakeAwsBaseDir
        if useUniqueFakeAwsDir:
            newDirName = makeUniqueDir()
            fakeAwsBase = Setup.config().fakeAwsBaseDir
            Setup.config().fakeAwsBaseDir = newDirName
            Setup.config().logDir = newDirName
            latestLinkPath = os.path.join(fakeAwsBase, 'latest')
            if os.path.exists(latestLinkPath):
                os.unlink(latestLinkPath)
            os.symlink(newDirName, latestLinkPath)

        assert Simulator._globalSimulator is None
        Simulator._globalSimulator = Simulator()
        return Simulator._globalSimulator 
Example #4
Source File: timeshare.py    From TikZ with GNU General Public License v3.0 (6 votes)
def execute(self,dt):
        if self.finished: return "finished"
        if not self.running:
            self.process = Process(target = executeInProcessGroup, args = (self,))
            self.process.start()
            print "timeshare child PID:",self.process.pid
            os.setpgid(self.process.pid,self.process.pid)
            print "timeshare process group",os.getpgid(self.process.pid)
            assert os.getpgid(self.process.pid) == self.process.pid
            print "my process group",os.getpgrp(),"which should be",os.getpgid(0)
            assert os.getpgid(self.process.pid) != os.getpgid(0)
            self.running = True
        else:
            os.killpg(self.process.pid, signal.SIGCONT)
        
        self.process.join(dt)
        if self.process.is_alive():
            os.killpg(self.process.pid, signal.SIGSTOP)
            return "still running"
        else:
            self.finished = True
            return self.q.get() 
Example #5
Source File: wsgi.py    From searchlight with Apache License 2.0 (6 votes)
def __init__(self, threads=1000, workers=0):
        os.umask(0o27)  # ensure files are created with the correct privileges
        self._logger = logging.getLogger("eventlet.wsgi.server")
        self._wsgi_logger = loggers.WritableLogger(self._logger)
        self.threads = threads
        self.children = set()
        self.stale_children = set()
        self.running = True
        self.pgid = os.getpid()
        self.workers = workers
        try:
            # NOTE(flaper87): Make sure this process
            # runs in its own process group.
            os.setpgid(self.pgid, self.pgid)
        except OSError:
            # NOTE(flaper87): When running searchlight-control,
            # (searchlight's functional tests, for example)
            # setpgid fails with EPERM as searchlight-control
            # creates a fresh session, of which the newly
            # launched service becomes the leader (session
            # leaders may not change process groups)
            #
            # Running searchlight-api is safe and
            # shouldn't raise any error here.
            self.pgid = 0 
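
The EPERM case described in the note above comes from a POSIX rule: a session leader may not change its process group. When that situation is expected, it can also be detected up front with os.getsid() instead of catching the error; a short sketch of the check (not part of searchlight):

import os

if os.getpid() == os.getsid(0):
    pgid = os.getpgrp()       # already a session leader; its group cannot be changed
else:
    pgid = os.getpid()
    os.setpgid(pgid, pgid)    # put this process into its own group
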
Example #6
Source File: __init__.py    From epdb with MIT License (5 votes)
def switch_pgid(self):
        try:
            if os.getpgrp() != os.tcgetpgrp(0):
                self.__old_pgid = os.getpgrp()
                os.setpgid(0, os.tcgetpgrp(0))
            else:
                self.__old_pgid = None
        except OSError:
            self.__old_pgid = None 
Example #7
Source File: __init__.py    From epdb with MIT License (5 votes)
def restore_input_output(self):
        if self.__old_stdout is not None:
            sys.stdout.flush()
            # now we reset stdout to whatever it was before
            sys.stdout = self.__old_stdout
        if self.__old_stdin is not None:
            sys.stdin = self.__old_stdin
        if self.__old_pgid is not None:
            os.setpgid(0, self.__old_pgid) 
Example #8
Source File: wsgi.py    From senlin with Apache License 2.0 (5 votes)
def __init__(self, name, conf, threads=1000):
        os.umask(0o27)  # ensure files are created with the correct privileges
        self._logger = logging.getLogger("eventlet.wsgi.server")
        self.name = name
        self.threads = threads
        self.children = set()
        self.stale_children = set()
        self.running = True
        self.pgid = os.getpid()
        self.conf = conf
        try:
            os.setpgid(self.pgid, self.pgid)
        except OSError:
            self.pgid = 0 
Example #9
Source File: __init__.py    From conary with Apache License 2.0 (5 votes)
def switch_pgid(self):
        try:
            if os.getpgrp() != os.tcgetpgrp(0):
                self.__old_pgid = os.getpgrp()
                os.setpgid(0, os.tcgetpgrp(0))
            else:
                self.__old_pgid = None
        except OSError:
            self.__old_pgid = None 
Example #10
Source File: __init__.py    From conary with Apache License 2.0 (5 votes)
def restore_input_output(self):
        if self.__old_stdout is not None:
            sys.stdout.flush()
            # now we reset stdout to whatever it was before
            sys.stdout = self.__old_stdout
        if self.__old_stdin is not None:
            sys.stdin = self.__old_stdin
        if self.__old_pgid is not None:
            os.setpgid(0, self.__old_pgid) 
Example #11
Source File: base.py    From antismash with GNU Affero General Public License v3.0 (5 votes)
def parallel_execute(commands: List[List[str]], cpus: Optional[int] = None,
                     timeout: Optional[int] = None, verbose: bool = True) -> List[int]:
    """ Limited return vals, only returns return codes
    """
    if verbose:
        runner = verbose_child_process
    else:
        runner = child_process
    os.setpgid(0, 0)
    if not cpus:
        cpus = get_config().cpus
    assert isinstance(cpus, int)
    pool = multiprocessing.Pool(cpus)
    jobs = pool.map_async(runner, commands)

    try:
        errors = jobs.get(timeout=timeout)
    except multiprocessing.TimeoutError:
        pool.terminate()
        assert isinstance(timeout, int)
        raise RuntimeError("One of %d child processes timed out after %d seconds" % (
                cpus, timeout))

    except KeyboardInterrupt:
        logging.error("Interrupted by user")
        pool.terminate()
        raise

    pool.close()

    return errors 
Example #12
Source File: timeshare.py    From TikZ with GNU General Public License v3.0 (5 votes)
def executeInProcessGroup(task):
    os.setpgid(0,0)
    task.q.put(task.command(*task.arguments)) 
Example #13
Source File: killableprocess.py    From deluge-FileBotTool with GNU General Public License v3.0 (5 votes)
def __init__(self, *args, **kwargs):
            if len(args) >= 7:
                raise Exception("Arguments preexec_fn and after must be passed by keyword.")

            real_preexec_fn = kwargs.pop("preexec_fn", None)
            def setpgid_preexec_fn():
                os.setpgid(0, 0)
                if real_preexec_fn:
                    apply(real_preexec_fn)

            kwargs['preexec_fn'] = setpgid_preexec_fn

            subprocess.Popen.__init__(self, *args, **kwargs) 
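
This wrapper (note the Python 2 apply()) gets the child into a new group by injecting os.setpgid(0, 0) into preexec_fn. On Python 3 the usual alternatives are Popen's start_new_session=True, which gives the child a new session and therefore a new process group, or passing os.setpgrp (equivalent to setpgid(0, 0)) as preexec_fn. A sketch of both variants, assuming a plain sleep command:

import os
import signal
import subprocess

# new session (and with it a new group); avoids preexec_fn, which is unsafe in threaded programs
proc = subprocess.Popen(["sleep", "60"], start_new_session=True)

# new process group only, staying in the parent's session
proc2 = subprocess.Popen(["sleep", "60"], preexec_fn=os.setpgrp)

os.killpg(proc.pid, signal.SIGTERM)
os.killpg(proc2.pid, signal.SIGTERM)
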
Example #14
Source File: proctools.py    From pycopia with Apache License 2.0 (5 votes)
def setpgid(pid_or_proc, pgrp):
    pid = int(pid_or_proc)
    return os.setpgid(pid, pgrp) 
Example #15
Source File: proctools.py    From pycopia with Apache License 2.0 (5 votes)
def setpgid(self, pgid):
        os.setpgid(self.childpid, pgid) 
Example #16
Source File: dag_processing.py    From airflow with Apache License 2.0 (5 votes)
def _run_processor_manager(dag_directory,
                               max_runs,
                               processor_factory,
                               processor_timeout,
                               signal_conn,
                               dag_ids,
                               pickle_dags,
                               async_mode):

        # Make this process start a new process group - that makes it easy
        # to kill all of its sub-processes at the OS level, rather than having
        # to iterate over the child processes
        os.setpgid(0, 0)

        setproctitle("airflow scheduler -- DagFileProcessorManager")
        # Reload configurations and settings to avoid collision with parent process.
        # Because this process may need custom configurations that cannot be shared,
        # e.g. RotatingFileHandler. And it can cause connection corruption if we
        # do not recreate the SQLA connection pool.
        os.environ['CONFIG_PROCESSOR_MANAGER_LOGGER'] = 'True'
        os.environ['AIRFLOW__LOGGING__COLORED_CONSOLE_LOG'] = 'False'
        # Replicating the behavior of how logging module was loaded
        # in logging_config.py
        importlib.reload(import_module(airflow.settings.LOGGING_CLASS_PATH.rsplit('.', 1)[0]))
        importlib.reload(airflow.settings)
        airflow.settings.initialize()
        del os.environ['CONFIG_PROCESSOR_MANAGER_LOGGER']
        processor_manager = DagFileProcessorManager(dag_directory,
                                                    max_runs,
                                                    processor_factory,
                                                    processor_timeout,
                                                    signal_conn,
                                                    dag_ids,
                                                    pickle_dags,
                                                    async_mode)

        processor_manager.start() 
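
The comment at the top of this example is the payoff of os.setpgid(0, 0): once the manager leads its own group, the parent can tear down the manager and every process it spawned with a single call. A hypothetical parent-side helper (not Airflow's API) might look like this:

import os
import signal

def kill_process_group(leader_pid):
    """Terminate a group leader and everything it spawned."""
    try:
        os.killpg(os.getpgid(leader_pid), signal.SIGTERM)
    except ProcessLookupError:
        pass    # the group has already exited
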
Example #17
Source File: managers.py    From automl-phase-2 with MIT License (4 votes)
def first_action(self):
        signal.signal(signal.SIGTSTP, util.signal_handler)
        signal.signal(signal.SIGCONT, util.signal_handler)
        signal.signal(signal.SIGTERM, util.signal_handler)  # if termination signal received, tidy up.
        os.setpgid(0, 0)  # makes this process and all its children run in a separate process group, so orphans are easily found
        self.pid = os.getpid()
        self.pgid = os.getpgid(0)

        # sys excepthook to attempt to log exceptions to logger rather than stderr
        def log_uncaught_exceptions(ex_cls, ex, tb):
            logging.error('An error happened and something died', exc_info=(ex_cls, ex, tb))

        sys.excepthook = log_uncaught_exceptions

        # How much memory is there?
        available = psutil.virtual_memory().available  # measured in bytes
        self.cgroup_mem_limit = int(available - self.overhead_memory)  # assume that manager and run.py fit into 0.5GB
        logger.info("Learners memory limit is %.2fGB", self.cgroup_mem_limit/2**30)

        # Create control group to limit resources
        try:
            subprocess.check_call(['which', 'cgexec'])
        except subprocess.CalledProcessError:
            # Install cgroup-bin
            installcg = "echo '{}' | sudo -S apt-get -y install cgroup-bin".format(self.password)
            retcode = subprocess.call(installcg, shell=True)
            if retcode != 0:
                logger.error("Cgroup-bin installation failed")
            else:
                logger.info("Installed cgroup-bin")

        user = pwd.getpwuid(os.getuid())[0]
        makecg = "echo '{}' | sudo -S cgcreate -a {} -g memory:/{}".format(self.password, user, self.cgroup)
        retcode = subprocess.call(makecg, shell=True)
        if retcode != 0:
            logger.error("Cgroup creation failed")

        # Limit memory for group
        with open('/sys/fs/cgroup/memory/{}/memory.limit_in_bytes'.format(self.cgroup), 'wb') as fp:
            fp.write(str(self.cgroup_mem_limit))
        with open('/sys/fs/cgroup/memory/{}/memory.swappiness'.format(self.cgroup), 'wb') as fp:
            fp.write(str(0))
        logger.info("Learners memory limit is %.2fGB", self.cgroup_mem_limit/2**30) 
Example #18
Source File: daemon.py    From spark-cluster-deployment with Apache License 2.0 (4 votes)
def manager():
    # Create a new process group to corral our children
    os.setpgid(0, 0)

    # Create a listening socket on the AF_INET loopback interface
    listen_sock = socket.socket(AF_INET, SOCK_STREAM)
    listen_sock.bind(('127.0.0.1', 0))
    listen_sock.listen(max(1024, 2 * POOLSIZE, SOMAXCONN))
    listen_host, listen_port = listen_sock.getsockname()
    write_int(listen_port, sys.stdout)

    # Launch initial worker pool
    for idx in range(POOLSIZE):
        launch_worker(listen_sock)
    listen_sock.close()

    def shutdown():
        global exit_flag
        exit_flag.value = True

    # Gracefully exit on SIGTERM, don't die on SIGHUP
    signal.signal(SIGTERM, lambda signum, frame: shutdown())
    signal.signal(SIGHUP, SIG_IGN)

    # Cleanup zombie children
    def handle_sigchld(*args):
        try:
            pid, status = os.waitpid(0, os.WNOHANG)
            if status != 0 and not should_exit():
                raise RuntimeError("worker crashed: %s, %s" % (pid, status))
        except EnvironmentError as err:
            if err.errno not in (ECHILD, EINTR):
                raise
    signal.signal(SIGCHLD, handle_sigchld)

    # Initialization complete
    sys.stdout.close()
    try:
        while not should_exit():
            try:
                # Spark tells us to exit by closing stdin
                if os.read(0, 512) == '':
                    shutdown()
            except EnvironmentError as err:
                if err.errno != EINTR:
                    shutdown()
                    raise
    finally:
        signal.signal(SIGTERM, SIG_DFL)
        exit_flag.value = True
        # Send SIGHUP to notify workers of shutdown
        os.kill(0, SIGHUP) 
Example #19
Source File: proctools.py    From pycopia with Apache License 2.0 (4 votes)
def submethod(self, _method, args=None, kwargs=None, pwent=None):
        args = args or ()
        kwargs = kwargs or {}
        signal.signal(SIGCHLD, SIG_DFL) # critical area
        proc = SubProcess(pwent=pwent)
        if proc.childpid == 0: # in child
            os.setpgid(0, self._pgid)
            sys.excepthook = sys.__excepthook__
            self._procs.clear()
            try:
                rv = _method(*args, **kwargs)
            except SystemExit as val:
                rv = val.code
            except:
                ex, val, tb = sys.exc_info()
                try:
                    import traceback
                    try:
                        fname = _method.__name__
                    except AttributeError:
                        try:
                            fname = _method.__class__.__name__
                        except AttributeError:
                            fname = str(_method)
                    with open("/tmp/" + fname + "_error.log", "w+") as errfile:
                        traceback.print_exception(ex, val, tb, None, errfile)
                finally:
                    ex = val = tb = None
                rv = 127
            if rv is None:
                rv = 0
            try:
                rv = int(rv)
            except:
                rv = 0
            os._exit(rv)
        else:
            self._procs[proc.childpid] = proc
            signal.signal(SIGCHLD, self._child_handler)
            signal.siginterrupt(SIGCHLD, False)
            return proc

    # introspection and query methods 
Example #20
Source File: standard_task_runner.py    From airflow with Apache License 2.0 (4 votes)
def _start_by_fork(self):  # pylint: disable=inconsistent-return-statements
        pid = os.fork()
        if pid:
            self.log.info("Started process %d to run task", pid)
            return psutil.Process(pid)
        else:
            from airflow.cli.cli_parser import get_parser
            from airflow.sentry import Sentry
            import signal
            import airflow.settings as settings

            signal.signal(signal.SIGINT, signal.SIG_DFL)
            signal.signal(signal.SIGTERM, signal.SIG_DFL)
            # Start a new process group
            os.setpgid(0, 0)

            # Force a new SQLAlchemy session. We can't share open DB handles
            # between process. The cli code will re-create this as part of its
            # normal startup
            settings.engine.pool.dispose()
            settings.engine.dispose()

            parser = get_parser()
            # [1:] - remove "airflow" from the start of the command
            args = parser.parse_args(self._command[1:])

            self.log.info('Running: %s', self._command)
            self.log.info('Job %s: Subtask %s', self._task_instance.job_id, self._task_instance.task_id)

            proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date}"
            if hasattr(args, "job_id"):
                proc_title += " {0.job_id}"
            setproctitle(proc_title.format(args))

            try:
                args.func(args, dag=self.dag)
                return_code = 0
            except Exception:  # pylint: disable=broad-except
                return_code = 1
            finally:
                # Explicitly flush any pending exception to Sentry if enabled
                Sentry.flush()
                os._exit(return_code)  # pylint: disable=protected-access