Python onnxruntime.SessionOptions() Examples

The following are 6 code examples of onnxruntime.SessionOptions(), each drawn from an open-source project; the source file and project are noted above each example. You may also want to check out the other available functions and classes of the onnxruntime module.
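For orientation, here is a minimal sketch of the pattern the examples below share. The model path "model.onnx", the input name, and the dummy array shape are placeholder assumptions, not taken from any of the projects.

import numpy as np
import onnxruntime

sess_options = onnxruntime.SessionOptions()
# Choose a graph optimization level; ORT_ENABLE_ALL applies every available optimization.
sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
session = onnxruntime.InferenceSession("model.onnx", sess_options)  # "model.onnx" is a placeholder path
# Run with a dummy input under the model's first input name; passing None fetches all outputs.
input_name = session.get_inputs()[0].name
outputs = session.run(None, {input_name: np.zeros((1, 3), dtype=np.float32)})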
Example #1
Source File: adaptive_model.py    From FARM with Apache License 2.0
def load(cls, load_dir, device, **kwargs):
        import onnxruntime
        sess_options = onnxruntime.SessionOptions()
        # Set graph optimization level to ORT_ENABLE_EXTENDED to enable bert optimization.
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
        # Use all available CPU cores for intra-op parallelism. Only relevant for CPU execution; has little impact on GPU.
        sess_options.intra_op_num_threads = multiprocessing.cpu_count()
        onnx_session = onnxruntime.InferenceSession(str(load_dir / "model.onnx"), sess_options)

        # Prediction heads
        _, ph_config_files = cls._get_prediction_head_files(load_dir, strict=False)
        prediction_heads = []
        ph_output_type = []
        for config_file in ph_config_files:
            # The ONNX model doesn't need to have a separate neural network for the PredictionHead. It only uses the
            # instance methods of the PredictionHead class, so we load with load_weights=False.
            head = PredictionHead.load(config_file, load_weights=False)
            prediction_heads.append(head)
            ph_output_type.append(head.ph_output_type)

        with open(load_dir/"model_config.json") as f:
            model_config = json.load(f)
            language = model_config["language"]

        return cls(onnx_session, prediction_heads, language, device) 
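The snippet above only constructs the InferenceSession; a hedged sketch of how such a session is typically driven afterwards follows. The input names, the int64 dtype, and the (1, 128) shape are placeholders, not FARM's actual feature layout.

import numpy as np
# Hypothetical feed: a BERT-style export usually expects int64 token-id tensors,
# but the exact input names and shapes depend on how the model was exported.
feed = {inp.name: np.zeros((1, 128), dtype=np.int64) for inp in onnx_session.get_inputs()}
logits_per_head = onnx_session.run(None, feed)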
Example #2
Source File: onnxruntime_SUT.py    From inference with Apache License 2.0
def __init__(self, args):
        self.profile = args.profile
        self.options = onnxruntime.SessionOptions()
        self.options.enable_profiling = args.profile

        print("Loading ONNX model...")
        self.quantized = args.quantized
        if self.quantized:
            model_path = "build/data/bert_tf_v1_1_large_fp32_384_v2/bert_large_v1_1_fake_quant.onnx"
        else:
            model_path = "build/data/bert_tf_v1_1_large_fp32_384_v2/model.onnx"
        self.sess = onnxruntime.InferenceSession(model_path, self.options)

        print("Constructing SUT...")
        self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries, self.process_latencies)
        print("Finished constructing SUT.")

        self.qsl = get_squad_QSL() 
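When enable_profiling is set as above, onnxruntime records a JSON trace while the session runs. A hedged standalone sketch of retrieving it (the model path is a placeholder; end_profiling is a standard InferenceSession method):

import onnxruntime
options = onnxruntime.SessionOptions()
options.enable_profiling = True
sess = onnxruntime.InferenceSession("model.onnx", options)  # placeholder model path
# ... issue some queries with sess.run(...) ...
profile_path = sess.end_profiling()  # stops profiling and returns the JSON trace file name
print("Profiling trace written to", profile_path)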
Example #3
Source File: onnxbert.py    From nboost with Apache License 2.0
def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        sess_options = rt.SessionOptions()

        self.model_dir = glob.glob(os.path.join(self.model_dir, '*.onnx'))[0]

        # Set graph optimization level to ORT_ENABLE_EXTENDED to enable bert optimization.
        sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_EXTENDED

        # To enable model serialization and store the optimized graph to desired location.
        sess_options.optimized_model_filepath = self.model_dir
        self.session = rt.InferenceSession(self.model_dir, sess_options)
        if 'albert' in self.model_dir:
            self.tokenizer = AutoTokenizer.from_pretrained('albert-base-uncased')
        else:
            self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') 
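Setting optimized_model_filepath, as above, makes onnxruntime serialize the optimized graph to disk when the session is created; note that the snippet points it at the same file it loads, so the original model is overwritten with its optimized form. A hedged sketch of the more common two-path pattern (both file names are placeholders):

import onnxruntime as rt

sess_options = rt.SessionOptions()
sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
# Serialize the optimized graph to a separate file instead of overwriting the source model.
sess_options.optimized_model_filepath = "model.optimized.onnx"
session = rt.InferenceSession("model.onnx", sess_options)

# Later runs can load the pre-optimized file and skip online optimization entirely.
reload_options = rt.SessionOptions()
reload_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_DISABLE_ALL
session2 = rt.InferenceSession("model.optimized.onnx", reload_options)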
Example #4
Source File: bert_model_optimization.py    From FARM with Apache License 2.0
def optimize_by_onnxruntime(onnx_model_path, use_gpu=False, optimized_model_path=None, opt_level=99):
    """
    Use onnxruntime package to optimize model. It could support models exported by PyTorch.

    Args:
        onnx_model_path (str): th path of input onnx model.
        use_gpu (bool): whether the optimized model is targeted to run in GPU.
        optimized_model_path (str or None): the path of optimized model.

    Returns:
        optimized_model_path: the path of optimized model
    """
    import onnxruntime

    if use_gpu and 'CUDAExecutionProvider' not in onnxruntime.get_available_providers():
        logger.error("There is no gpu for onnxruntime to do optimization.")
        return onnx_model_path

    sess_options = onnxruntime.SessionOptions()
    if opt_level == 1:
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_BASIC
    elif opt_level == 2:
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
    else:
        assert opt_level == 99
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL

    if optimized_model_path is None:
        path_prefix = onnx_model_path[:-5]  # remove the ".onnx" suffix
        optimized_model_path = "{}_ort_{}.onnx".format(path_prefix, "gpu" if use_gpu else "cpu")

    sess_options.optimized_model_filepath = optimized_model_path

    if not use_gpu:
        session = onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=['CPUExecutionProvider'])
    else:
        session = onnxruntime.InferenceSession(onnx_model_path, sess_options)
        assert 'CUDAExecutionProvider' in session.get_providers()  # Make sure there is GPU

    assert os.path.exists(optimized_model_path) and os.path.isfile(optimized_model_path)
    logger.info("Save optimized model by onnxruntime to {}".format(optimized_model_path))
    return optimized_model_path 
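A hedged usage sketch for the helper above; the input file name is a placeholder, and with the defaults the optimized copy is written next to it (e.g. bert_model_ort_cpu.onnx):

# Optimize a CPU-targeted model with the full (level 99) optimization set.
optimized_path = optimize_by_onnxruntime("bert_model.onnx", use_gpu=False, opt_level=99)
print("Optimized model written to", optimized_path)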
Example #5
Source File: backend_onnxruntime.py    From inference with Apache License 2.0
def load(self, model_path, inputs=None, outputs=None):
        """Load model and find input/outputs from the model file."""
        opt = rt.SessionOptions()
        # enable level 3 optimizations
        # FIXME: enable below once onnxruntime 0.5 is released
        # opt.set_graph_optimization_level(3)
        self.sess = rt.InferenceSession(model_path, opt)
        # get input and output names
        if not inputs:
            self.inputs = [meta.name for meta in self.sess.get_inputs()]
        else:
            self.inputs = inputs
        if not outputs:
            self.outputs = [meta.name for meta in self.sess.get_outputs()]
        else:
            self.outputs = outputs
        return self 
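The FIXME above refers to an API that has since changed: recent onnxruntime releases set the optimization level through the graph_optimization_level attribute rather than a set_graph_optimization_level(3) call. A hedged sketch of the equivalent lines, reusing the module alias rt from the example:

import onnxruntime as rt

opt = rt.SessionOptions()
# Recent releases expose the level as an enum-valued attribute rather than a setter method.
opt.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL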
Example #6
Source File: onnx_launcher.py    From open_model_zoo with Apache License 2.0
def _create_session_via_execution_providers_api(self, model):
        session_options = onnx_rt.SessionOptions()
        session = onnx_rt.InferenceSession(model, sess_options=session_options)
        self.execution_providers = self.get_value_from_config('execution_providers')
        available_providers = session.get_providers()
        contains_all(available_providers, self.execution_providers)
        session.set_providers(self.execution_providers)

        return session
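Recent onnxruntime releases also accept the execution providers directly at session construction, which avoids the separate set_providers call. A hedged sketch reusing the names from the snippet above; the provider list is only an illustrative priority order:

session = onnx_rt.InferenceSession(
    model,
    sess_options=session_options,
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],  # illustrative priority order
)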