Python onnxruntime.SessionOptions() Examples
The following are 6 code examples of onnxruntime.SessionOptions(), drawn from open source projects. The originating project, source file, and license are noted above each example.
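Before the examples, here is a minimal sketch of the typical SessionOptions workflow: create the options object, configure it, and pass it to an InferenceSession. The model path and thread count below are placeholders, not values from any of the projects:

import onnxruntime

sess_options = onnxruntime.SessionOptions()
# Apply all available graph rewrites; the other levels are ORT_DISABLE_ALL,
# ORT_ENABLE_BASIC, and ORT_ENABLE_EXTENDED.
sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
# Number of threads used to parallelize execution within a single operator.
sess_options.intra_op_num_threads = 4
session = onnxruntime.InferenceSession("model.onnx", sess_options)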
Example #1
Source File: adaptive_model.py From FARM with Apache License 2.0
def load(cls, load_dir, device, **kwargs):
    import onnxruntime
    sess_options = onnxruntime.SessionOptions()
    # Set graph optimization level to ORT_ENABLE_EXTENDED to enable bert optimization.
    sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
    # Use OpenMP optimizations. Only useful for CPU, has little impact for GPUs.
    sess_options.intra_op_num_threads = multiprocessing.cpu_count()
    onnx_session = onnxruntime.InferenceSession(str(load_dir / "model.onnx"), sess_options)

    # Prediction heads
    _, ph_config_files = cls._get_prediction_head_files(load_dir, strict=False)
    prediction_heads = []
    ph_output_type = []
    for config_file in ph_config_files:
        # An ONNX model doesn't need to have a separate neural network for a PredictionHead.
        # It only uses the instance methods of the PredictionHead class, so we load with
        # load_weights=False.
        head = PredictionHead.load(config_file, load_weights=False)
        prediction_heads.append(head)
        ph_output_type.append(head.ph_output_type)

    with open(load_dir / "model_config.json") as f:
        model_config = json.load(f)
        language = model_config["language"]

    return cls(onnx_session, prediction_heads, language, device)
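Once loaded this way, the session is driven through the standard InferenceSession API. A minimal sketch of a forward pass; the input names, shapes, and dtypes depend on the exported model, so the zero tensors and sequence length here are placeholders:

import numpy as np

# Build a feed dict from the model's declared inputs; symbolic dimensions are
# filled with a batch of one sequence of length 128 for illustration.
feed = {inp.name: np.zeros((1, 128), dtype=np.int64) for inp in onnx_session.get_inputs()}
outputs = onnx_session.run(None, feed)  # None -> return all model outputs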
Example #2
Source File: onnxruntime_SUT.py From inference with Apache License 2.0
def __init__(self, args):
    self.profile = args.profile
    self.options = onnxruntime.SessionOptions()
    self.options.enable_profiling = args.profile

    print("Loading ONNX model...")
    self.quantized = args.quantized
    if self.quantized:
        model_path = "build/data/bert_tf_v1_1_large_fp32_384_v2/bert_large_v1_1_fake_quant.onnx"
    else:
        model_path = "build/data/bert_tf_v1_1_large_fp32_384_v2/model.onnx"
    self.sess = onnxruntime.InferenceSession(model_path, self.options)

    print("Constructing SUT...")
    self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries, self.process_latencies)
    print("Finished constructing SUT.")

    self.qsl = get_squad_QSL()
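When enable_profiling is set as above, onnxruntime records a JSON trace for the session. A sketch of retrieving it after inference has run, assuming a session created with these options; the feed dict is a placeholder:

results = self.sess.run(None, feed)       # run some queries first
profile_path = self.sess.end_profiling()  # stops profiling and returns the trace file name
print("Profile written to", profile_path)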
Example #3
Source File: onnxbert.py From nboost with Apache License 2.0
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    sess_options = rt.SessionOptions()
    self.model_dir = glob.glob(os.path.join(self.model_dir, '*.onnx'))[0]
    # Set graph optimization level to ORT_ENABLE_EXTENDED to enable bert optimization.
    sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
    # Enable model serialization and store the optimized graph at the desired location.
    sess_options.optimized_model_filepath = self.model_dir
    self.session = rt.InferenceSession(self.model_dir, sess_options)

    if 'albert' in self.model_dir:
        self.tokenizer = AutoTokenizer.from_pretrained('albert-base-uncased')
    else:
        self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
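Note that this snippet points optimized_model_filepath at the same file it loads, so onnxruntime serializes the optimized graph back over the original model. A minimal sketch of the same serialization option with a separate output path; both file names are placeholders:

import onnxruntime as rt

sess_options = rt.SessionOptions()
sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
# Write the optimized graph to a new file instead of overwriting the input.
sess_options.optimized_model_filepath = "model.opt.onnx"
session = rt.InferenceSession("model.onnx", sess_options)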
Example #4
Source File: bert_model_optimization.py From FARM with Apache License 2.0
def optimize_by_onnxruntime(onnx_model_path, use_gpu=False, optimized_model_path=None, opt_level=99):
    """
    Use the onnxruntime package to optimize a model. It supports models exported by PyTorch.

    Args:
        onnx_model_path (str): the path of the input onnx model.
        use_gpu (bool): whether the optimized model is targeted to run on GPU.
        optimized_model_path (str or None): the path of the optimized model.

    Returns:
        optimized_model_path: the path of the optimized model
    """
    import onnxruntime

    if use_gpu and 'CUDAExecutionProvider' not in onnxruntime.get_available_providers():
        logger.error("There is no gpu for onnxruntime to do optimization.")
        return onnx_model_path

    sess_options = onnxruntime.SessionOptions()
    if opt_level == 1:
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_BASIC
    elif opt_level == 2:
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
    else:
        assert opt_level == 99
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL

    if optimized_model_path is None:
        path_prefix = onnx_model_path[:-5]  # remove the .onnx suffix
        optimized_model_path = "{}_ort_{}.onnx".format(path_prefix, "gpu" if use_gpu else "cpu")

    sess_options.optimized_model_filepath = optimized_model_path

    if not use_gpu:
        session = onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=['CPUExecutionProvider'])
    else:
        session = onnxruntime.InferenceSession(onnx_model_path, sess_options)
        assert 'CUDAExecutionProvider' in session.get_providers()  # Make sure there is GPU

    assert os.path.exists(optimized_model_path) and os.path.isfile(optimized_model_path)
    logger.info("Save optimized model by onnxruntime to {}".format(optimized_model_path))
    return optimized_model_path
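A hypothetical call, assuming a model file named bert_model.onnx in the working directory. The default opt_level=99 selects ORT_ENABLE_ALL, and since no output path is given, the optimized graph is written next to the input:

optimized = optimize_by_onnxruntime("bert_model.onnx", use_gpu=False)
# Writes and returns "bert_model_ort_cpu.onnx".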
Example #5
Source File: backend_onnxruntime.py From inference with Apache License 2.0
def load(self, model_path, inputs=None, outputs=None):
    """Load model and find input/outputs from the model file."""
    opt = rt.SessionOptions()
    # enable level 3 optimizations
    # FIXME: enable below once onnxruntime 0.5 is released
    # opt.set_graph_optimization_level(3)
    self.sess = rt.InferenceSession(model_path, opt)
    # get input and output names
    if not inputs:
        self.inputs = [meta.name for meta in self.sess.get_inputs()]
    else:
        self.inputs = inputs
    if not outputs:
        self.outputs = [meta.name for meta in self.sess.get_outputs()]
    else:
        self.outputs = outputs
    return self
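In onnxruntime 0.5 and later, the level referenced by the FIXME is exposed as an attribute rather than the setter the comment mentions. A sketch of the updated form, not part of the original benchmark code:

opt = rt.SessionOptions()
# Equivalent of "level 3": apply all available graph optimizations.
opt.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL
sess = rt.InferenceSession(model_path, opt)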
Example #6
Source File: onnx_launcher.py From open_model_zoo with Apache License 2.0
def _create_session_via_execution_providers_api(self, model):
    session_options = onnx_rt.SessionOptions()
    session = onnx_rt.InferenceSession(model, sess_options=session_options)
    self.execution_providers = self.get_value_from_config('execution_providers')
    available_providers = session.get_providers()
    contains_all(available_providers, self.execution_providers)
    session.set_providers(self.execution_providers)
    return session
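The same provider negotiation can be done with plain onnxruntime outside the launcher. A minimal sketch; the model path is a placeholder, and which providers are available depends on the installed onnxruntime build:

import onnxruntime as onnx_rt

session = onnx_rt.InferenceSession("model.onnx", sess_options=onnx_rt.SessionOptions())
print(session.get_providers())                   # providers the session was created with
session.set_providers(["CPUExecutionProvider"])  # restrict execution to CPU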