Python tensorrt.Builder() Examples
The following are 30 code examples of tensorrt.Builder(), drawn from open-source projects. You can go to the original project or source file by following the links above each example, or browse the other available functions and classes of the tensorrt module.
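
Taken together, the examples share one shape: create a Builder from a Logger, create a network, populate it with a parser, set builder options, and build an engine. Below is a minimal sketch of that pattern for an ONNX model, assuming the TensorRT 6-era implicit-batch API that most of the examples use (the file path and function name are illustrative, not from any one project):

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def build_engine(onnx_path):
    # Builder -> network -> parser is the pattern nearly every example below follows.
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 30  # scratch memory for tactic selection
        builder.max_batch_size = 1
        with open(onnx_path, 'rb') as f:
            parser.parse(f.read())
        return builder.build_cuda_engine(network)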

Example #1
Source File: sample.py From iAI with MIT License
def build_engine(deploy_file, model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        builder.max_workspace_size = common.GiB(1)
        # Set the parser's plugin factory. Note that we bind the factory to a reference so
        # that we can destroy it later. (parser.plugin_factory_ext is a write-only attribute)
        parser.plugin_factory_ext = fc_factory
        # Parse the model and build the engine.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)

# Tries to load an engine from the provided engine_path, or builds and saves an engine to the engine_path.
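
Note: common.GiB(1) here (and in the other iAI samples below) comes from the common.py helper module shipped with NVIDIA's TensorRT samples. If you are adapting a snippet without that module, an equivalent stand-in is simply:

def GiB(val):
    # Gibibytes to bytes: val * 2**30.
    return val * (1 << 30)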
Example #2
Source File: tensorrt_runner.py From NeMo with Apache License 2.0
def create_network(explicit_batch=True, explicit_precision=False):
    with trt.Builder(TRT_LOGGER) as builder:
        network_flags = 0
        if explicit_batch:
            network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        if explicit_precision:
            network_flags = network_flags | (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION))
        network = builder.create_network(flags=network_flags)
        if network is None:
            logging.critical("Invalid network")
        return network
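
For reference, the bitmask built above is how TensorRT 6/7 expresses network-creation options; the EXPLICIT_BATCH flag is required on the ONNX parser path in TensorRT 7. The equivalent inline call, assuming nothing beyond the tensorrt module itself:

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(TRT_LOGGER)
# Each NetworkDefinitionCreationFlag names a bit position in the flags mask.
flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(flags)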
Example #3
Source File: caffe2tensorrt_model_converter.py From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def convert_caffe_model_to_trt(caffe_weights_file, caffe_deploy_file, trt_model_filename,
                               output_tensor_name, output_data_type, max_workspace_size, max_batch_size):
    "Convert a pair of (caffe_weights_file, caffe_deploy_file) into a trt_model_file using the given parameters"

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        if output_data_type == 'fp16':
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')
            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))
        else:
            print('Converting into fp32 (default), max_batch_size={}'.format(max_batch_size))
        builder.max_workspace_size = max_workspace_size
        builder.max_batch_size = max_batch_size

        model_tensors = parser.parse(deploy=caffe_deploy_file, model=caffe_weights_file, network=network, dtype=trt.float32)
        network.mark_output(model_tensors.find(output_tensor_name))

        trt_model_object = builder.build_cuda_engine(network)
        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception:
            print('Error: cannot serialize or write TensorRT engine to file {}.'.format(trt_model_filename))
Example #4
Source File: onnx2tensorrt_model_converter.py From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def main():
    "Parse command line and feed the conversion function"

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('onnx_model_file', type=str, help='ONNX model file')
    arg_parser.add_argument('trt_model_filename', type=str, help='TensorRT model file')
    arg_parser.add_argument('--input_tensor_name', type=str, default='input_tensor:0', help='Input tensor name')
    arg_parser.add_argument('--output_tensor_name', type=str, default='prob', help='Output tensor name')
    arg_parser.add_argument('--output_data_type', type=str, default='fp32', help='Model data type')
    arg_parser.add_argument('--max_workspace_size', type=int, default=(1 << 30), help='Builder workspace size')
    arg_parser.add_argument('--max_batch_size', type=int, default=1, help='Builder batch size')
    args = arg_parser.parse_args()

    convert_onnx_model_to_trt(args.onnx_model_file, args.trt_model_filename,
                              args.input_tensor_name, args.output_tensor_name,
                              args.output_data_type, args.max_workspace_size, args.max_batch_size)
Example #5
Source File: tf2tensorrt_model_converter.py From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def main():
    "Parse command line and feed the conversion function"

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('tf_model_filename', type=str, help='TensorFlow model file')
    arg_parser.add_argument('trt_model_filename', type=str, help='TensorRT model file')
    arg_parser.add_argument('--model_data_layout', type=str, default='NHWC', help='Model data layout (NHWC or NCHW)')
    arg_parser.add_argument('--input_layer_name', type=str, default='input', help='Input layer name')
    arg_parser.add_argument('--input_height', type=int, default=224, help='Input height')
    arg_parser.add_argument('--input_width', type=int, default=224, help='Input width')
    arg_parser.add_argument('--output_layer_name', type=str, default='MobilenetV1/Predictions/Reshape_1', help='Output layer name')
    arg_parser.add_argument('--output_data_type', type=str, default='fp32', help='Model data type')
    arg_parser.add_argument('--max_workspace_size', type=int, default=(1 << 30), help='Builder workspace size')
    arg_parser.add_argument('--max_batch_size', type=int, default=1, help='Builder batch size')
    args = arg_parser.parse_args()

    convert_tf_model_to_trt(args.tf_model_filename, args.trt_model_filename,
                            args.model_data_layout, args.input_layer_name,
                            args.input_height, args.input_width,
                            args.output_layer_name, args.output_data_type,
                            args.max_workspace_size, args.max_batch_size)
Example #6
Source File: sample.py From iAI with MIT License
def build_engine(model_file):
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)
        # Parse the UFF network.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer.
Example #7
Source File: build_engine.py From tensorrt_demos with MIT License
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model', type=str, choices=list(MODEL_SPECS.keys()))
    args = parser.parse_args()

    # Initialize
    if trt.__version__[0] < '7':
        ctypes.CDLL(LIB_FILE)
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # Compile the model into a TensorRT engine.
    model = args.model
    spec = MODEL_SPECS[model]
    dynamic_graph = add_plugin(gs.DynamicGraph(spec['input_pb']), model, spec)
    _ = uff.from_tensorflow(dynamic_graph.as_graph_def(),
                            output_nodes=['NMS'],
                            output_filename=spec['tmp_uff'],
                            text=True,
                            debug_mode=DEBUG_UFF)
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True
        parser.register_input('Input', INPUT_DIMS)
        parser.register_output('MarkOutput_0')
        parser.parse(spec['tmp_uff'], network)
        engine = builder.build_cuda_engine(network)
        buf = engine.serialize()
        with open(spec['output_bin'], 'wb') as f:
            f.write(buf)
Example #8
Source File: uff_resnet50.py From iAI with MIT License
def build_engine_uff(model_file):
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # We need to manually register the input and output nodes for UFF.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        # Load the UFF model and parse it in order to populate the TensorRT network.
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network)
Example #9
Source File: caffe_resnet50.py From iAI with MIT License
def build_engine_caffe(model_file, deploy_file):
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # Load the Caffe model and parse it in order to populate the TensorRT network.
        # This function returns an object that we can query to find tensors by name.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # For Caffe, we need to manually mark the output of the network.
        # Since we know the name of the output tensor, we can find it in model_tensors.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)
Example #10
Source File: sample.py From iAI with MIT License
def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        builder.max_workspace_size = common.GiB(1)
        builder.int8_mode = True
        builder.int8_calibrator = calib
        # Parse the Caffe model.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        # Build the engine and do int8 calibration.
        return builder.build_cuda_engine(network)
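
The calib object passed in above is not shown in this example; TensorRT expects it to implement the int8 calibrator interface. A minimal sketch of such a calibrator, assuming trt.IInt8EntropyCalibrator2 as the base class and a NumPy array of preprocessed calibration images (the class name and data handling are hypothetical, not from the original sample):

import numpy as np
import pycuda.autoinit  # creates a CUDA context on import
import pycuda.driver as cuda
import tensorrt as trt

class EntropyCalibrator(trt.IInt8EntropyCalibrator2):
    def __init__(self, data, batch_size):
        trt.IInt8EntropyCalibrator2.__init__(self)
        self.data = np.ascontiguousarray(data, dtype=np.float32)
        self.batch_size = batch_size
        self.index = 0
        # Device memory for one batch of inputs.
        self.device_input = cuda.mem_alloc(self.data[0].nbytes * batch_size)

    def get_batch_size(self):
        return self.batch_size

    def get_batch(self, names):
        if self.index + self.batch_size > len(self.data):
            return None  # no more batches: calibration stops here
        batch = np.ascontiguousarray(self.data[self.index:self.index + self.batch_size])
        cuda.memcpy_htod(self.device_input, batch)
        self.index += self.batch_size
        return [int(self.device_input)]

    def read_calibration_cache(self):
        return None  # always calibrate from scratch in this sketch

    def write_calibration_cache(self, cache):
        pass  # caching omitted for brevity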
Example #11
Source File: sample.py From iAI with MIT License
def build_engine(weights):
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
        builder.max_workspace_size = common.GiB(1)
        # Set the refit flag in the builder.
        builder.refittable = True
        # Populate the network using weights from the PyTorch model.
        populate_network(network, weights)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a random test case from pytorch's DataLoader
Example #12
Source File: sample.py From iAI with MIT License
def build_engine(model_path):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)
        uff_path = model_to_uff(model_path)
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(uff_path, network)
        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer. Returns loaded test case label.
Example #13
Source File: sample.py From iAI with MIT License
def build_engine(model_file):
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)
        # Parse the UFF network.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer.
Example #14
Source File: module.py From torch2trt with MIT License
def build_tensorrt(self, net, torch_inputs):
    if self.input_names is None:
        input_names = get_torch_forward_name(net.forward)
    else:
        input_names = self.input_names
    self.graph_pth = torch2trt.GraphModule(net, torch_inputs)
    self.output_names = []
    with trt.Builder(self.logger) as builder, builder.create_network() as trt_net:
        builder.max_workspace_size = self.workspace
        builder.max_batch_size = self.max_batchsize
        builder.fp16_mode = builder.platform_has_fast_fp16
        # builder.refittable = False
        if self.builder_config_fn is not None:
            self.builder_config_fn(builder)
        with torch2trt.trt_network(trt_net):
            inputs = []
            for i, arg in enumerate(torch_inputs):
                name = input_names[i]
                inp = trt_net.add_input(name=name, shape=arg.shape[1:], dtype=trt.float32)
                inputs.append(inp)
            outputs = self.graph_pth(*inputs, verbose=self.verbose)
        self.refit_weight_dict = self.graph_pth.graph.refit_weight_dict
        if not isinstance(outputs, (list, tuple)):
            outputs = [outputs]
        for i, out in enumerate(outputs):
            name = "output{}".format(i)
            out.name = name
            self.output_names.append(name)
            trt_net.mark_output(tensor=out)
        self.builder = builder
        if self.net_post_fn is not None:
            self.net_post_fn(trt_net)
        self.engine = builder.build_cuda_engine(trt_net)
    self.ctx = self.engine.create_execution_context()
    self.ctx = torch2trt.TorchInferenceContext(self.ctx)
    # Get output shapes.
    outputs = self.graph_pth(*torch_inputs)
    if not isinstance(outputs, (list, tuple)):
        outputs = [outputs]
    self.output_shapes = {}
    for n, v in zip(self.output_names, outputs):
        self.output_shapes[n] = v.shape[1:]
Example #15
Source File: tensorrt_loaders.py From NeMo with Apache License 2.0
def __call__(self):
    class DummyContextManager(object):
        def __enter__(self):
            return None

        def __exit__(self, exc_type, exc_value, traceback):
            return None

    network_parser = self.network_loader()
    try:
        network, parser = network_parser
        assert isinstance(network, trt.INetworkDefinition)
    except (ValueError, AssertionError):
        network = network_parser
        parser = DummyContextManager()

    with trt.Builder(TRT_LOGGER) as builder, network, parser:
        if self.preprocess_network:
            logging.debug("Applying network preprocessing: {:}".format(self.preprocess_network))
            self.preprocess_network(network)

        if self.layerwise:
            TensorRTRunnerV2.mark_layerwise(network)

        if logging.getEffectiveLevel() <= logging.DEBUG:
            TensorRTRunnerV2.log_network(network)

        config = builder.create_builder_config()
        profile = TensorRTRunnerV2.build_profile(builder, network, self.profile_shapes)
        config.add_optimization_profile(profile)
        config.max_workspace_size = int(self.max_workspace_size)

        if self.fp16_mode:
            config.flags = 1 << int(trt.BuilderFlag.FP16)
        if self.int8_mode:
            config.flags = config.flags | (1 << int(trt.BuilderFlag.INT8))
            if not network.has_explicit_precision:
                if not self.calibrator:
                    logging.critical("Network does not have explicit precision. A calibrator must be provided in order to use int8 mode.")
                self.calibrator.set_input_metadata(get_input_metadata_from_profile(profile, network))
                config.int8_calibrator = self.calibrator

        logging.debug("Using builder configuration flags: {:}".format(config.flags))
        logging.info("Building engine: max workspace size={:} bytes, fp16={:}, int8={:}, layerwise={:}".format(
            self.max_workspace_size, self.fp16_mode, self.int8_mode, self.layerwise))
        engine = builder.build_engine(network, config)
        self.written_engine_path = write_timestamped(
            contents=lambda: engine.serialize(), dir=self.write_engine, name="tensorrt_runner_v2.engine"
        )
        return engine
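
Unlike the earlier samples, this example drives the build through an IBuilderConfig (builder.create_builder_config()) rather than through attributes on the builder itself, which is the path TensorRT 7 moved to. Stripped to its essentials, and assuming builder and network already exist, the config-based build looks like:

config = builder.create_builder_config()
config.max_workspace_size = 1 << 30
config.set_flag(trt.BuilderFlag.FP16)  # same effect as OR-ing the FP16 bit into config.flags
engine = builder.build_engine(network, config)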
Example #16
Source File: tf2tensorrt_model_converter.py From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def convert_tf_model_to_trt(tf_model_filename, trt_model_filename, model_data_layout,
                            input_layer_name, input_height, input_width,
                            output_layer_name, output_data_type, max_workspace_size, max_batch_size):
    "Convert a tf_model_filename into a trt_model_filename using the given parameters"

    uff_model = uff.from_tensorflow_frozen_model(tf_model_filename)

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        if model_data_layout == 'NHWC':
            parser.register_input(input_layer_name, [input_height, input_width, 3], trt.UffInputOrder.NHWC)
        else:
            parser.register_input(input_layer_name, [3, input_height, input_width], trt.UffInputOrder.NCHW)
        parser.register_output(output_layer_name)

        if not parser.parse_buffer(uff_model, network):
            raise RuntimeError("UFF model parsing (originally from {}) failed. Error: {}".format(tf_model_filename, parser.get_error(0).desc()))

        if output_data_type == 'fp32':
            print('Converting into fp32 (default), max_batch_size={}'.format(max_batch_size))
        else:
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')
            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))
        builder.max_workspace_size = max_workspace_size
        builder.max_batch_size = max_batch_size

        trt_model_object = builder.build_cuda_engine(network)
        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception:
            raise RuntimeError('Cannot serialize or write TensorRT engine to file {}.'.format(trt_model_filename))
Example #17
Source File: sample.py From iAI with MIT License
def build_engine(deploy_file, model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        builder.max_workspace_size = common.GiB(1)
        # Set the parser's plugin factory. Note that we bind the factory to a reference so
        # that we can destroy it later. (parser.plugin_factory_ext is a write-only attribute)
        parser.plugin_factory_ext = fc_factory
        # Parse the model and build the engine.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer.
Example #18
Source File: engine.py From iAI with MIT License
def build_engine(uff_model_path, trt_logger, trt_engine_datatype=trt.DataType.FLOAT, batch_size=1, silent=False):
    with trt.Builder(trt_logger) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 30
        if trt_engine_datatype == trt.DataType.HALF:
            builder.fp16_mode = True
        builder.max_batch_size = batch_size
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output("MarkOutput_0")
        parser.parse(uff_model_path, network)
        if not silent:
            print("Building TensorRT engine. This may take a few minutes.")
        return builder.build_cuda_engine(network)
Example #19
Source File: mnist_uff_custom_plugin.py From iAI with MIT License
def build_engine(model_path):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)
        uff_path = model_to_uff(model_path)
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(uff_path, network)
        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer. Returns loaded test case label.
Example #20
Source File: sample.py From iAI with MIT License
def build_engine(weights):
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
        builder.max_workspace_size = common.GiB(1)
        # Populate the network using weights from the PyTorch model.
        populate_network(network, weights)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a random test case from pytorch's DataLoader
Example #21
Source File: uff_resnet50.py From iAI with MIT License
def build_engine_uff(model_file):
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # We need to manually register the input and output nodes for UFF.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        # Load the UFF model and parse it in order to populate the TensorRT network.
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network)
Example #22
Source File: build_engine.py From keras_imagenet with MIT License
def build_engine(onnx, verbose=False):
    """Build a TensorRT engine from the ONNX model."""
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 30  # 1GB
        builder.max_batch_size = MAX_BATCH
        builder.fp16_mode = FP16_MODE
        with open(onnx, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        if trt.__version__[0] >= '7':
            # Set the input to batch size 1.
            shape = list(network.get_input(0).shape)
            shape[0] = 1
            network.get_input(0).shape = shape
        return builder.build_cuda_engine(network)
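
The *EXPLICIT_BATCH argument unpacked above refers to a module-level constant that is not shown in this file; in NVIDIA's samples it is conventionally defined as follows, which is presumably what this project uses as well:

EXPLICIT_BATCH = [1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)]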
Example #23
Source File: onnx2tensorrt_model_converter.py From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def convert_onnx_model_to_trt(onnx_model_filename, trt_model_filename,
                              input_tensor_name, output_tensor_name,
                              output_data_type, max_workspace_size, max_batch_size):
    "Convert an onnx_model_filename into a trt_model_filename using the given parameters"

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    TRT_VERSION_MAJOR = int(trt.__version__.split('.')[0])

    with trt.Builder(TRT_LOGGER) as builder:
        if TRT_VERSION_MAJOR >= 7:
            flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION)) | \
                   (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
            network = builder.create_network(flag)
        else:
            network = builder.create_network()
        parser = trt.OnnxParser(network, TRT_LOGGER)

        if output_data_type == 'fp32':
            print('Converting into fp32 (default), max_batch_size={}'.format(max_batch_size))
            builder.fp16_mode = False
        else:
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')
            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))
        builder.max_workspace_size = max_workspace_size
        builder.max_batch_size = max_batch_size

        with open(onnx_model_filename, 'rb') as onnx_model_file:
            onnx_model = onnx_model_file.read()
        if not parser.parse(onnx_model):
            raise RuntimeError("ONNX model parsing from {} failed. Error: {}".format(onnx_model_filename, parser.get_error(0).desc()))

        if TRT_VERSION_MAJOR >= 7:
            # Create an optimization profile (see Section 7.2 of https://docs.nvidia.com/deeplearning/sdk/pdf/TensorRT-Developer-Guide.pdf).
            profile = builder.create_optimization_profile()
            # FIXME: Hardcoded for ImageNet. The minimum/optimum/maximum dimensions of a dynamic input tensor are the same.
            profile.set_shape(input_tensor_name, (1, 3, 224, 224), (max_batch_size, 3, 224, 224), (max_batch_size, 3, 224, 224))
            config = builder.create_builder_config()
            config.add_optimization_profile(profile)
            trt_model_object = builder.build_engine(network, config)
        else:
            trt_model_object = builder.build_cuda_engine(network)

        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception:
            raise RuntimeError('Cannot serialize or write TensorRT engine to file {}.'.format(trt_model_filename))
Example #24
Source File: export_jasper_onnx_to_trt.py From NeMo with Apache License 2.0
def build_engine(
    onnx_path,
    seq_len=192,
    max_seq_len=256,
    batch_size=8,
    max_batch_size=64,
    trt_fp16=True,
    verbose=True,
    max_workspace_size=None,
    encoder=True,
):
    """Builds a TRT engine from an ONNX file.

    Note that network output 1 is unmarked so that the engine will not use vestigial length
    calculations associated with masked_fill.
    """
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(TRT_LOGGER)
    builder.max_batch_size = max_batch_size

    with open(onnx_path, 'rb') as model_fh:
        model = model_fh.read()

    model_onnx = onnx.load_model_from_string(model)
    input_feats = model_onnx.graph.input[0].type.tensor_type.shape.dim[1].dim_value
    input_name = model_onnx.graph.input[0].name

    if trt_fp16:
        builder.fp16_mode = True
        print("Optimizing for FP16")
        config_flags = 1 << int(trt.BuilderFlag.FP16)  # | 1 << int(trt.BuilderFlag.STRICT_TYPES)
    else:
        config_flags = 0
    builder.max_workspace_size = max_workspace_size if max_workspace_size else (4 * 1024 * 1024 * 1024)

    config = builder.create_builder_config()
    config.flags = config_flags

    profile = builder.create_optimization_profile()
    profile.set_shape(
        input_name,
        min=(1, input_feats, seq_len),
        opt=(batch_size, input_feats, seq_len),
        max=(max_batch_size, input_feats, max_seq_len),
    )
    config.add_optimization_profile(profile)

    explicit_batch = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch)

    with trt.OnnxParser(network, TRT_LOGGER) as parser:
        parsed = parser.parse(model)
        print("Parsing returned ", parsed)
        return builder.build_engine(network, config=config)
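
Because this engine is built with an optimization profile, each inference must pin the dynamic input to a concrete shape before execution. A hedged follow-on sketch, assuming the engine returned above and that the dynamic input sits at binding index 0 (an assumption, not shown in the source):

context = engine.create_execution_context()
# Pick a shape within the [min, max] range set on the profile above.
context.set_binding_shape(0, (1, input_feats, 192))
assert context.all_binding_shapes_specified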
Example #25
Source File: onnx_resnet50.py From iAI with MIT License
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        # Load the ONNX model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        return builder.build_cuda_engine(network)
Example #26
Source File: onnx_to_tensorrt.py From yolov3-tensorrt with MIT License
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30  # 1GB
            builder.max_batch_size = 1
            builder.fp16_mode = True
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
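
Whichever branch runs, get_engine() returns an ICudaEngine. Below is a sketch of what a caller typically does with it, using pycuda for buffer management; the file paths and the synchronous copy/execute flow are illustrative, not taken from the original project:

import numpy as np
import pycuda.autoinit  # creates a CUDA context on import
import pycuda.driver as cuda

engine = get_engine('yolov3.onnx', 'yolov3.trt')  # illustrative paths
context = engine.create_execution_context()

# One pagelocked host buffer and one device buffer per binding.
bindings, host_bufs, dev_bufs = [], [], []
for binding in engine:
    size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
    dtype = trt.nptype(engine.get_binding_dtype(binding))
    host = cuda.pagelocked_empty(size, dtype)
    dev = cuda.mem_alloc(host.nbytes)
    bindings.append(int(dev))
    host_bufs.append(host)
    dev_bufs.append(dev)

# Fill host_bufs[0] with preprocessed input, then run synchronously.
cuda.memcpy_htod(dev_bufs[0], host_bufs[0])
context.execute(batch_size=1, bindings=bindings)
for host, dev in zip(host_bufs[1:], dev_bufs[1:]):
    cuda.memcpy_dtoh(host, dev)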
Example #27
Source File: onnx_to_tensorrt.py From iAI with MIT License
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30  # 1GB
            builder.max_batch_size = 1
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
Example #28
Source File: onnx_to_tensorrt.py From iAI with MIT License
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 28  # 256MiB
            builder.max_batch_size = 1
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
Example #29
Source File: speed_gpu.py From Real-time-GesRec with MIT License
def build_engine(model_path):
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 30
        builder.max_batch_size = 1
        with open(model_path, "rb") as f:
            parser.parse(f.read())
        engine = builder.build_cuda_engine(network)
        return engine
Example #30
Source File: onnx_to_tensorrt.py From tensorrt_demos with MIT License
def build_engine(onnx_file_path, engine_file_path, verbose=False):
    """Takes an ONNX file and creates a TensorRT engine."""
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True
        #builder.strict_type_constraints = True
        # Parse model file
        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        if trt.__version__[0] >= '7':
            # The actual yolov3.onnx is generated with batch size 64.
            # Reshape the input to batch size 1.
            shape = list(network.get_input(0).shape)
            shape[0] = 1
            network.get_input(0).shape = shape
        print('Completed parsing of ONNX file')
        print('Building an engine; this may take a while...')
        engine = builder.build_cuda_engine(network)
        print('Completed creating engine')
        with open(engine_file_path, 'wb') as f:
            f.write(engine.serialize())
        return engine