Python tensorrt.Builder() Examples

The following are 30 code examples of tensorrt.Builder(), collected from open-source projects. The original project and source file for each example are noted above it. You may also want to check out all available functions and classes of the tensorrt module.
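Before the examples, here is a minimal sketch of the typical trt.Builder workflow. The logger severity and workspace size are illustrative assumptions, and the implicit-batch create_network() call matches the pre-TensorRT-7 API used by most examples below:

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# The builder owns engine construction: create a network definition,
# populate it (via a parser or the layer API), then build the engine.
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
    builder.max_workspace_size = 1 << 30  # 1 GiB scratch space, an arbitrary choice
    # ... populate `network` here ...
    engine = builder.build_cuda_engine(network)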
Example #1
Source File: sample.py    From iAI with MIT License
def build_engine(deploy_file, model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        builder.max_workspace_size = common.GiB(1)

        # Set the parser's plugin factory. Note that we bind the factory to a reference so
        # that we can destroy it later. (parser.plugin_factory_ext is a write-only attribute)
        parser.plugin_factory_ext = fc_factory

        # Parse the model and build the engine.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)

# Tries to load an engine from the provided engine_path, or builds and saves an engine to the engine_path. 
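The helper this comment describes is not included in the snippet above; a minimal sketch of that load-or-build pattern, assuming the TRT_LOGGER and build_engine() from Example #1 (deploy_file and model_file are hypothetical arguments), could look like this:

import os
import tensorrt as trt

def get_engine(engine_path, deploy_file, model_file):
    # Reuse a previously serialized engine if one exists on disk.
    if os.path.exists(engine_path):
        with open(engine_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    # Otherwise build a fresh engine and cache its serialized form.
    engine = build_engine(deploy_file, model_file)
    with open(engine_path, 'wb') as f:
        f.write(engine.serialize())
    return engine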
Example #2
Source File: tensorrt_runner.py    From NeMo with Apache License 2.0
def create_network(explicit_batch=True, explicit_precision=False):
    with trt.Builder(TRT_LOGGER) as builder:
        network_flags = 0
        if explicit_batch:
            network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        if explicit_precision:
            network_flags = network_flags | (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION))
        network = builder.create_network(flags=network_flags)
        if network is None:
            logging.critical("Invalid network")
        return network
Example #3
Source File: caffe2tensorrt_model_converter.py    From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def convert_caffe_model_to_trt(caffe_weights_file, caffe_deploy_file, trt_model_filename,
                               output_tensor_name, output_data_type, max_workspace_size, max_batch_size):
    "Convert a pair of (caffe_weights_file,caffe_deploy_file) into a trt_model_file using the given parameters"

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:

        if output_data_type == 'fp16':
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')

            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))
        else:
            print('Converting into fp32 (default), max_batch_size={}'.format(max_batch_size))

        builder.max_workspace_size  = max_workspace_size
        builder.max_batch_size      = max_batch_size

        model_tensors       = parser.parse(deploy=caffe_deploy_file, model=caffe_weights_file, network=network, dtype=trt.float32)
        network.mark_output(model_tensors.find(output_tensor_name))

        trt_model_object    = builder.build_cuda_engine(network)

        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception:
            print('Error: cannot serialize or write TensorRT engine to file {}.'.format(trt_model_filename)) 
Example #4
Source File: onnx2tensorrt_model_converter.py    From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def main():
    "Parse command line and feed the conversion function"

    arg_parser  = argparse.ArgumentParser()
    arg_parser.add_argument('onnx_model_file',      type=str,                             help='Onnx model file')
    arg_parser.add_argument('trt_model_filename',   type=str,                             help='TensorRT model file')
    arg_parser.add_argument('--input_tensor_name',  type=str,   default='input_tensor:0', help='Input tensor name')
    arg_parser.add_argument('--output_tensor_name', type=str,   default='prob',           help='Output tensor name')
    arg_parser.add_argument('--output_data_type',   type=str,   default='fp32',           help='Model data type')
    arg_parser.add_argument('--max_workspace_size', type=int,   default=(1<<30),          help='Builder workspace size')
    arg_parser.add_argument('--max_batch_size',     type=int,   default=1,                help='Builder batch size')
    args        = arg_parser.parse_args()

    convert_onnx_model_to_trt( args.onnx_model_file, args.trt_model_filename,
                               args.input_tensor_name, args.output_tensor_name,
                               args.output_data_type, args.max_workspace_size, args.max_batch_size ) 
Example #5
Source File: tf2tensorrt_model_converter.py    From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def main():
    "Parse command line and feed the conversion function"

    arg_parser  = argparse.ArgumentParser()
    arg_parser.add_argument('tf_model_filename',    type=str,                       help='TensorFlow model file')
    arg_parser.add_argument('trt_model_filename',   type=str,                       help='TensorRT model file')
    arg_parser.add_argument('--model_data_layout',  type=str,   default='NHWC',     help='Model data layout (NHWC or NCHW)')
    arg_parser.add_argument('--input_layer_name',   type=str,   default='input',    help='Input layer name')
    arg_parser.add_argument('--input_height',       type=int,   default=224,        help='Input height')
    arg_parser.add_argument('--input_width',        type=int,   default=224,        help='Input width')
    arg_parser.add_argument('--output_layer_name',  type=str,   default='MobilenetV1/Predictions/Reshape_1', help='Output layer name')
    arg_parser.add_argument('--output_data_type',   type=str,   default='fp32',     help='Model data type')
    arg_parser.add_argument('--max_workspace_size', type=int,   default=(1<<30),    help='Builder workspace size')
    arg_parser.add_argument('--max_batch_size',     type=int,   default=1,          help='Builder batch size')
    args        = arg_parser.parse_args()

    convert_tf_model_to_trt( args.tf_model_filename, args.trt_model_filename,
                                args.model_data_layout, args.input_layer_name, args.input_height, args.input_width,
                                args.output_layer_name, args.output_data_type, args.max_workspace_size, args.max_batch_size ) 
Example #6
Source File: sample.py    From iAI with MIT License
def build_engine(model_file):
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)
        # Parse the Uff Network
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer. 
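The loader referred to here is omitted from the snippet; a hedged sketch, assuming the usual MNIST-sample setup (a page-locked host buffer from the samples' common helpers, 28x28 grayscale images, and the samples' 1.0 - x/255.0 normalization), might be:

import random
import numpy as np
from PIL import Image

def load_test_case(image_paths, pagelocked_buffer):
    # Pick a random test image, flatten and normalize it, then copy it
    # into the page-locked host buffer used for the host-to-device copy.
    case_path = random.choice(image_paths)
    img = np.array(Image.open(case_path)).ravel().astype(np.float32)
    np.copyto(pagelocked_buffer, 1.0 - img / 255.0)
    return case_path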
Example #7
Source File: build_engine.py    From tensorrt_demos with MIT License
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model', type=str, choices=list(MODEL_SPECS.keys()))
    args = parser.parse_args()

    # initialize
    if int(trt.__version__.split('.')[0]) < 7:
        ctypes.CDLL(LIB_FILE)
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # compile the model into TensorRT engine
    model = args.model
    spec = MODEL_SPECS[model]
    dynamic_graph = add_plugin(
        gs.DynamicGraph(spec['input_pb']),
        model,
        spec)
    _ = uff.from_tensorflow(
        dynamic_graph.as_graph_def(),
        output_nodes=['NMS'],
        output_filename=spec['tmp_uff'],
        text=True,
        debug_mode=DEBUG_UFF)
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', INPUT_DIMS)
        parser.register_output('MarkOutput_0')
        parser.parse(spec['tmp_uff'], network)
        engine = builder.build_cuda_engine(network)

        buf = engine.serialize()
        with open(spec['output_bin'], 'wb') as f:
            f.write(buf) 
Example #8
Source File: uff_resnet50.py    From iAI with MIT License
def build_engine_uff(model_file):
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # We need to manually register the input and output nodes for UFF.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        # Load the UFF model and parse it in order to populate the TensorRT network.
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network) 
Example #9
Source File: caffe_resnet50.py    From iAI with MIT License
def build_engine_caffe(model_file, deploy_file):
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # Load the Caffe model and parse it in order to populate the TensorRT network.
        # This function returns an object that we can query to find tensors by name.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # For Caffe, we need to manually mark the output of the network.
        # Since we know the name of the output tensor, we can find it in model_tensors.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network) 
Example #10
Source File: sample.py    From iAI with MIT License
def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        builder.max_workspace_size = common.GiB(1)
        builder.int8_mode = True
        builder.int8_calibrator = calib
        # Parse Caffe model
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        # Build engine and do int8 calibration.
        return builder.build_cuda_engine(network) 
Example #11
Source File: sample.py    From iAI with MIT License
def build_engine(weights):
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
        builder.max_workspace_size = common.GiB(1)
        # Set the refit flag in the builder
        builder.refittable = True
        # Populate the network using weights from the PyTorch model.
        populate_network(network, weights)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a random test case from pytorch's DataLoader 
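The DataLoader helper is not shown in this snippet; a sketch assuming a torchvision MNIST test set (the dataset path and transform are illustrative choices) could be:

import numpy as np
from torchvision import datasets, transforms

def load_random_test_case(pagelocked_buffer):
    # Draw one random MNIST test sample and copy it into the page-locked
    # host buffer; return the ground-truth label for checking the output.
    dataset = datasets.MNIST('/tmp/mnist', train=False, download=True,
                             transform=transforms.ToTensor())
    img, label = dataset[np.random.randint(len(dataset))]
    np.copyto(pagelocked_buffer, img.numpy().ravel())
    return label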
Example #12
Source File: sample.py    From iAI with MIT License
def build_engine(model_path):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)

        uff_path = model_to_uff(model_path)
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(uff_path, network)

        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer. Returns loaded test case label. 
Example #13
Source File: sample.py    From iAI with MIT License
def build_engine(model_file):
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)
        # Parse the Uff Network
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer. 
Example #14
Source File: module.py    From torch2trt with MIT License
def build_tensorrt(self, net, torch_inputs):
    if self.input_names is None:
        input_names = get_torch_forward_name(net.forward)
    else:
        input_names = self.input_names
    self.graph_pth = torch2trt.GraphModule(net, torch_inputs)
    self.output_names = []
    with trt.Builder(
            self.logger) as builder, builder.create_network() as trt_net:
        builder.max_workspace_size = self.workspace
        builder.max_batch_size = self.max_batchsize
        builder.fp16_mode = builder.platform_has_fast_fp16
        # builder.refittable = False
        if self.builder_config_fn is not None:
            self.builder_config_fn(builder)
        with torch2trt.trt_network(trt_net):
            inputs = []
            for i, arg in enumerate(torch_inputs):
                name = input_names[i]
                inp = trt_net.add_input(name=name,
                                        shape=arg.shape[1:],
                                        dtype=trt.float32)
                inputs.append(inp)
            outputs = self.graph_pth(*inputs, verbose=self.verbose)
        self.refit_weight_dict = self.graph_pth.graph.refit_weight_dict
        if not isinstance(outputs, (list, tuple)):
            outputs = [outputs]
        for i, out in enumerate(outputs):
            name = "output{}".format(i)
            out.name = name
            self.output_names.append(name)
            trt_net.mark_output(tensor=out)
        self.builder = builder
        if self.net_post_fn is not None:
            self.net_post_fn(trt_net)
        self.engine = builder.build_cuda_engine(trt_net)
        self.ctx = self.engine.create_execution_context()
        self.ctx = torch2trt.TorchInferenceContext(self.ctx)
    # get output shapes
    outputs = self.graph_pth(*torch_inputs)
    if not isinstance(outputs, (list, tuple)):
        outputs = [outputs]
    self.output_shapes = {}
    for n, v in zip(self.output_names, outputs):
        self.output_shapes[n] = v.shape[1:]
Example #15
Source File: tensorrt_loaders.py    From NeMo with Apache License 2.0
def __call__(self):
    class DummyContextManager(object):
        def __enter__(self):
            return None

        def __exit__(self, exc_type, exc_value, traceback):
            return None

    network_parser = self.network_loader()
    try:
        network, parser = network_parser
        assert isinstance(network, trt.INetworkDefinition)
    except (ValueError, AssertionError):
        network = network_parser
        parser = DummyContextManager()

    with trt.Builder(TRT_LOGGER) as builder, network, parser:
        if self.preprocess_network:
            logging.debug("Applying network preprocessing: {:}".format(self.preprocess_network))
            self.preprocess_network(network)

        if self.layerwise:
            TensorRTRunnerV2.mark_layerwise(network)

        if logging.getEffectiveLevel() <= logging.DEBUG:
            TensorRTRunnerV2.log_network(network)

        config = builder.create_builder_config()
        profile = TensorRTRunnerV2.build_profile(builder, network, self.profile_shapes)
        config.add_optimization_profile(profile)

        config.max_workspace_size = int(self.max_workspace_size)
        if self.fp16_mode:
            config.flags = 1 << int(trt.BuilderFlag.FP16)
        if self.int8_mode:
            config.flags = config.flags | 1 << int(trt.BuilderFlag.INT8)
            if not network.has_explicit_precision:
                if not self.calibrator:
                    logging.critical(
                        "Network does not have explicit precision. A calibrator must be provided in order to use int8 mode."
                    )
                self.calibrator.set_input_metadata(get_input_metadata_from_profile(profile, network))
                config.int8_calibrator = self.calibrator

        logging.debug("Using builder configuration flags: {:}".format(config.flags))
        logging.info(
            "Building engine: max workspace size={:} bytes, fp16={:}, int8={:}, layerwise={:}".format(
                self.max_workspace_size, self.fp16_mode, self.int8_mode, self.layerwise
            )
        )
        engine = builder.build_engine(network, config)
        self.written_engine_path = write_timestamped(
            contents=lambda: engine.serialize(), dir=self.write_engine, name="tensorrt_runner_v2.engine"
        )
        return engine
Example #16
Source File: tf2tensorrt_model_converter.py    From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def convert_tf_model_to_trt(tf_model_filename, trt_model_filename,
                               model_data_layout, input_layer_name, input_height, input_width,
                               output_layer_name, output_data_type, max_workspace_size, max_batch_size):
    "Convert an tf_model_filename into a trt_model_filename using the given parameters"

    uff_model = uff.from_tensorflow_frozen_model(tf_model_filename)

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:

        if model_data_layout == 'NHWC':
            parser.register_input(input_layer_name, [input_height, input_width, 3], trt.UffInputOrder.NHWC)
        else:
            parser.register_input(input_layer_name, [3, input_height, input_width], trt.UffInputOrder.NCHW)

        parser.register_output(output_layer_name)

        if not parser.parse_buffer(uff_model, network):
            raise RuntimeError("UFF model parsing (originally from {}) failed. Error: {}".format(tf_model_filename, parser.get_error(0).desc()))

        if output_data_type == 'fp32':
            print('Converting into fp32 (default), max_batch_size={}'.format(max_batch_size))
        else:
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')

            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))

        builder.max_workspace_size  = max_workspace_size
        builder.max_batch_size      = max_batch_size


        trt_model_object    = builder.build_cuda_engine(network)

        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception:
            raise RuntimeError('Cannot serialize or write TensorRT engine to file {}.'.format(trt_model_filename)) 
Example #17
Source File: sample.py    From iAI with MIT License
def build_engine(deploy_file, model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        builder.max_workspace_size = common.GiB(1)

        # Set the parser's plugin factory. Note that we bind the factory to a reference so
        # that we can destroy it later. (parser.plugin_factory_ext is a write-only attribute)
        parser.plugin_factory_ext = fc_factory

        # Parse the model and build the engine.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)

        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer. 
Example #18
Source File: engine.py    From iAI with MIT License
def build_engine(uff_model_path, trt_logger, trt_engine_datatype=trt.DataType.FLOAT, batch_size=1, silent=False):
    with trt.Builder(trt_logger) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 30
        if trt_engine_datatype == trt.DataType.HALF:
            builder.fp16_mode = True
        builder.max_batch_size = batch_size

        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output("MarkOutput_0")
        parser.parse(uff_model_path, network)

        if not silent:
            print("Building TensorRT engine. This may take few minutes.")

        return builder.build_cuda_engine(network) 
Example #19
Source File: mnist_uff_custom_plugin.py    From iAI with MIT License
def build_engine(model_path):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)

        uff_path = model_to_uff(model_path)
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(uff_path, network)

        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer. Returns loaded test case label. 
Example #20
Source File: sample.py    From iAI with MIT License
def build_engine(weights):
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
        builder.max_workspace_size = common.GiB(1)
        # Populate the network using weights from the PyTorch model.
        populate_network(network, weights)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a random test case from pytorch's DataLoader 
Example #21
Source File: uff_resnet50.py    From iAI with MIT License
def build_engine_uff(model_file):
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # We need to manually register the input and output nodes for UFF.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        # Load the UFF model and parse it in order to populate the TensorRT network.
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network) 
Example #22
Source File: build_engine.py    From keras_imagenet with MIT License
def build_engine(onnx, verbose=False):
    """Build TensorRT engine from the ONNX model."""
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 30  # 1GB
        builder.max_batch_size = MAX_BATCH
        builder.fp16_mode = FP16_MODE
        with open(onnx, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        if int(trt.__version__.split('.')[0]) >= 7:
            # set input to batch size 1
            shape = list(network.get_input(0).shape)
            shape[0] = 1
            network.get_input(0).shape = shape
        return builder.build_cuda_engine(network) 
Example #23
Source File: onnx2tensorrt_model_converter.py    From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def convert_onnx_model_to_trt(onnx_model_filename, trt_model_filename,
                              input_tensor_name, output_tensor_name,
                              output_data_type, max_workspace_size, max_batch_size):
    "Convert an onnx_model_filename into a trt_model_filename using the given parameters"

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    TRT_VERSION_MAJOR = int(trt.__version__.split('.')[0])

    with trt.Builder(TRT_LOGGER) as builder:
        if TRT_VERSION_MAJOR >= 7:
            flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION)) | (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
            network = builder.create_network(flag)
        else:
            network = builder.create_network()
        parser = trt.OnnxParser(network, TRT_LOGGER)

        if output_data_type == 'fp32':
            print('Converting into fp32 (default), max_batch_size={}'.format(max_batch_size))
            builder.fp16_mode = False
        else:
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')

            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))

        builder.max_workspace_size  = max_workspace_size
        builder.max_batch_size      = max_batch_size

        with open(onnx_model_filename, 'rb') as onnx_model_file:
            onnx_model = onnx_model_file.read()

        if not parser.parse(onnx_model):
            raise RuntimeError("Onnx model parsing from {} failed. Error: {}".format(onnx_model_filename, parser.get_error(0).desc()))

        if TRT_VERSION_MAJOR >= 7:
            # Create an optimization profile (see Section 7.2 of https://docs.nvidia.com/deeplearning/sdk/pdf/TensorRT-Developer-Guide.pdf).
            profile = builder.create_optimization_profile()
            # FIXME: Hardcoded for ImageNet. The minimum/optimum/maximum dimensions of a dynamic input tensor are the same.
            profile.set_shape(input_tensor_name, (1, 3, 224, 224), (max_batch_size, 3, 224, 224), (max_batch_size, 3, 224, 224))

            config = builder.create_builder_config()
            config.add_optimization_profile(profile)

            trt_model_object = builder.build_engine(network, config)
        else:
            trt_model_object = builder.build_cuda_engine(network)

        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception:
            raise RuntimeError('Cannot serialize or write TensorRT engine to file {}.'.format(trt_model_filename)) 
Example #24
Source File: export_jasper_onnx_to_trt.py    From NeMo with Apache License 2.0
def build_engine(
    onnx_path,
    seq_len=192,
    max_seq_len=256,
    batch_size=8,
    max_batch_size=64,
    trt_fp16=True,
    verbose=True,
    max_workspace_size=None,
    encoder=True,
):
    """Builds TRT engine from an ONNX file
    Note that network output 1 is unmarked so that the engine will not use
    vestigial length calculations associated with masked_fill
    """
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(TRT_LOGGER)
    builder.max_batch_size = max_batch_size

    with open(onnx_path, 'rb') as model_fh:
        model = model_fh.read()

    model_onnx = onnx.load_model_from_string(model)
    input_feats = model_onnx.graph.input[0].type.tensor_type.shape.dim[1].dim_value
    input_name = model_onnx.graph.input[0].name

    if trt_fp16:
        builder.fp16_mode = True
        print("Optimizing for FP16")
        config_flags = 1 << int(trt.BuilderFlag.FP16)  # | 1 << int(trt.BuilderFlag.STRICT_TYPES)
    else:
        config_flags = 0
    builder.max_workspace_size = max_workspace_size if max_workspace_size else (4 * 1024 * 1024 * 1024)

    config = builder.create_builder_config()
    config.flags = config_flags

    profile = builder.create_optimization_profile()
    profile.set_shape(
        input_name,
        min=(1, input_feats, seq_len),
        opt=(batch_size, input_feats, seq_len),
        max=(max_batch_size, input_feats, max_seq_len),
    )
    config.add_optimization_profile(profile)

    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch)

    with trt.OnnxParser(network, TRT_LOGGER) as parser:
        parsed = parser.parse(model)
        print("Parsing returned ", parsed)
        return builder.build_engine(network, config=config) 
Example #25
Source File: onnx_resnet50.py    From iAI with MIT License
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        return builder.build_cuda_engine(network) 
Example #26
Source File: onnx_to_tensorrt.py    From yolov3-tensorrt with MIT License
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30 # 1GB
            builder.max_batch_size = 1
            builder.fp16_mode = True
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine() 
Example #27
Source File: onnx_to_tensorrt.py    From iAI with MIT License
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30 # 1GB
            builder.max_batch_size = 1
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine() 
Example #28
Source File: onnx_to_tensorrt.py    From iAI with MIT License
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 28 # 256MiB
            builder.max_batch_size = 1
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine() 
Example #29
Source File: speed_gpu.py    From Real-time-GesRec with MIT License
def build_engine(model_path):
    with trt.Builder(TRT_LOGGER) as builder, \
        builder.create_network() as network, \
        trt.OnnxParser(network, TRT_LOGGER) as parser: 
        builder.max_workspace_size = 1<<30
        builder.max_batch_size = 1
        with open(model_path, "rb") as f:
            parser.parse(f.read())
        engine = builder.build_cuda_engine(network)
        return engine 
Example #30
Source File: onnx_to_tensorrt.py    From tensorrt_demos with MIT License
def build_engine(onnx_file_path, engine_file_path, verbose=False):
    """Takes an ONNX file and creates a TensorRT engine."""
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True
        #builder.strict_type_constraints = True

        # Parse model file
        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        if int(trt.__version__.split('.')[0]) >= 7:
            # The actual yolov3.onnx is generated with batch size 64.
            # Reshape input to batch size 1
            shape = list(network.get_input(0).shape)
            shape[0] = 1
            network.get_input(0).shape = shape
        print('Completed parsing of ONNX file')

        print('Building an engine; this may take a while...')
        engine = builder.build_cuda_engine(network)
        print('Completed creating engine')
        with open(engine_file_path, 'wb') as f:
            f.write(engine.serialize())
        return engine