Python tensorrt.Builder() Examples

The following are 30 code examples of tensorrt.Builder(), collected from open-source projects. The original project and source file for each example are noted above it. You may also want to check out all available functions and classes of the tensorrt module.
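Before the examples, here is a minimal sketch of the typical trt.Builder workflow. The logger severity and workspace size are illustrative assumptions, and the implicit-batch create_network() call matches the pre-TensorRT-7 API used by most examples below:

import tensorrt as trt

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# The builder owns engine construction: create a network definition,
# populate it (via a parser or the layer API), then build the engine.
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
    builder.max_workspace_size = 1 << 30  # 1 GiB scratch space, an arbitrary choice
    # ... populate `network` here ...
    engine = builder.build_cuda_engine(network)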
Example #1
Source File: sample.py    From iAI with MIT License
def build_engine(deploy_file, model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        builder.max_workspace_size = common.GiB(1)

        # Set the parser's plugin factory. Note that we bind the factory to a reference so
        # that we can destroy it later. (parser.plugin_factory_ext is a write-only attribute)
        parser.plugin_factory_ext = fc_factory

        # Parse the model and build the engine.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)

# Tries to load an engine from the provided engine_path, or builds and saves an engine to the engine_path. 
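The helper this comment describes is not included in the snippet above; a minimal sketch of that load-or-build pattern, assuming the TRT_LOGGER and build_engine() from Example #1 (deploy_file and model_file are hypothetical arguments), could look like this:

import os
import tensorrt as trt

def get_engine(engine_path, deploy_file, model_file):
    # Reuse a previously serialized engine if one exists on disk.
    if os.path.exists(engine_path):
        with open(engine_path, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    # Otherwise build a fresh engine and cache its serialized form.
    engine = build_engine(deploy_file, model_file)
    with open(engine_path, 'wb') as f:
        f.write(engine.serialize())
    return engine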
Example #2
Source File: tensorrt_runner.py    From NeMo with Apache License 2.0
def create_network(explicit_batch=True, explicit_precision=False):
    with trt.Builder(TRT_LOGGER) as builder:
        network_flags = 0
        if explicit_batch:
            network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        if explicit_precision:
            network_flags = network_flags | (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION))
        network = builder.create_network(flags=network_flags)
        if network is None:
            logging.critical("Invalid network")
        return network
Example #3
Source File: caffe2tensorrt_model_converter.py    From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def convert_caffe_model_to_trt(caffe_weights_file, caffe_deploy_file, trt_model_filename,
                               output_tensor_name, output_data_type, max_workspace_size, max_batch_size):
    "Convert a pair of (caffe_weights_file,caffe_deploy_file) into a trt_model_file using the given parameters"

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:

        if output_data_type == 'fp16':
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')

            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))
        else:
            print('Converting into fp32 (default), max_batch_size={}'.format(max_batch_size))

        builder.max_workspace_size  = max_workspace_size
        builder.max_batch_size      = max_batch_size

        model_tensors       = parser.parse(deploy=caffe_deploy_file, model=caffe_weights_file, network=network, dtype=trt.float32)
        network.mark_output(model_tensors.find(output_tensor_name))

        trt_model_object    = builder.build_cuda_engine(network)

        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception:
            print('Error: cannot serialize or write TensorRT engine to file {}.'.format(trt_model_filename)) 
Example #4
Source File: onnx2tensorrt_model_converter.py    From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def main():
    "Parse command line and feed the conversion function"

    arg_parser  = argparse.ArgumentParser()
    arg_parser.add_argument('onnx_model_file',      type=str,                             help='Onnx model file')
    arg_parser.add_argument('trt_model_filename',   type=str,                             help='TensorRT model file')
    arg_parser.add_argument('--input_tensor_name',  type=str,   default='input_tensor:0', help='Input tensor name')
    arg_parser.add_argument('--output_tensor_name', type=str,   default='prob',           help='Output tensor name')
    arg_parser.add_argument('--output_data_type',   type=str,   default='fp32',           help='Model data type')
    arg_parser.add_argument('--max_workspace_size', type=int,   default=(1<<30),          help='Builder workspace size')
    arg_parser.add_argument('--max_batch_size',     type=int,   default=1,                help='Builder batch size')
    args        = arg_parser.parse_args()

    convert_onnx_model_to_trt( args.onnx_model_file, args.trt_model_filename,
                               args.input_tensor_name, args.output_tensor_name,
                               args.output_data_type, args.max_workspace_size, args.max_batch_size ) 
Example #5
Source File: tf2tensorrt_model_converter.py    From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def main():
    "Parse command line and feed the conversion function"

    arg_parser  = argparse.ArgumentParser()
    arg_parser.add_argument('tf_model_filename',    type=str,                       help='TensorFlow model file')
    arg_parser.add_argument('trt_model_filename',   type=str,                       help='TensorRT model file')
    arg_parser.add_argument('--model_data_layout',  type=str,   default='NHWC',     help='Model data layout (NHWC or NCHW)')
    arg_parser.add_argument('--input_layer_name',   type=str,   default='input',    help='Input layer name')
    arg_parser.add_argument('--input_height',       type=int,   default=224,        help='Input height')
    arg_parser.add_argument('--input_width',        type=int,   default=224,        help='Input width')
    arg_parser.add_argument('--output_layer_name',  type=str,   default='MobilenetV1/Predictions/Reshape_1', help='Output layer name')
    arg_parser.add_argument('--output_data_type',   type=str,   default='fp32',     help='Model data type')
    arg_parser.add_argument('--max_workspace_size', type=int,   default=(1<<30),    help='Builder workspace size')
    arg_parser.add_argument('--max_batch_size',     type=int,   default=1,          help='Builder batch size')
    args        = arg_parser.parse_args()

    convert_tf_model_to_trt( args.tf_model_filename, args.trt_model_filename,
                                args.model_data_layout, args.input_layer_name, args.input_height, args.input_width,
                                args.output_layer_name, args.output_data_type, args.max_workspace_size, args.max_batch_size ) 
Example #6
Source File: sample.py    From iAI with MIT License
def build_engine(model_file):
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)
        # Parse the Uff Network
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer. 
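The loader referred to here is omitted from the snippet; a hedged sketch, assuming the usual MNIST-sample setup (a page-locked host buffer from the samples' common helpers, 28x28 grayscale images, and the samples' 1.0 - x/255.0 normalization), might be:

import random
import numpy as np
from PIL import Image

def load_test_case(image_paths, pagelocked_buffer):
    # Pick a random test image, flatten and normalize it, then copy it
    # into the page-locked host buffer used for the host-to-device copy.
    case_path = random.choice(image_paths)
    img = np.array(Image.open(case_path)).ravel().astype(np.float32)
    np.copyto(pagelocked_buffer, 1.0 - img / 255.0)
    return case_path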
Example #7
Source File: build_engine.py    From tensorrt_demos with MIT License
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model', type=str, choices=list(MODEL_SPECS.keys()))
    args = parser.parse_args()

    # initialize
    if int(trt.__version__.split('.')[0]) < 7:
        ctypes.CDLL(LIB_FILE)
    TRT_LOGGER = trt.Logger(trt.Logger.INFO)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    # compile the model into TensorRT engine
    model = args.model
    spec = MODEL_SPECS[model]
    dynamic_graph = add_plugin(
        gs.DynamicGraph(spec['input_pb']),
        model,
        spec)
    _ = uff.from_tensorflow(
        dynamic_graph.as_graph_def(),
        output_nodes=['NMS'],
        output_filename=spec['tmp_uff'],
        text=True,
        debug_mode=DEBUG_UFF)
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True

        parser.register_input('Input', INPUT_DIMS)
        parser.register_output('MarkOutput_0')
        parser.parse(spec['tmp_uff'], network)
        engine = builder.build_cuda_engine(network)

        buf = engine.serialize()
        with open(spec['output_bin'], 'wb') as f:
            f.write(buf) 
Example #8
Source File: uff_resnet50.py    From iAI with MIT License
def build_engine_uff(model_file):
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # We need to manually register the input and output nodes for UFF.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        # Load the UFF model and parse it in order to populate the TensorRT network.
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network) 
Example #9
Source File: caffe_resnet50.py    From iAI with MIT License
def build_engine_caffe(model_file, deploy_file):
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # Load the Caffe model and parse it in order to populate the TensorRT network.
        # This function returns an object that we can query to find tensors by name.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        # For Caffe, we need to manually mark the output of the network.
        # Since we know the name of the output tensor, we can find it in model_tensors.
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network) 
Example #10
Source File: sample.py    From iAI with MIT License
def build_int8_engine(deploy_file, model_file, calib, batch_size=32):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        # We set the builder batch size to be the same as the calibrator's, as we use the same batches
        # during inference. Note that this is not required in general, and inference batch size is
        # independent of calibration batch size.
        builder.max_batch_size = batch_size
        builder.max_workspace_size = common.GiB(1)
        builder.int8_mode = True
        builder.int8_calibrator = calib
        # Parse Caffe model
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        # Build engine and do int8 calibration.
        return builder.build_cuda_engine(network) 
Example #11
Source File: sample.py    From iAI with MIT License
def build_engine(weights):
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
        builder.max_workspace_size = common.GiB(1)
        # Set the refit flag in the builder
        builder.refittable = True
        # Populate the network using weights from the PyTorch model.
        populate_network(network, weights)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a random test case from pytorch's DataLoader 
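The DataLoader helper is not shown in this snippet; a sketch assuming a torchvision MNIST test set (the dataset path and transform are illustrative choices) could be:

import numpy as np
from torchvision import datasets, transforms

def load_random_test_case(pagelocked_buffer):
    # Draw one random MNIST test sample and copy it into the page-locked
    # host buffer; return the ground-truth label for checking the output.
    dataset = datasets.MNIST('/tmp/mnist', train=False, download=True,
                             transform=transforms.ToTensor())
    img, label = dataset[np.random.randint(len(dataset))]
    np.copyto(pagelocked_buffer, img.numpy().ravel())
    return label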
Example #12
Source File: sample.py    From iAI with MIT License
def build_engine(model_path):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)

        uff_path = model_to_uff(model_path)
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(uff_path, network)

        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer. Returns loaded test case label. 
Example #13
Source File: sample.py    From iAI with MIT License
def build_engine(model_file):
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)
        # Parse the Uff Network
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer. 
Example #14
Source File: module.py    From torch2trt with MIT License
def build_tensorrt(self, net, torch_inputs):
    if self.input_names is None:
        input_names = get_torch_forward_name(net.forward)
    else:
        input_names = self.input_names
    self.graph_pth = torch2trt.GraphModule(net, torch_inputs)
    self.output_names = []
    with trt.Builder(
            self.logger) as builder, builder.create_network() as trt_net:
        builder.max_workspace_size = self.workspace
        builder.max_batch_size = self.max_batchsize
        builder.fp16_mode = builder.platform_has_fast_fp16
        # builder.refittable = False
        if self.builder_config_fn is not None:
            self.builder_config_fn(builder)
        with torch2trt.trt_network(trt_net):
            inputs = []
            for i, arg in enumerate(torch_inputs):
                name = input_names[i]
                inp = trt_net.add_input(name=name,
                                        shape=arg.shape[1:],
                                        dtype=trt.float32)
                inputs.append(inp)
            outputs = self.graph_pth(*inputs, verbose=self.verbose)
        self.refit_weight_dict = self.graph_pth.graph.refit_weight_dict
        if not isinstance(outputs, (list, tuple)):
            outputs = [outputs]
        for i, out in enumerate(outputs):
            name = "output{}".format(i)
            out.name = name
            self.output_names.append(name)
            trt_net.mark_output(tensor=out)
        self.builder = builder
        if self.net_post_fn is not None:
            self.net_post_fn(trt_net)
        self.engine = builder.build_cuda_engine(trt_net)
        self.ctx = self.engine.create_execution_context()
        self.ctx = torch2trt.TorchInferenceContext(self.ctx)
    # get output shapes
    outputs = self.graph_pth(*torch_inputs)
    if not isinstance(outputs, (list, tuple)):
        outputs = [outputs]
    self.output_shapes = {}
    for n, v in zip(self.output_names, outputs):
        self.output_shapes[n] = v.shape[1:]
Example #15
Source File: tensorrt_loaders.py    From NeMo with Apache License 2.0
def __call__(self):
    class DummyContextManager(object):
        def __enter__(self):
            return None

        def __exit__(self, exc_type, exc_value, traceback):
            return None

    network_parser = self.network_loader()
    try:
        network, parser = network_parser
        assert isinstance(network, trt.INetworkDefinition)
    except (ValueError, AssertionError):
        network = network_parser
        parser = DummyContextManager()

    with trt.Builder(TRT_LOGGER) as builder, network, parser:
        if self.preprocess_network:
            logging.debug("Applying network preprocessing: {:}".format(self.preprocess_network))
            self.preprocess_network(network)

        if self.layerwise:
            TensorRTRunnerV2.mark_layerwise(network)

        if logging.getEffectiveLevel() <= logging.DEBUG:
            TensorRTRunnerV2.log_network(network)

        config = builder.create_builder_config()
        profile = TensorRTRunnerV2.build_profile(builder, network, self.profile_shapes)
        config.add_optimization_profile(profile)

        config.max_workspace_size = int(self.max_workspace_size)
        if self.fp16_mode:
            config.flags = 1 << int(trt.BuilderFlag.FP16)
        if self.int8_mode:
            config.flags = config.flags | 1 << int(trt.BuilderFlag.INT8)
            if not network.has_explicit_precision:
                if not self.calibrator:
                    logging.critical(
                        "Network does not have explicit precision. A calibrator must be provided in order to use int8 mode."
                    )
                self.calibrator.set_input_metadata(get_input_metadata_from_profile(profile, network))
                config.int8_calibrator = self.calibrator

        logging.debug("Using builder configuration flags: {:}".format(config.flags))
        logging.info(
            "Building engine: max workspace size={:} bytes, fp16={:}, int8={:}, layerwise={:}".format(
                self.max_workspace_size, self.fp16_mode, self.int8_mode, self.layerwise
            )
        )
        engine = builder.build_engine(network, config)
        self.written_engine_path = write_timestamped(
            contents=lambda: engine.serialize(), dir=self.write_engine, name="tensorrt_runner_v2.engine"
        )
        return engine
Example #16
Source File: tf2tensorrt_model_converter.py    From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def convert_tf_model_to_trt(tf_model_filename, trt_model_filename,
                               model_data_layout, input_layer_name, input_height, input_width,
                               output_layer_name, output_data_type, max_workspace_size, max_batch_size):
    "Convert an tf_model_filename into a trt_model_filename using the given parameters"

    uff_model = uff.from_tensorflow_frozen_model(tf_model_filename)

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:

        if model_data_layout == 'NHWC':
            parser.register_input(input_layer_name, [input_height, input_width, 3], trt.UffInputOrder.NHWC)
        else:
            parser.register_input(input_layer_name, [3, input_height, input_width], trt.UffInputOrder.NCHW)

        parser.register_output(output_layer_name)

        if not parser.parse_buffer(uff_model, network):
            raise RuntimeError("UFF model parsing (originally from {}) failed. Error: {}".format(tf_model_filename, parser.get_error(0).desc()))

        if output_data_type == 'fp32':
            print('Converting into fp32 (default), max_batch_size={}'.format(max_batch_size))
        else:
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')

            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))

        builder.max_workspace_size  = max_workspace_size
        builder.max_batch_size      = max_batch_size


        trt_model_object    = builder.build_cuda_engine(network)

        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception:
            raise RuntimeError('Cannot serialize or write TensorRT engine to file {}.'.format(trt_model_filename)) 
Example #17
Source File: sample.py    From iAI with MIT License
def build_engine(deploy_file, model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
        builder.max_workspace_size = common.GiB(1)

        # Set the parser's plugin factory. Note that we bind the factory to a reference so
        # that we can destroy it later. (parser.plugin_factory_ext is a write-only attribute)
        parser.plugin_factory_ext = fc_factory

        # Parse the model and build the engine.
        model_tensors = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)

        network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))
        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer. 
Example #18
Source File: engine.py    From iAI with MIT License
def build_engine(uff_model_path, trt_logger, trt_engine_datatype=trt.DataType.FLOAT, batch_size=1, silent=False):
    with trt.Builder(trt_logger) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = 1 << 30
        if trt_engine_datatype == trt.DataType.HALF:
            builder.fp16_mode = True
        builder.max_batch_size = batch_size

        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output("MarkOutput_0")
        parser.parse(uff_model_path, network)

        if not silent:
            print("Building TensorRT engine. This may take few minutes.")

        return builder.build_cuda_engine(network) 
Example #19
Source File: mnist_uff_custom_plugin.py    From iAI with MIT License
def build_engine(model_path):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)

        uff_path = model_to_uff(model_path)
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(uff_path, network)

        return builder.build_cuda_engine(network)

# Loads a test case into the provided pagelocked_buffer. Returns loaded test case label. 
Example #20
Source File: sample.py    From iAI with MIT License
def build_engine(weights):
    # For more information on TRT basics, refer to the introductory samples.
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
        builder.max_workspace_size = common.GiB(1)
        # Populate the network using weights from the PyTorch model.
        populate_network(network, weights)
        # Build and return an engine.
        return builder.build_cuda_engine(network)

# Loads a random test case from pytorch's DataLoader 
Example #21
Source File: uff_resnet50.py    From iAI with MIT License
def build_engine_uff(model_file):
    # You can set the logger severity higher to suppress messages (or lower to display more messages).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        # Workspace size is the maximum amount of memory available to the builder while building an engine.
        # It should generally be set as high as possible.
        builder.max_workspace_size = common.GiB(1)
        # We need to manually register the input and output nodes for UFF.
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        # Load the UFF model and parse it in order to populate the TensorRT network.
        parser.parse(model_file, network)
        # Build and return an engine.
        return builder.build_cuda_engine(network) 
Example #22
Source File: build_engine.py    From keras_imagenet with MIT License
def build_engine(onnx, verbose=False):
    """Build TensorRT engine from the ONNX model."""
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 30  # 1GB
        builder.max_batch_size = MAX_BATCH
        builder.fp16_mode = FP16_MODE
        with open(onnx, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        if int(trt.__version__.split('.')[0]) >= 7:
            # set input to batch size 1
            shape = list(network.get_input(0).shape)
            shape[0] = 1
            network.get_input(0).shape = shape
        return builder.build_cuda_engine(network) 
Example #23
Source File: onnx2tensorrt_model_converter.py    From ck-tensorrt with BSD 3-Clause "New" or "Revised" License
def convert_onnx_model_to_trt(onnx_model_filename, trt_model_filename,
                              input_tensor_name, output_tensor_name,
                              output_data_type, max_workspace_size, max_batch_size):
    "Convert an onnx_model_filename into a trt_model_filename using the given parameters"

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

    TRT_VERSION_MAJOR = int(trt.__version__.split('.')[0])

    with trt.Builder(TRT_LOGGER) as builder:
        if TRT_VERSION_MAJOR >= 7:
            flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION)) | (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
            network = builder.create_network(flag)
        else:
            network = builder.create_network()
        parser = trt.OnnxParser(network, TRT_LOGGER)

        if output_data_type == 'fp32':
            print('Converting into fp32 (default), max_batch_size={}'.format(max_batch_size))
            builder.fp16_mode = False
        else:
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')

            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))

        builder.max_workspace_size  = max_workspace_size
        builder.max_batch_size      = max_batch_size

        with open(onnx_model_filename, 'rb') as onnx_model_file:
            onnx_model = onnx_model_file.read()

        if not parser.parse(onnx_model):
            raise RuntimeError("Onnx model parsing from {} failed. Error: {}".format(onnx_model_filename, parser.get_error(0).desc()))

        if TRT_VERSION_MAJOR >= 7:
            # Create an optimization profile (see Section 7.2 of https://docs.nvidia.com/deeplearning/sdk/pdf/TensorRT-Developer-Guide.pdf).
            profile = builder.create_optimization_profile()
            # FIXME: Hardcoded for ImageNet. The minimum/optimum/maximum dimensions of a dynamic input tensor are the same.
            profile.set_shape(input_tensor_name, (1, 3, 224, 224), (max_batch_size, 3, 224, 224), (max_batch_size, 3, 224, 224))

            config = builder.create_builder_config()
            config.add_optimization_profile(profile)

            trt_model_object = builder.build_engine(network, config)
        else:
            trt_model_object = builder.build_cuda_engine(network)

        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception:
            raise RuntimeError('Cannot serialize or write TensorRT engine to file {}.'.format(trt_model_filename)) 
Example #24
Source File: export_jasper_onnx_to_trt.py    From NeMo with Apache License 2.0
def build_engine(
    onnx_path,
    seq_len=192,
    max_seq_len=256,
    batch_size=8,
    max_batch_size=64,
    trt_fp16=True,
    verbose=True,
    max_workspace_size=None,
    encoder=True,
):
    """Builds TRT engine from an ONNX file
    Note that network output 1 is unmarked so that the engine will not use
    vestigial length calculations associated with masked_fill
    """
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(TRT_LOGGER)
    builder.max_batch_size = max_batch_size

    with open(onnx_path, 'rb') as model_fh:
        model = model_fh.read()

    model_onnx = onnx.load_model_from_string(model)
    input_feats = model_onnx.graph.input[0].type.tensor_type.shape.dim[1].dim_value
    input_name = model_onnx.graph.input[0].name

    if trt_fp16:
        builder.fp16_mode = True
        print("Optimizing for FP16")
        config_flags = 1 << int(trt.BuilderFlag.FP16)  # | 1 << int(trt.BuilderFlag.STRICT_TYPES)
    else:
        config_flags = 0
    builder.max_workspace_size = max_workspace_size if max_workspace_size else (4 * 1024 * 1024 * 1024)

    config = builder.create_builder_config()
    config.flags = config_flags

    profile = builder.create_optimization_profile()
    profile.set_shape(
        input_name,
        min=(1, input_feats, seq_len),
        opt=(batch_size, input_feats, seq_len),
        max=(max_batch_size, input_feats, max_seq_len),
    )
    config.add_optimization_profile(profile)

    explicit_batch = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(explicit_batch)

    with trt.OnnxParser(network, TRT_LOGGER) as parser:
        parsed = parser.parse(model)
        print("Parsing returned ", parsed)
        return builder.build_engine(network, config=config) 
Example #25
Source File: onnx_resnet50.py    From iAI with MIT License
def build_engine_onnx(model_file):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = common.GiB(1)
        # Load the Onnx model and parse it in order to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            parser.parse(model.read())
        return builder.build_cuda_engine(network) 
Example #26
Source File: onnx_to_tensorrt.py    From yolov3-tensorrt with MIT License
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30 # 1GB
            builder.max_batch_size = 1
            builder.fp16_mode = True
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine() 
Example #27
Source File: onnx_to_tensorrt.py    From iAI with MIT License
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30 # 1GB
            builder.max_batch_size = 1
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine() 
Example #28
Source File: onnx_to_tensorrt.py    From iAI with MIT License
def get_engine(onnx_file_path, engine_file_path=""):
    """Attempts to load a serialized engine if available, otherwise builds a new TensorRT engine and saves it."""
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 28 # 256MiB
            builder.max_batch_size = 1
            # Parse model file
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine() 
Example #29
Source File: speed_gpu.py    From Real-time-GesRec with MIT License
def build_engine(model_path):
    with trt.Builder(TRT_LOGGER) as builder, \
        builder.create_network() as network, \
        trt.OnnxParser(network, TRT_LOGGER) as parser: 
        builder.max_workspace_size = 1<<30
        builder.max_batch_size = 1
        with open(model_path, "rb") as f:
            parser.parse(f.read())
        engine = builder.build_cuda_engine(network)
        return engine 
Example #30
Source File: onnx_to_tensorrt.py    From tensorrt_demos with MIT License
def build_engine(onnx_file_path, engine_file_path, verbose=False):
    """Takes an ONNX file and creates a TensorRT engine."""
    TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(*EXPLICIT_BATCH) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        builder.max_workspace_size = 1 << 28
        builder.max_batch_size = 1
        builder.fp16_mode = True
        #builder.strict_type_constraints = True

        # Parse model file
        print('Loading ONNX file from path {}...'.format(onnx_file_path))
        with open(onnx_file_path, 'rb') as model:
            print('Beginning ONNX file parsing')
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        if int(trt.__version__.split('.')[0]) >= 7:
            # The actual yolov3.onnx is generated with batch size 64.
            # Reshape input to batch size 1
            shape = list(network.get_input(0).shape)
            shape[0] = 1
            network.get_input(0).shape = shape
        print('Completed parsing of ONNX file')

        print('Building an engine; this may take a while...')
        engine = builder.build_cuda_engine(network)
        print('Completed creating engine')
        with open(engine_file_path, 'wb') as f:
            f.write(engine.serialize())
        return engine