TensorRT Programming - Single-Layer Perceptron
The example is split into two files: build.cpp defines a fully connected layer followed by a sigmoid activation and serializes the resulting engine to disk; runtime.cu deserializes that engine and runs inference on a sample input.
build.cpp
#include <iostream>
#include <fstream>
#include <vector>
#include <cassert>
#include <cstdint>
#include <NvInfer.h>

// Logger required by the TensorRT API; prints INFO and anything more severe (warnings, errors).
class TRTLogger : public nvinfer1::ILogger
{
    void log(Severity severity, const char* msg) noexcept override
    {
        if (severity <= Severity::kINFO)
        {
            std::cout << msg << std::endl;
        }
    }
} gLogger;

// Write a weight blob to disk: a 4-byte element count followed by the raw floats.
void saveWeights(const std::string& filename, const float* data, int size)
{
    std::ofstream outfile(filename, std::ios::binary);
    assert(outfile.is_open() && "save weights failed");
    outfile.write((char*)&size, sizeof(int));
    outfile.write((char*)data, size * sizeof(float));
    outfile.close();
}

// Read back a weight blob written by saveWeights.
std::vector<float> loadWeights(const std::string& filename)
{
    std::ifstream infile(filename, std::ios::binary);
    assert(infile.is_open() && "load weights failed");
    int size = 0;
    infile.read((char*)&size, sizeof(int));
    std::vector<float> data(size);
    infile.read((char*)data.data(), size * sizeof(float));
    infile.close();
    return data;
}

int main()
{
    TRTLogger logger;

    // 1. Create the builder and an explicit-batch network definition.
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(1U); // 1 == kEXPLICIT_BATCH flag

    // 2. Declare the input tensor: 1 x 3 x 1 x 1.
    const int input_size = 3;
    nvinfer1::ITensor* input = network->addInput("data", nvinfer1::DataType::kFLOAT,
                                                 nvinfer1::Dims4(1, input_size, 1, 1));

    // 3. Prepare the fully connected layer weights: write them to disk once,
    //    then read them back the way a real model loader would.
    const float* fc1_weight_data = new float[input_size * 2]{0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
    const float* fc1_bias_data   = new float[2]{0.1f, 0.5f};
    saveWeights("./model/fc1.wts", fc1_weight_data, 6);
    saveWeights("./model/fc1.bias", fc1_bias_data, 2);
    auto fc1_weights_vec = loadWeights("./model/fc1.wts");
    auto fc1_bias_vec    = loadWeights("./model/fc1.bias");
    nvinfer1::Weights fc1_weight{nvinfer1::DataType::kFLOAT, fc1_weights_vec.data(),
                                 (int64_t)fc1_weights_vec.size()};
    nvinfer1::Weights fc1_bias{nvinfer1::DataType::kFLOAT, fc1_bias_vec.data(),
                               (int64_t)fc1_bias_vec.size()};

    // 4. Build the network: fully connected layer (2 outputs) followed by a sigmoid activation.
    const int output_size = 2;
    nvinfer1::IFullyConnectedLayer* fc1 =
        network->addFullyConnected(*input, output_size, fc1_weight, fc1_bias);
    nvinfer1::IActivationLayer* sigmoid =
        network->addActivation(*fc1->getOutput(0), nvinfer1::ActivationType::kSIGMOID);
    sigmoid->getOutput(0)->setName("output");
    network->markOutput(*sigmoid->getOutput(0));

    // 5. Configure and build the engine.
    builder->setMaxBatchSize(1);
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    config->setMaxWorkspaceSize(1 << 28);
    nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    if (!engine)
    {
        std::cout << "Failed to create engine!" << std::endl;
        return -1;
    }

    // 6. Serialize the engine and write it to disk.
    nvinfer1::IHostMemory* serialized_engine = engine->serialize();
    std::ofstream outfile("./model/mlp.engine", std::ios::binary);
    assert(outfile.is_open() && "Failed to open file for writing");
    outfile.write((char*)serialized_engine->data(), serialized_engine->size());
    outfile.close();

    // 7. Release resources.
    delete serialized_engine;
    delete engine;
    delete config;
    delete network;
    delete builder;
    delete[] fc1_weight_data;
    delete[] fc1_bias_data;

    std::cout << "engine file generated successfully" << std::endl;
    return 0;
}
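The two weight files written above are nothing more than a 4-byte element count followed by the raw float values. As an optional sanity check, a small standalone dump tool built on that assumption is sketched below; the filename inspect_weights.cpp and the hard-coded path are illustrative, not part of the original example. For fc1.wts it should print the six values 0.1 through 0.6 in order.

inspect_weights.cpp (optional, illustrative)
// inspect_weights.cpp -- dump tool for the blobs written by saveWeights (not in the original example).
#include <fstream>
#include <iostream>
#include <vector>

int main()
{
    // Same layout that saveWeights uses: an int element count, then the raw floats.
    std::ifstream infile("./model/fc1.wts", std::ios::binary);
    if (!infile.is_open())
    {
        std::cout << "cannot open ./model/fc1.wts" << std::endl;
        return -1;
    }
    int size = 0;
    infile.read((char*)&size, sizeof(int));
    std::vector<float> data(size);
    infile.read((char*)data.data(), size * sizeof(float));

    std::cout << size << " values:";
    for (float v : data) std::cout << " " << v;
    std::cout << std::endl;
    return 0;
}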
runtime.cu
#include <iostream>
#include <vector>
#include <cassert>
#include <fstream>
#include <NvInfer.h>
#include <cuda_runtime.h>

// Same logger as in build.cpp.
class TRTLogger : public nvinfer1::ILogger
{
    void log(Severity severity, const char* msg) noexcept override
    {
        if (severity <= Severity::kINFO)
        {
            std::cout << msg << std::endl;
        }
    }
} gLogger;

// Read the serialized engine file into memory.
std::vector<unsigned char> loadEngineModel(const std::string& filename)
{
    std::ifstream infile(filename, std::ios::binary);
    assert(infile.is_open() && "load engine model failed");

    infile.seekg(0, std::ios::end);     // jump to the end to get the file size
    size_t size = infile.tellg();
    std::vector<unsigned char> data(size);
    infile.seekg(0, std::ios::beg);     // back to the beginning and read everything
    infile.read((char*)data.data(), size);
    infile.close();
    return data;
}

int main()
{
    TRTLogger logger;

    // 1. Create the runtime and deserialize the engine.
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
    auto engineModel = loadEngineModel("./model/mlp.engine");
    nvinfer1::ICudaEngine* engine =
        runtime->deserializeCudaEngine(engineModel.data(), engineModel.size());
    if (!engine)
    {
        std::cout << "deserialize engine failed" << std::endl;
        return -1;
    }

    // 2. Create the execution context and a CUDA stream.
    nvinfer1::IExecutionContext* context = engine->createExecutionContext();
    cudaStream_t stream = nullptr;
    cudaStreamCreate(&stream);

    // 3. Allocate host and device buffers (input: 3 floats, output: 2 floats).
    float* host_input_data = new float[3]{2, 4, 8};
    int input_data_size = 3 * sizeof(float);
    float* device_input_data = nullptr;

    float* host_output_data = new float[2]{0, 0};
    int output_data_size = 2 * sizeof(float);
    float* device_output_data = nullptr;

    cudaMalloc((void**)&device_input_data, input_data_size);
    cudaMalloc((void**)&device_output_data, output_data_size);

    // 4. Copy the input to the device, run inference, copy the output back.
    cudaMemcpyAsync(device_input_data, host_input_data, input_data_size,
                    cudaMemcpyHostToDevice, stream);

    float* bindings[] = {device_input_data, device_output_data};
    bool success = context->enqueueV2((void**)bindings, stream, nullptr);
    if (!success)
    {
        std::cout << "enqueue failed" << std::endl;
    }

    cudaMemcpyAsync(host_output_data, device_output_data, output_data_size,
                    cudaMemcpyDeviceToHost, stream);
    cudaStreamSynchronize(stream);

    std::cout << "output: " << host_output_data[0] << " " << host_output_data[1] << std::endl;

    // 5. Release resources.
    cudaStreamDestroy(stream);
    cudaFree(device_input_data);
    cudaFree(device_output_data);
    delete[] host_input_data;
    delete[] host_output_data;
    delete context;
    delete engine;
    delete runtime;
    return 0;
}
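To know roughly what the printout above should be, the same perceptron can be evaluated on the CPU. The sketch below is not from the original example; it assumes the row-major [output_size x input_size] kernel layout expected by addFullyConnected and simply computes sigmoid(W·x + b) for the input {2, 4, 8}. Under that assumption the engine should report approximately 0.9707 and 0.9997.

check_output.cpp (optional, illustrative)
// check_output.cpp -- CPU reference for the engine above (not in the original example).
#include <cmath>
#include <cstdio>

int main()
{
    // Same values that build.cpp bakes into the engine.
    const float W[2][3] = {{0.1f, 0.2f, 0.3f},   // assumed row-major: one row per output neuron
                           {0.4f, 0.5f, 0.6f}};
    const float b[2] = {0.1f, 0.5f};
    const float x[3] = {2.0f, 4.0f, 8.0f};       // same input as runtime.cu

    for (int i = 0; i < 2; ++i)
    {
        float acc = b[i];
        for (int j = 0; j < 3; ++j)
            acc += W[i][j] * x[j];
        printf("output[%d] = %f\n", i, 1.0f / (1.0f + std::exp(-acc)));  // sigmoid
    }
    return 0;
}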