introduction
PPQ's graph dispatching feature.
This example shows how to dispatch operations freely and run mixed-precision inference.
code
import torch
import torchvision
from ppq import *
from ppq.api import *

# ------------------------------------------------------------
# In this script, we show you how to dispatch operations freely
# and run mixed-precision inference.
# Before we start, we first design a quantizer that supports
# mixed precision.
# ------------------------------------------------------------
class MyQuantizer(BaseQuantizer):

    # ------------------------------------------------------------
    # quant_operation_types is a type enumeration: list here every
    # operation type this quantizer needs to quantize.
    # ------------------------------------------------------------
    @property
    def quant_operation_types(self) -> set:
        return {'Conv'}

    # ------------------------------------------------------------
    # Once you have decided which operations to quantize, initialize
    # their quantization configs in init_quantize_config.
    # Be aware that, because manual dispatching exists, a user can
    # force an operation whose type is NOT in quant_operation_types
    # onto a quantized platform. You should handle that case, or
    # raise an error once you detect a non-quantizable operation type.
    # ------------------------------------------------------------
    def init_quantize_config(self, operation: Operation) -> OperationQuantizationConfig:

        # ------------------------------------------------------------
        # Initialize the quantization config for Conv: quantize only
        # its inputs (input & weight); bias is left unquantized.
        # ------------------------------------------------------------
        if operation.type == 'Conv':
            config = self.create_default_quant_config(
                operation_meta=operation.meta_data, num_of_bits=4,
                quant_max=15, quant_min=-16,
                observer_algorithm='percentile',
                policy=QuantizationPolicy(
                    QuantizationProperty.PER_TENSOR +
                    QuantizationProperty.LINEAR +
                    QuantizationProperty.SYMMETRICAL),
                rounding=RoundingPolicy.ROUND_HALF_EVEN)

            # ------------------------------------------------------------
            # Disable all output quantization: set the state to FP32.
            # ------------------------------------------------------------
            for tensor_quant_config in config.output_quantization_config:
                tensor_quant_config.state = QuantizationStates.FP32

            # ------------------------------------------------------------
            # Disable bias quantization: set the state to FP32.
            # ------------------------------------------------------------
            if operation.num_of_input == 3:
                config.input_quantization_config[-1].state = QuantizationStates.FP32

            # ------------------------------------------------------------
            # If the operation has been dispatched to the INT8 platform,
            # quantize it with INT8 instead.
            # ------------------------------------------------------------
            if operation.platform == TargetPlatform.ACADEMIC_INT8:
                print(f'{operation.name} has been dispatched to INT8')
                config.input_quantization_config[0].num_of_bits = 8
                config.input_quantization_config[0].quant_max = 127
                config.input_quantization_config[0].quant_min = -128
                config.input_quantization_config[1].num_of_bits = 8
                config.input_quantization_config[1].quant_max = 127
                config.input_quantization_config[1].quant_min = -128
            return config
        else:
            raise TypeError(f'Unsupported Op Type: {operation.type}')

    # ------------------------------------------------------------
    # Every operation quantized by this quantizer is sent to a given
    # target platform. Here we choose TargetPlatform.ACADEMIC_INT4.
    # ------------------------------------------------------------
    @property
    def target_platform(self) -> TargetPlatform:
        return TargetPlatform.ACADEMIC_INT4

# Register our quantizer.
register_network_quantizer(MyQuantizer, platform=TargetPlatform.ACADEMIC_INT4)
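# ------------------------------------------------------------
# To make the INT4 numbers above concrete, here is a minimal sketch
# (plain torch, NOT part of PPQ's API) of the arithmetic implied by
# the config: symmetric PER_TENSOR linear fake-quantization with
# quant_min=-16, quant_max=15; torch.round rounds half to even,
# matching ROUND_HALF_EVEN. The name int4_fake_quant is illustrative.
# ------------------------------------------------------------
def int4_fake_quant(x: torch.Tensor, scale: float) -> torch.Tensor:
    q = torch.clamp(torch.round(x / scale), min=-16, max=15)  # quantize onto the 4-bit grid
    return q * scale                                          # dequantize back to float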
# ------------------------------------------------------------
# Below we demonstrate PPQ's manual dispatching logic.
# We again use MobilenetV2 as the example and show how to perform
# mixed-precision dispatching.
# ------------------------------------------------------------
BATCHSIZE   = 32
INPUT_SHAPE = [BATCHSIZE, 3, 224, 224]
DEVICE      = 'cuda'
PLATFORM    = TargetPlatform.ACADEMIC_INT4
CALIBRATION = [torch.rand(size=INPUT_SHAPE) for _ in range(32)]
def collate_fn(batch: torch.Tensor) -> torch.Tensor: return batch.to(DEVICE)

model = torchvision.models.mobilenet.mobilenet_v2(pretrained=True)
model = model.to(DEVICE)

# ------------------------------------------------------------
# To dispatch operations manually, you must first create a
# QuantizationSetting and pass your dispatching scheme through its
# dispatching_table attribute.
# Most built-in quantizers have no INT4 quantization scheme, so they
# only support dispatching between FP32 and INT8.
# ------------------------------------------------------------
QS = QuantizationSettingFactory.default_setting()

# Example: the following statement would dispatch Op1 to the FP32 platform.
# QS.dispatching_table.append(operation='Op1', platform=TargetPlatform.FP32)

# ------------------------------------------------------------
# How do I know which layers should go to high precision for the
# best accuracy? The layerwise_error_analyse function is designed
# exactly for this: we call it, then dispatch the layers with the
# largest errors to a high-precision platform.
# We first send every layer to INT4, then run the error analysis.
# ------------------------------------------------------------

# ------------------------------------------------------------
# If you use the ENABLE_CUDA_KERNEL context manager, PPQ will try to
# compile its custom high-performance quantization kernels; this
# requires a working build environment. If compilation fails, you can
# remove the ENABLE_CUDA_KERNEL invocation here. That will slow PPQ
# down significantly, but even without these kernels you can still
# quantize the network with pytorch's GPU operators.
# ------------------------------------------------------------
with ENABLE_CUDA_KERNEL():
    dump_torch_to_onnx(model=model, onnx_export_file='./model.onnx',
                       input_shape=INPUT_SHAPE, input_dtype=torch.float32)
    graph = load_onnx_graph(onnx_import_file='./model.onnx')
    quantized = quantize_native_model(
        model=graph, calib_dataloader=CALIBRATION,
        calib_steps=32, input_shape=INPUT_SHAPE,
        collate_fn=collate_fn, platform=PLATFORM,
        device=DEVICE, verbose=0, setting=QS)

    # ------------------------------------------------------------
    # Measure the quantization error before dispatching with
    # graphwise_error_analyse.
    # ------------------------------------------------------------
    reports = graphwise_error_analyse(
        graph=quantized, running_device=DEVICE,
        collate_fn=collate_fn, dataloader=CALIBRATION)

    # ------------------------------------------------------------
    # Run the layerwise analysis. The result is a dict that maps every
    # operation name to its isolated single-layer quantization error.
    # ------------------------------------------------------------
    reports = layerwise_error_analyse(
        graph=quantized, running_device=DEVICE,
        collate_fn=collate_fn, dataloader=CALIBRATION, verbose=False)

    # Sort the per-layer errors from largest to smallest.
    sensitivity = [(op_name, error) for op_name, error in reports.items()]
    sensitivity = sorted(sensitivity, key=lambda x: x[1], reverse=True)

    # ------------------------------------------------------------
    # Send the ten layers with the largest errors to INT8, then
    # re-quantize the model.
    # ------------------------------------------------------------
    for op_name, _ in sensitivity[: 10]:
        QS.dispatching_table.append(operation=op_name, platform=TargetPlatform.ACADEMIC_INT8)

    graph = load_onnx_graph(onnx_import_file='./model.onnx')
    quantized = quantize_native_model(
        model=graph, calib_dataloader=CALIBRATION,
        calib_steps=32, input_shape=INPUT_SHAPE,
        collate_fn=collate_fn, platform=PLATFORM,
        device=DEVICE, verbose=0, setting=QS)

    # ------------------------------------------------------------
    # Measure the final quantization error with graphwise_error_analyse.
    # ------------------------------------------------------------
    reports = graphwise_error_analyse(
        graph=quantized, running_device=DEVICE,
        collate_fn=collate_fn, dataloader=CALIBRATION)
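The top-10 cut-off used above is just a heuristic. A possible variant, sketched here with the same dispatching_table API from the script (the THRESHOLD value is a hypothetical tuning knob, not from the original), dispatches every layer whose single-layer error exceeds a chosen noise:signal threshold instead; it would replace the top-10 loop before re-quantization:

    # Sketch: dispatch by error threshold rather than by top-k.
    THRESHOLD = 1.0  # hypothetical; tune per model and accuracy target
    for op_name, error in sensitivity:
        if error > THRESHOLD:
            QS.dispatching_table.append(operation=op_name, platform=TargetPlatform.ACADEMIC_INT8)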
result
[Warning] Compling Kernels... Please wait (It will take a few minutes).
[05:38:56] PPQ Quantization Config Refine Pass Running ... Finished.
[05:38:56] PPQ Quantization Fusion Pass Running ... [Warning] Unexpected dispatching was found: Op Conv_0 and Clip_3 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_4 and Clip_7 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_9 and Clip_12 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_13 and Clip_16 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_18 and Clip_21 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_22 and Clip_25 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_28 and Clip_31 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_32 and Clip_35 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_37 and Clip_40 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_41 and Clip_44 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_47 and Clip_50 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_51 and Clip_54 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_57 and Clip_60 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_61 and Clip_64 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_66 and Clip_69 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_70 and Clip_73 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_76 and Clip_79 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_80 and Clip_83 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_86 and Clip_89 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_90 and Clip_93 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_96 and Clip_99 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_100 and Clip_103 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_105 and Clip_108 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_109 and Clip_112 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_115 and Clip_118 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_119 and Clip_122 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_125 and Clip_128 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_129 and Clip_132 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_134 and Clip_137 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_138 and Clip_141 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_144 and Clip_147 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_148 and Clip_151 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_154 and Clip_157 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_158 and Clip_161 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_163 and Clip_166 should both be quantized operation.
Finished.
[05:38:56] PPQ Quantize Simplify Pass Running ... Finished.
[05:38:56] PPQ Parameter Quantization Pass Running ... Finished.
[05:38:56] PPQ Runtime Calibration Pass Running ...
Calibration Progress(Phase 1): 100%|██████████| 32/32 [00:02<00:00, 11.19it/s]
Finished.
[05:38:59] PPQ Quantization Alignment Pass Running ... Finished.
[05:38:59] PPQ Passive Parameter Quantization Running ... Finished.
[05:38:59] PPQ Parameter Baking Pass Running ... Finished.
--------- Network Snapshot ---------
Num of Op: [100]
Num of Quantized Op: [52]
Num of Variable: [277]
Num of Quantized Var: [201]
------- Quantization Snapshot ------
Num of Quant Config: [208]
BAKED: [52]
ACTIVATED: [52]
FP32: [104]
Network Quantization Finished.
Analysing Graphwise Quantization Error(Phrase 1):: 100%|██████████| 8/8 [00:00<00:00, 8.22it/s]
Analysing Graphwise Quantization Error(Phrase 2):: 100%|██████████| 8/8 [00:01<00:00, 7.46it/s]
Layer | NOISE:SIGNAL POWER RATIO
Conv_162: | ████████████████████ | 4.469564
Conv_163: | █████████████████ | 3.796862
Conv_55: | ███████████ | 2.358960
Conv_45: | █████████ | 1.916194
Conv_104: | ████████ | 1.848709
Conv_65: | ███████ | 1.608640
Conv_36: | ███████ | 1.527322
Conv_113: | ███████ | 1.489844
Conv_125: | ██████ | 1.338494
Conv_26: | ██████ | 1.292400
Conv_123: | ██████ | 1.233793
Conv_133: | █████ | 1.195669
Conv_148: | █████ | 1.163949
Conv_84: | █████ | 1.156629
Conv_134: | █████ | 1.155597
Conv_142: | █████ | 1.123850
Conv_144: | █████ | 1.119863
Conv_138: | █████ | 1.115643
Conv_70: | █████ | 1.106950
Conv_152: | █████ | 1.100367
Conv_90: | █████ | 1.048857
Conv_94: | █████ | 1.035837
Conv_51: | █████ | 1.009898
Conv_115: | ████ | 0.957407
Conv_57: | ████ | 0.947991
Conv_80: | ████ | 0.945680
Conv_154: | ████ | 0.931150
Conv_105: | ████ | 0.915809
Conv_109: | ████ | 0.912738
Conv_41: | ████ | 0.876404
Conv_86: | ████ | 0.841202
Conv_17: | ████ | 0.796938
Conv_96: | ████ | 0.784382
Conv_100: | ███ | 0.760816
Conv_129: | ███ | 0.756461
Conv_119: | ███ | 0.754041
Conv_74: | ███ | 0.744942
Conv_66: | ███ | 0.729332
Conv_32: | ███ | 0.726990
Conv_28: | ███ | 0.704079
Conv_47: | ███ | 0.653307
Conv_76: | ███ | 0.618381
Conv_8: | ███ | 0.603043
Conv_22: | ███ | 0.602080
Conv_37: | ██ | 0.509791
Conv_61: | ██ | 0.506709
Conv_13: | ██ | 0.495827
Conv_9: | ██ | 0.404825
Conv_158: | ██ | 0.392166
Conv_18: | █ | 0.221014
Conv_4: | | 0.082779
Conv_0:   |  | 0.001353
Analysing Layerwise quantization error:: 100%|██████████| 52/52 [00:13<00:00, 3.95it/s]
Conv_4 has been dispatched to INT8
Conv_13 has been dispatched to INT8
Conv_22 has been dispatched to INT8
Conv_32 has been dispatched to INT8
Conv_41 has been dispatched to INT8
Conv_70 has been dispatched to INT8
Conv_80 has been dispatched to INT8
Conv_109 has been dispatched to INT8
Conv_148 has been dispatched to INT8
Conv_158 has been dispatched to INT8
[05:39:15] PPQ Quantization Config Refine Pass Running ... Finished.
[05:39:15] PPQ Quantization Fusion Pass Running ... [Warning] Unexpected dispatching was found: Op Conv_0 and Clip_3 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_4 and Clip_7 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_9 and Clip_12 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_13 and Clip_16 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_18 and Clip_21 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_22 and Clip_25 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_28 and Clip_31 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_32 and Clip_35 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_37 and Clip_40 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_41 and Clip_44 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_47 and Clip_50 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_51 and Clip_54 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_57 and Clip_60 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_61 and Clip_64 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_66 and Clip_69 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_70 and Clip_73 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_76 and Clip_79 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_80 and Clip_83 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_86 and Clip_89 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_90 and Clip_93 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_96 and Clip_99 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_100 and Clip_103 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_105 and Clip_108 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_109 and Clip_112 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_115 and Clip_118 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_119 and Clip_122 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_125 and Clip_128 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_129 and Clip_132 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_134 and Clip_137 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_138 and Clip_141 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_144 and Clip_147 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_148 and Clip_151 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_154 and Clip_157 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_158 and Clip_161 should both be quantized operation.
[Warning] Unexpected dispatching was found: Op Conv_163 and Clip_166 should both be quantized operation.
Finished.
[05:39:15] PPQ Quantize Simplify Pass Running ... Finished.
[05:39:15] PPQ Parameter Quantization Pass Running ... Finished.
[05:39:15] PPQ Runtime Calibration Pass Running ...
Calibration Progress(Phase 1): 100%|██████████| 32/32 [00:02<00:00, 13.06it/s]
Finished.
[05:39:17] PPQ Quantization Alignment Pass Running ... Finished.
[05:39:17] PPQ Passive Parameter Quantization Running ... Finished.
[05:39:17] PPQ Parameter Baking Pass Running ... Finished.
--------- Network Snapshot ---------
Num of Op: [100]
Num of Quantized Op: [52]
Num of Variable: [277]
Num of Quantized Var: [201]
------- Quantization Snapshot ------
Num of Quant Config: [208]
BAKED: [52]
ACTIVATED: [52]
FP32: [104]
Network Quantization Finished.
Analysing Graphwise Quantization Error(Phrase 1):: 100%|██████████| 8/8 [00:01<00:00, 7.60it/s]
Analysing Graphwise Quantization Error(Phrase 2):: 100%|██████████| 8/8 [00:01<00:00, 5.64it/s]
Layer | NOISE:SIGNAL POWER RATIO
Conv_162: | ████████████████████ | 2.602241
Conv_163: | █████████████████ | 2.160837
Conv_142: | ████████ | 0.991362
Conv_152: | ███████ | 0.943710
Conv_133: | ███████ | 0.885206
Conv_154: | ██████ | 0.840136
Conv_144: | ██████ | 0.808373
Conv_113: | ██████ | 0.771283
Conv_123: | ██████ | 0.761448
Conv_55: | ██████ | 0.751858
Conv_148: | ██████ | 0.734521
Conv_134: | █████ | 0.700948
Conv_138: | █████ | 0.688839
Conv_104: | █████ | 0.653958
Conv_125: | █████ | 0.606137
Conv_129: | ████ | 0.512940
Conv_94: | ███ | 0.420290
Conv_26: | ███ | 0.380846
Conv_65: | ███ | 0.348712
Conv_45: | ███ | 0.326942
Conv_115: | ██ | 0.313652
Conv_51: | ██ | 0.309173
Conv_90: | ██ | 0.308499
Conv_109: | ██ | 0.307272
Conv_119: | ██ | 0.305434
Conv_84: | ██ | 0.300523
Conv_105: | ██ | 0.291879
Conv_100: | ██ | 0.289319
Conv_36: | ██ | 0.285215
Conv_158: | ██ | 0.242913
Conv_57: | ██ | 0.236399
Conv_96: | ██ | 0.228828
Conv_28: | ██ | 0.224474
Conv_41: | ██ | 0.221757
Conv_70: | ██ | 0.210314
Conv_86: | ██ | 0.198304
Conv_80: | █ | 0.193799
Conv_22: | █ | 0.188963
Conv_17: | █ | 0.183890
Conv_47: | █ | 0.162605
Conv_32: | █ | 0.143125
Conv_74: | █ | 0.141152
Conv_66: | █ | 0.135066
Conv_8: | █ | 0.122442
Conv_76: | █ | 0.115116
Conv_37: | █ | 0.103618
Conv_13: | █ | 0.101174
Conv_9: | █ | 0.094143
Conv_61: | █ | 0.093169
Conv_18: | █ | 0.069118
Conv_4: | | 0.007497
Conv_0: | | 0.001353
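Comparing the two reports: after the ten most sensitive layers are dispatched to INT8, the accumulated noise:signal ratio at the network output (Conv_162) falls from 4.47 to 2.60, and the dispatched layers themselves improve sharply (e.g. Conv_148: 1.16 → 0.73; Conv_4: 0.083 → 0.0075).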