0, 无参数 kernel 汇编语言示例
./param_00.hip
__global__ void WWWWW()
{((int*)0x8888888)[3] = 0x77777;
}
../../../local_amdgpu/bin/clang++ -O1 --cuda-device-only --offload-arch=gfx906 -S ./param_00.hip -o param_00.s
_Z5WWWWWv: ; @_Z5WWWWWv
; %bb.0: ; %entry
v_mov_b32_e32 v0, 0x8888894
v_mov_b32_e32 v1, 0
v_mov_b32_e32 v2, 0x77777
flat_store_dword v[0:1], v2
s_endpgm
.section .rodata,"a",@progbits
.p2align 6, 0x0
.amdhsa_kernel _Z5WWWWWv
1,一个参数的 kernel 的汇编语言示例
param_01.hip
__global__ void MMMMM(int* A)
{A[10] = 0x77777;}
../../../local_amdgpu/bin/clang++ -O1 --cuda-device-only --offload-arch=gfx906 -S ./param_01.hip -o param_01.s
_Z5MMMMMPi: ; @_Z5MMMMMPi
; %bb.0: ; %entry
s_load_dwordx2 s[0:1], s[4:5], 0x0
v_mov_b32_e32 v0, 0
v_mov_b32_e32 v1, 0x77777
s_waitcnt lgkmcnt(0)
global_store_dword v0, v1, s[0:1] offset:40
s_endpgm
.section .rodata,"a",@progbits
.p2align 6, 0x0
.amdhsa_kernel _Z5MMMMMPi
2, 两个参数的kernel 的汇编语言
param_02.hip
__global__ void MMMMM(int* AA, int *BB)
{AA[10] = 0x77777;BB[20] = 0x33333;}
../../../local_amdgpu/bin/clang++ -O1 --cuda-device-only --offload-arch=gfx906 -S ./param_02.hip -o param_02.s
_Z5MMMMMPiS_: ; @_Z5MMMMMPiS_
; %bb.0: ; %entry
s_load_dwordx4 s[0:3], s[4:5], 0x0
v_mov_b32_e32 v0, 0
v_mov_b32_e32 v1, 0x77777
v_mov_b32_e32 v2, 0x33333
s_waitcnt lgkmcnt(0)
global_store_dword v0, v1, s[0:1] offset:40
global_store_dword v0, v2, s[2:3] offset:80
s_endpgm
.section .rodata,"a",@progbits
.p2align 6, 0x0
.amdhsa_kernel _Z5MMMMMPiS_
3, 三个参数的kernel 的汇编语言
param_03.hip
__global__ void MMMMM(int* AA, int* BB, int* CC)
{AA[10] = 0x77777;BB[20] = 0x33333;CC[30] = 0x12121;}
../../../local_amdgpu/bin/clang++ -O1 --cuda-device-only --offload-arch=gfx906 -S ./param_03.hip -o param_03.s
_Z5MMMMMPiS_S_: ; @_Z5MMMMMPiS_S_
; %bb.0: ; %entry
s_load_dwordx4 s[0:3], s[4:5], 0x0
v_mov_b32_e32 v0, 0
s_load_dwordx2 s[4:5], s[4:5], 0x10
v_mov_b32_e32 v1, 0x77777
v_mov_b32_e32 v2, 0x33333
s_waitcnt lgkmcnt(0)
global_store_dword v0, v1, s[0:1] offset:40
global_store_dword v0, v2, s[2:3] offset:80
v_mov_b32_e32 v1, 0x12121
global_store_dword v0, v1, s[4:5] offset:120
s_endpgm
.section .rodata,"a",@progbits
.p2align 6, 0x0
.amdhsa_kernel _Z5MMMMMPiS_S_
4,四个参数的kernel 的汇编语言
param_04.hip
__global__ void MMMMM(int* AA, int* BB, int* CC, int* DD)
{AA[10] = 0x77777;BB[20] = 0x33333;CC[30] = 0x22222;DD[40] = 0x44444;}
../../../local_amdgpu/bin/clang++ -O1 --cuda-device-only --offload-arch=gfx906 -S ./param_04.hip -o param_04.s
_Z5MMMMMPiS_S_S_: ; @_Z5MMMMMPiS_S_S_
; %bb.0: ; %entry
s_load_dwordx8 s[0:7], s[4:5], 0x0
v_mov_b32_e32 v0, 0
v_mov_b32_e32 v1, 0x77777
v_mov_b32_e32 v2, 0x33333
s_waitcnt lgkmcnt(0)
global_store_dword v0, v1, s[0:1] offset:40
global_store_dword v0, v2, s[2:3] offset:80
v_mov_b32_e32 v1, 0x22222
global_store_dword v0, v1, s[4:5] offset:120
v_mov_b32_e32 v1, 0x44444
global_store_dword v0, v1, s[6:7] offset:160
s_endpgm
.section .rodata,"a",@progbits
.p2align 6, 0x0
.amdhsa_kernel _Z5MMMMMPiS_S_S_
5, 带线程索引的 kernel 的汇编语言
param_threadid.hip
#include <hip/hip_runtime.h>__global__ void MMMMM(int* AA)
{AA[threadIdx.x + 10] = 0x77777;}
../../../local_amdgpu/bin/clang++ -O1 --cuda-device-only --offload-arch=gfx906 -S ./param_threadid.hip -o param_threadid.s
_Z5MMMMMPi: ; @_Z5MMMMMPi
; %bb.0: ; %entry
s_load_dwordx2 s[0:1], s[4:5], 0x0
v_lshlrev_b32_e32 v0, 2, v0
v_mov_b32_e32 v1, 0x77777
s_waitcnt lgkmcnt(0)
global_store_dword v0, v1, s[0:1] offset:40
s_endpgm
.section .rodata,"a",@progbits
.p2align 6, 0x0
.amdhsa_kernel _Z5MMMMMPi