; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; FIXME: Merge into imm.ll

; Each kernel below stores one 16-bit constant through the global pointer %out.
; The checks pin the expected instruction sequence for gfx1010, gfx1100, tonga
; and tahiti; MC encodings are checked on all of them except tahiti, whose llc
; invocation does not pass -show-mc-encoding.

; i16 -32768 has the bit pattern 0x8000 (the same bits as half -0.0). The store
; is volatile; unlike the half stores below, its checks also expect dlc on
; gfx1100 and a trailing s_waitcnt_vscnt / s_waitcnt vmcnt(0).
define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_neg_0.0_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_neg_0.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_neg_0.0_i16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_neg_0.0_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x8000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_endpgm
  store volatile i16 -32768, i16 addrspace(1)* %out
  ret void
}

; half 0.0: the checks expect the inline constant 0 (4-byte v_mov_b32 encoding,
; no trailing literal dword).
define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_0.0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_0.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_0.0_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_0.0_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
  store half 0.0, half addrspace(1)* %out
  ret void
}

; half -0.0 = 0x8000; the checks expect the 32-bit literal 0xffff8000, except on
; tahiti where it is 0x8000.
define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_imm_neg_0.0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_imm_neg_0.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_imm_neg_0.0_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_imm_neg_0.0_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x8000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
  store half -0.0, half addrspace(1)* %out
  ret void
}

; half 0.5 = 0x3800; the checks expect a v_mov_b32 carrying it as a 32-bit literal.
define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_0.5_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_0.5_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_0.5_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_0.5_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3800
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
  store half 0.5, half addrspace(1)* %out
  ret void
}

; half -0.5 = 0xb800; the literal is 0xffffb800 except on tahiti (0xb800).
define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_m_0.5_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_m_0.5_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_m_0.5_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_m_0.5_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0xb800
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
  store half -0.5, half addrspace(1)* %out
  ret void
}

; half 1.0 = 0x3c00; the checks expect a v_mov_b32 carrying it as a 32-bit literal.
define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_1.0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_1.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_1.0_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_1.0_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3c00
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
  store half 1.0, half addrspace(1)* %out
  ret void
}

; half -1.0 = 0xbc00; the literal is 0xffffbc00 except on tahiti (0xbc00).
define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_m_1.0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_m_1.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_m_1.0_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT: v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_m_1.0_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0xbc00
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
  store half -1.0, half addrspace(1)* %out
  ret void
}

; half 2.0 = 0x4000; the checks expect a v_mov_b32 carrying it as a 32-bit literal.
define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_2.0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_2.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_2.0_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT: v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_2.0_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x4000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
  store half 2.0, half addrspace(1)* %out
  ret void
}

; half -2.0 = 0xc000; the literal is 0xffffc000 except on tahiti (0xc000).
define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_m_2.0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_m_2.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_m_2.0_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT: v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_m_2.0_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0xc000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
  store half -2.0, half addrspace(1)* %out
  ret void
}

; half 4.0 = 0x4400; the checks expect a v_mov_b32 carrying it as a 32-bit literal.
define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_4.0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_4.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_4.0_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT: v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_4.0_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x4400
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
  store half 4.0, half addrspace(1)* %out
  ret void
}

; half -4.0 = 0xc400; the literal is 0xffffc400 except on tahiti (0xc400).
define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_m_4.0_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_m_4.0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_m_4.0_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT: v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_m_4.0_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0xc400
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
  store half -4.0, half addrspace(1)* %out
  ret void
}

; half 0xH3118 (1/(2*pi) per the kernel name); the checks expect a v_mov_b32
; carrying 0x3118 as a 32-bit literal.
define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_inv_2pi_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_inv_2pi_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_inv_2pi_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT: v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_inv_2pi_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3118
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
  store half 0xH3118, half addrspace(1)* %out
  ret void
}

; half 0xHB118 (the negated value of the previous kernel); the literal is
; 0xffffb118 except on tahiti (0xb118).
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_m_inv_2pi_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_inline_imm_m_inv_2pi_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_inline_imm_m_inv_2pi_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT: v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_m_inv_2pi_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0xb118
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
  store half 0xHB118, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_literal_imm_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT: v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; GFX11-LABEL: store_literal_imm_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8]
; GFX11-NEXT: v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf]
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf]
; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf]
;
; VI-LABEL: store_literal_imm_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ;
encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 623; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 624; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 625; VI-NEXT: v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00] 626; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 627; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 628; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 629; 630; SI-LABEL: store_literal_imm_f16: 631; SI: ; %bb.0: 632; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 633; SI-NEXT: s_mov_b32 s3, 0xf000 634; SI-NEXT: s_mov_b32 s2, -1 635; SI-NEXT: v_mov_b32_e32 v0, 0x6c00 636; SI-NEXT: s_waitcnt lgkmcnt(0) 637; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 638; SI-NEXT: s_endpgm 639 store half 4096.0, half addrspace(1)* %out 640 ret void 641} 642 643define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) { 644; GFX10-LABEL: add_inline_imm_0.0_f16: 645; GFX10: ; %bb.0: 646; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 647; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 648; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 649; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 650; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 651; GFX10-NEXT: v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00] 652; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 653; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 654; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 655; 656; GFX11-LABEL: add_inline_imm_0.0_f16: 657; GFX11: ; %bb.0: 658; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 659; GFX11-NEXT: 
s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 660; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 661; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 662; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 663; GFX11-NEXT: v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00] 664; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 665; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 666; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 667; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 668; 669; VI-LABEL: add_inline_imm_0.0_f16: 670; VI: ; %bb.0: 671; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 672; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 673; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 674; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 675; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 676; VI-NEXT: v_add_f16_e64 v0, s6, 0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x00,0x01,0x00] 677; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 678; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 679; 680; SI-LABEL: add_inline_imm_0.0_f16: 681; SI: ; %bb.0: 682; SI-NEXT: s_load_dword s2, s[0:1], 0xb 683; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 684; SI-NEXT: s_mov_b32 s3, 0xf000 685; SI-NEXT: s_waitcnt lgkmcnt(0) 686; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 687; SI-NEXT: s_mov_b32 s2, -1 688; SI-NEXT: v_add_f32_e32 v0, 0, v0 689; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 690; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 691; SI-NEXT: s_endpgm 692 %y = fadd half %x, 0.0 693 store 
half %y, half addrspace(1)* %out 694 ret void 695} 696 697define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) { 698; GFX10-LABEL: add_inline_imm_0.5_f16: 699; GFX10: ; %bb.0: 700; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 701; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 702; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 703; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 704; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 705; GFX10-NEXT: v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00] 706; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 707; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 708; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 709; 710; GFX11-LABEL: add_inline_imm_0.5_f16: 711; GFX11: ; %bb.0: 712; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 713; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 714; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 715; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 716; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 717; GFX11-NEXT: v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00] 718; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 719; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 720; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 721; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 722; 723; VI-LABEL: add_inline_imm_0.5_f16: 724; VI: ; %bb.0: 725; VI-NEXT: s_load_dword s6, s[4:5], 
0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 726; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 727; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 728; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 729; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 730; VI-NEXT: v_add_f16_e64 v0, s6, 0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe0,0x01,0x00] 731; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 732; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 733; 734; SI-LABEL: add_inline_imm_0.5_f16: 735; SI: ; %bb.0: 736; SI-NEXT: s_load_dword s2, s[0:1], 0xb 737; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 738; SI-NEXT: s_mov_b32 s3, 0xf000 739; SI-NEXT: s_waitcnt lgkmcnt(0) 740; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 741; SI-NEXT: s_mov_b32 s2, -1 742; SI-NEXT: v_add_f32_e32 v0, 0.5, v0 743; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 744; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 745; SI-NEXT: s_endpgm 746 %y = fadd half %x, 0.5 747 store half %y, half addrspace(1)* %out 748 ret void 749} 750 751define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) { 752; GFX10-LABEL: add_inline_imm_neg_0.5_f16: 753; GFX10: ; %bb.0: 754; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 755; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 756; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 757; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 758; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 759; GFX10-NEXT: v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00] 760; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 761; GFX10-NEXT: buffer_store_short v0, off, 
s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 762; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 763; 764; GFX11-LABEL: add_inline_imm_neg_0.5_f16: 765; GFX11: ; %bb.0: 766; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 767; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 768; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 769; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 770; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 771; GFX11-NEXT: v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00] 772; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 773; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 774; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 775; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 776; 777; VI-LABEL: add_inline_imm_neg_0.5_f16: 778; VI: ; %bb.0: 779; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 780; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 781; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 782; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 783; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 784; VI-NEXT: v_add_f16_e64 v0, s6, -0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe2,0x01,0x00] 785; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 786; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 787; 788; SI-LABEL: add_inline_imm_neg_0.5_f16: 789; SI: ; %bb.0: 790; SI-NEXT: s_load_dword s2, s[0:1], 0xb 791; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 792; SI-NEXT: s_mov_b32 s3, 0xf000 793; 
SI-NEXT: s_waitcnt lgkmcnt(0) 794; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 795; SI-NEXT: s_mov_b32 s2, -1 796; SI-NEXT: v_add_f32_e32 v0, -0.5, v0 797; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 798; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 799; SI-NEXT: s_endpgm 800 %y = fadd half %x, -0.5 801 store half %y, half addrspace(1)* %out 802 ret void 803} 804 805define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) { 806; GFX10-LABEL: add_inline_imm_1.0_f16: 807; GFX10: ; %bb.0: 808; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 809; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 810; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 811; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 812; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 813; GFX10-NEXT: v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00] 814; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 815; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 816; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 817; 818; GFX11-LABEL: add_inline_imm_1.0_f16: 819; GFX11: ; %bb.0: 820; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 821; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 822; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 823; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 824; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 825; GFX11-NEXT: v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00] 826; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 827; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; 
encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 828; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 829; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 830; 831; VI-LABEL: add_inline_imm_1.0_f16: 832; VI: ; %bb.0: 833; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 834; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 835; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 836; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 837; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 838; VI-NEXT: v_add_f16_e64 v0, s6, 1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe4,0x01,0x00] 839; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 840; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 841; 842; SI-LABEL: add_inline_imm_1.0_f16: 843; SI: ; %bb.0: 844; SI-NEXT: s_load_dword s2, s[0:1], 0xb 845; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 846; SI-NEXT: s_mov_b32 s3, 0xf000 847; SI-NEXT: s_waitcnt lgkmcnt(0) 848; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 849; SI-NEXT: s_mov_b32 s2, -1 850; SI-NEXT: v_add_f32_e32 v0, 1.0, v0 851; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 852; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 853; SI-NEXT: s_endpgm 854 %y = fadd half %x, 1.0 855 store half %y, half addrspace(1)* %out 856 ret void 857} 858 859define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) { 860; GFX10-LABEL: add_inline_imm_neg_1.0_f16: 861; GFX10: ; %bb.0: 862; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 863; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 864; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 865; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: 
[0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 866; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 867; GFX10-NEXT: v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00] 868; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 869; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 870; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 871; 872; GFX11-LABEL: add_inline_imm_neg_1.0_f16: 873; GFX11: ; %bb.0: 874; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 875; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 876; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 877; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 878; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 879; GFX11-NEXT: v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00] 880; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 881; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 882; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 883; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 884; 885; VI-LABEL: add_inline_imm_neg_1.0_f16: 886; VI: ; %bb.0: 887; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 888; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 889; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 890; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 891; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 892; VI-NEXT: v_add_f16_e64 v0, s6, -1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe6,0x01,0x00] 893; VI-NEXT: buffer_store_short v0, 
off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 894; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 895; 896; SI-LABEL: add_inline_imm_neg_1.0_f16: 897; SI: ; %bb.0: 898; SI-NEXT: s_load_dword s2, s[0:1], 0xb 899; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 900; SI-NEXT: s_mov_b32 s3, 0xf000 901; SI-NEXT: s_waitcnt lgkmcnt(0) 902; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 903; SI-NEXT: s_mov_b32 s2, -1 904; SI-NEXT: v_add_f32_e32 v0, -1.0, v0 905; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 906; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 907; SI-NEXT: s_endpgm 908 %y = fadd half %x, -1.0 909 store half %y, half addrspace(1)* %out 910 ret void 911} 912 913define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) { 914; GFX10-LABEL: add_inline_imm_2.0_f16: 915; GFX10: ; %bb.0: 916; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 917; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 918; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 919; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 920; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 921; GFX10-NEXT: v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00] 922; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 923; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 924; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 925; 926; GFX11-LABEL: add_inline_imm_2.0_f16: 927; GFX11: ; %bb.0: 928; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 929; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 930; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 931; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: 
[0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 932; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 933; GFX11-NEXT: v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00] 934; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 935; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 936; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 937; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 938; 939; VI-LABEL: add_inline_imm_2.0_f16: 940; VI: ; %bb.0: 941; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 942; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 943; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 944; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 945; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 946; VI-NEXT: v_add_f16_e64 v0, s6, 2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe8,0x01,0x00] 947; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 948; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 949; 950; SI-LABEL: add_inline_imm_2.0_f16: 951; SI: ; %bb.0: 952; SI-NEXT: s_load_dword s2, s[0:1], 0xb 953; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 954; SI-NEXT: s_mov_b32 s3, 0xf000 955; SI-NEXT: s_waitcnt lgkmcnt(0) 956; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 957; SI-NEXT: s_mov_b32 s2, -1 958; SI-NEXT: v_add_f32_e32 v0, 2.0, v0 959; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 960; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 961; SI-NEXT: s_endpgm 962 %y = fadd half %x, 2.0 963 store half %y, half addrspace(1)* %out 964 ret void 965} 966 967define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) { 968; GFX10-LABEL: add_inline_imm_neg_2.0_f16: 969; GFX10: ; %bb.0: 970; GFX10-NEXT: 
s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 971; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 972; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 973; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 974; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 975; GFX10-NEXT: v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00] 976; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 977; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 978; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 979; 980; GFX11-LABEL: add_inline_imm_neg_2.0_f16: 981; GFX11: ; %bb.0: 982; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 983; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 984; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 985; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 986; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 987; GFX11-NEXT: v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00] 988; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 989; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 990; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 991; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 992; 993; VI-LABEL: add_inline_imm_neg_2.0_f16: 994; VI: ; %bb.0: 995; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 996; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 997; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: 
[0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 998; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 999; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1000; VI-NEXT: v_add_f16_e64 v0, s6, -2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xea,0x01,0x00] 1001; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1002; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1003; 1004; SI-LABEL: add_inline_imm_neg_2.0_f16: 1005; SI: ; %bb.0: 1006; SI-NEXT: s_load_dword s2, s[0:1], 0xb 1007; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1008; SI-NEXT: s_mov_b32 s3, 0xf000 1009; SI-NEXT: s_waitcnt lgkmcnt(0) 1010; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 1011; SI-NEXT: s_mov_b32 s2, -1 1012; SI-NEXT: v_add_f32_e32 v0, -2.0, v0 1013; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1014; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1015; SI-NEXT: s_endpgm 1016 %y = fadd half %x, -2.0 1017 store half %y, half addrspace(1)* %out 1018 ret void 1019} 1020 1021define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) { 1022; GFX10-LABEL: add_inline_imm_4.0_f16: 1023; GFX10: ; %bb.0: 1024; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1025; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1026; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1027; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1028; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1029; GFX10-NEXT: v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00] 1030; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1031; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1032; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1033; 1034; GFX11-LABEL: add_inline_imm_4.0_f16: 1035; GFX11: 
; %bb.0: 1036; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1037; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1038; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1039; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1040; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1041; GFX11-NEXT: v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00] 1042; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1043; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 1044; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 1045; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1046; 1047; VI-LABEL: add_inline_imm_4.0_f16: 1048; VI: ; %bb.0: 1049; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1050; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1051; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1052; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1053; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1054; VI-NEXT: v_add_f16_e64 v0, s6, 4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xec,0x01,0x00] 1055; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1056; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1057; 1058; SI-LABEL: add_inline_imm_4.0_f16: 1059; SI: ; %bb.0: 1060; SI-NEXT: s_load_dword s2, s[0:1], 0xb 1061; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1062; SI-NEXT: s_mov_b32 s3, 0xf000 1063; SI-NEXT: s_waitcnt lgkmcnt(0) 1064; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 1065; SI-NEXT: s_mov_b32 s2, -1 1066; SI-NEXT: v_add_f32_e32 v0, 4.0, v0 1067; SI-NEXT: 
v_cvt_f16_f32_e32 v0, v0 1068; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1069; SI-NEXT: s_endpgm 1070 %y = fadd half %x, 4.0 1071 store half %y, half addrspace(1)* %out 1072 ret void 1073} 1074 1075define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) { 1076; GFX10-LABEL: add_inline_imm_neg_4.0_f16: 1077; GFX10: ; %bb.0: 1078; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1079; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1080; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1081; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1082; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1083; GFX10-NEXT: v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00] 1084; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1085; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1086; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1087; 1088; GFX11-LABEL: add_inline_imm_neg_4.0_f16: 1089; GFX11: ; %bb.0: 1090; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1091; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1092; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1093; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1094; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1095; GFX11-NEXT: v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00] 1096; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1097; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 1098; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: 
[0x03,0x00,0xb6,0xbf] 1099; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1100; 1101; VI-LABEL: add_inline_imm_neg_4.0_f16: 1102; VI: ; %bb.0: 1103; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1104; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1105; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1106; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1107; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1108; VI-NEXT: v_add_f16_e64 v0, s6, -4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xee,0x01,0x00] 1109; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1110; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1111; 1112; SI-LABEL: add_inline_imm_neg_4.0_f16: 1113; SI: ; %bb.0: 1114; SI-NEXT: s_load_dword s2, s[0:1], 0xb 1115; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1116; SI-NEXT: s_mov_b32 s3, 0xf000 1117; SI-NEXT: s_waitcnt lgkmcnt(0) 1118; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 1119; SI-NEXT: s_mov_b32 s2, -1 1120; SI-NEXT: v_add_f32_e32 v0, -4.0, v0 1121; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1122; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1123; SI-NEXT: s_endpgm 1124 %y = fadd half %x, -4.0 1125 store half %y, half addrspace(1)* %out 1126 ret void 1127} 1128 1129define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) { 1130; GFX10-LABEL: commute_add_inline_imm_0.5_f16: 1131; GFX10: ; %bb.0: 1132; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 1133; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 1134; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 1135; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 1136; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: 
[0x07,0x03,0x8b,0xbe] 1137; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1138; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 1139; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 1140; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 1141; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 1142; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 1143; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 1144; GFX10-NEXT: v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64] 1145; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1146; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1147; 1148; GFX11-LABEL: commute_add_inline_imm_0.5_f16: 1149; GFX11: ; %bb.0: 1150; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8] 1151; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1152; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31] 1153; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1154; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1155; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1156; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1157; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1158; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1159; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1160; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1161; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf] 1162; GFX11-NEXT: v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64] 1163; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: 
[0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80] 1164; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 1165; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1166; 1167; VI-LABEL: commute_add_inline_imm_0.5_f16: 1168; VI: ; %bb.0: 1169; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 1170; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 1171; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1172; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1173; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1174; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1175; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1176; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1177; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1178; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1179; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1180; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1181; VI-NEXT: v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x3e] 1182; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1183; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1184; 1185; SI-LABEL: commute_add_inline_imm_0.5_f16: 1186; SI: ; %bb.0: 1187; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1188; SI-NEXT: s_mov_b32 s7, 0xf000 1189; SI-NEXT: s_mov_b32 s6, -1 1190; SI-NEXT: s_mov_b32 s10, s6 1191; SI-NEXT: s_mov_b32 s11, s7 1192; SI-NEXT: s_waitcnt lgkmcnt(0) 1193; SI-NEXT: s_mov_b32 s8, s2 1194; SI-NEXT: s_mov_b32 s9, s3 1195; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1196; SI-NEXT: s_mov_b32 s4, s0 1197; SI-NEXT: s_mov_b32 s5, s1 1198; SI-NEXT: s_waitcnt vmcnt(0) 1199; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 1200; SI-NEXT: v_add_f32_e32 v0, 0.5, 
v0 1201; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1202; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 1203; SI-NEXT: s_endpgm 1204 %x = load half, half addrspace(1)* %in 1205 %y = fadd half %x, 0.5 1206 store half %y, half addrspace(1)* %out 1207 ret void 1208} 1209; fadd of a loaded half with 1024.0. The f16 targets below expect the operand 0x6400 followed by a trailing 32-bit literal in the encoding; the SI run adds the f32 literal 0x44800000. 1210define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) { 1211; GFX10-LABEL: commute_add_literal_f16: 1212; GFX10: ; %bb.0: 1213; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 1214; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 1215; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 1216; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 1217; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe] 1218; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1219; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 1220; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 1221; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 1222; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 1223; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 1224; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 1225; GFX10-NEXT: v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00] 1226; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1227; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1228; 1229; GFX11-LABEL: commute_add_literal_f16: 1230; GFX11: ; %bb.0: 1231; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8] 1232; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1233; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31] 1234; 
GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1235; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1236; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1237; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1238; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1239; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1240; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1241; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1242; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf] 1243; GFX11-NEXT: v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00] 1244; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80] 1245; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 1246; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1247; 1248; VI-LABEL: commute_add_literal_f16: 1249; VI: ; %bb.0: 1250; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 1251; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 1252; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1253; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1254; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1255; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1256; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1257; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1258; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1259; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1260; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1261; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 
1262; VI-NEXT: v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x3e,0x00,0x64,0x00,0x00] 1263; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1264; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1265; 1266; SI-LABEL: commute_add_literal_f16: 1267; SI: ; %bb.0: 1268; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1269; SI-NEXT: s_mov_b32 s7, 0xf000 1270; SI-NEXT: s_mov_b32 s6, -1 1271; SI-NEXT: s_mov_b32 s10, s6 1272; SI-NEXT: s_mov_b32 s11, s7 1273; SI-NEXT: s_waitcnt lgkmcnt(0) 1274; SI-NEXT: s_mov_b32 s8, s2 1275; SI-NEXT: s_mov_b32 s9, s3 1276; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1277; SI-NEXT: s_mov_b32 s4, s0 1278; SI-NEXT: s_mov_b32 s5, s1 1279; SI-NEXT: s_waitcnt vmcnt(0) 1280; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 1281; SI-NEXT: v_add_f32_e32 v0, 0x44800000, v0 1282; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1283; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 1284; SI-NEXT: s_endpgm 1285 %x = load half, half addrspace(1)* %in 1286 %y = fadd half %x, 1024.0 1287 store half %y, half addrspace(1)* %out 1288 ret void 1289} 1290; 0xH0001 is the half constant whose bit pattern equals integer 1. The f16 targets below expect it as the integer inline operand 1; the SI run adds the f32 literal 0x33800000 after converting the input. 1291define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) { 1292; GFX10-LABEL: add_inline_imm_1_f16: 1293; GFX10: ; %bb.0: 1294; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1295; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1296; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1297; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1298; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1299; GFX10-NEXT: v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00] 1300; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1301; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1302; 
GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1303; 1304; GFX11-LABEL: add_inline_imm_1_f16: 1305; GFX11: ; %bb.0: 1306; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1307; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1308; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1309; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1310; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1311; GFX11-NEXT: v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00] 1312; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1313; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 1314; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 1315; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1316; 1317; VI-LABEL: add_inline_imm_1_f16: 1318; VI: ; %bb.0: 1319; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1320; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1321; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1322; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1323; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1324; VI-NEXT: v_add_f16_e64 v0, s6, 1 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x02,0x01,0x00] 1325; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1326; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1327; 1328; SI-LABEL: add_inline_imm_1_f16: 1329; SI: ; %bb.0: 1330; SI-NEXT: s_load_dword s2, s[0:1], 0xb 1331; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1332; SI-NEXT: s_mov_b32 s3, 0xf000 1333; SI-NEXT: s_waitcnt lgkmcnt(0) 1334; SI-NEXT: v_cvt_f32_f16_e32 
v0, s2 1335; SI-NEXT: s_mov_b32 s2, -1 1336; SI-NEXT: v_add_f32_e32 v0, 0x33800000, v0 1337; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1338; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1339; SI-NEXT: s_endpgm 1340 %y = fadd half %x, 0xH0001 1341 store half %y, half addrspace(1)* %out 1342 ret void 1343} 1344; 0xH0002 is the half constant whose bit pattern equals integer 2. The f16 targets below expect it as the integer inline operand 2; the SI run adds the f32 literal 0x34000000 after converting the input. 1345define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) { 1346; GFX10-LABEL: add_inline_imm_2_f16: 1347; GFX10: ; %bb.0: 1348; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1349; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1350; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1351; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1352; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1353; GFX10-NEXT: v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00] 1354; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1355; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1356; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1357; 1358; GFX11-LABEL: add_inline_imm_2_f16: 1359; GFX11: ; %bb.0: 1360; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1361; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1362; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1363; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1364; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1365; GFX11-NEXT: v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00] 1366; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1367; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: 
[0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 1368; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 1369; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1370; 1371; VI-LABEL: add_inline_imm_2_f16: 1372; VI: ; %bb.0: 1373; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1374; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1375; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1376; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1377; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1378; VI-NEXT: v_add_f16_e64 v0, s6, 2 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x04,0x01,0x00] 1379; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1380; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1381; 1382; SI-LABEL: add_inline_imm_2_f16: 1383; SI: ; %bb.0: 1384; SI-NEXT: s_load_dword s2, s[0:1], 0xb 1385; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1386; SI-NEXT: s_mov_b32 s3, 0xf000 1387; SI-NEXT: s_waitcnt lgkmcnt(0) 1388; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 1389; SI-NEXT: s_mov_b32 s2, -1 1390; SI-NEXT: v_add_f32_e32 v0, 0x34000000, v0 1391; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1392; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1393; SI-NEXT: s_endpgm 1394 %y = fadd half %x, 0xH0002 1395 store half %y, half addrspace(1)* %out 1396 ret void 1397} 1398; 0xH0010 is the half constant whose bit pattern equals integer 16. The f16 targets below expect it as the integer inline operand 16; the SI run adds the f32 literal 0x35800000 after converting the input. 1399define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) { 1400; GFX10-LABEL: add_inline_imm_16_f16: 1401; GFX10: ; %bb.0: 1402; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1403; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1404; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1405; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: 
[0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1406; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1407; GFX10-NEXT: v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00] 1408; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1409; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1410; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1411; 1412; GFX11-LABEL: add_inline_imm_16_f16: 1413; GFX11: ; %bb.0: 1414; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1415; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1416; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1417; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1418; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1419; GFX11-NEXT: v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00] 1420; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1421; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 1422; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 1423; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1424; 1425; VI-LABEL: add_inline_imm_16_f16: 1426; VI: ; %bb.0: 1427; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1428; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1429; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1430; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1431; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1432; VI-NEXT: v_add_f16_e64 v0, s6, 16 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x20,0x01,0x00] 1433; VI-NEXT: 
buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1434; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1435; 1436; SI-LABEL: add_inline_imm_16_f16: 1437; SI: ; %bb.0: 1438; SI-NEXT: s_load_dword s2, s[0:1], 0xb 1439; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1440; SI-NEXT: s_mov_b32 s3, 0xf000 1441; SI-NEXT: s_waitcnt lgkmcnt(0) 1442; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 1443; SI-NEXT: s_mov_b32 s2, -1 1444; SI-NEXT: v_add_f32_e32 v0, 0x35800000, v0 1445; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1446; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1447; SI-NEXT: s_endpgm 1448 %y = fadd half %x, 0xH0010 1449 store half %y, half addrspace(1)* %out 1450 ret void 1451} 1452; The sum is computed as an i16 add of -1 and only bitcast to half for the store, so every run below expects an integer add with the inline operand -1. 1453define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) { 1454; GFX10-LABEL: add_inline_imm_neg_1_f16: 1455; GFX10: ; %bb.0: 1456; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 1457; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 1458; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 1459; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 1460; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe] 1461; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1462; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 1463; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 1464; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 1465; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 1466; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 1467; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 1468; GFX10-NEXT: v_add_nc_u16 v0, v0, -1 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x83,0x01,0x00] 1469; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: 
[0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1470; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1471; 1472; GFX11-LABEL: add_inline_imm_neg_1_f16: 1473; GFX11: ; %bb.0: 1474; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8] 1475; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1476; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31] 1477; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1478; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1479; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1480; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1481; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1482; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1483; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1484; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1485; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf] 1486; GFX11-NEXT: v_add_nc_u16 v0, v0, -1 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x83,0x01,0x00] 1487; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80] 1488; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 1489; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1490; 1491; VI-LABEL: add_inline_imm_neg_1_f16: 1492; VI: ; %bb.0: 1493; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 1494; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 1495; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1496; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1497; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1498; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: 
[0x7f,0x00,0x8c,0xbf] 1499; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1500; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1501; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1502; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1503; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1504; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1505; VI-NEXT: v_add_u16_e32 v0, -1, v0 ; encoding: [0xc1,0x00,0x00,0x4c] 1506; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1507; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1508; 1509; SI-LABEL: add_inline_imm_neg_1_f16: 1510; SI: ; %bb.0: 1511; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1512; SI-NEXT: s_mov_b32 s7, 0xf000 1513; SI-NEXT: s_mov_b32 s6, -1 1514; SI-NEXT: s_mov_b32 s10, s6 1515; SI-NEXT: s_mov_b32 s11, s7 1516; SI-NEXT: s_waitcnt lgkmcnt(0) 1517; SI-NEXT: s_mov_b32 s8, s2 1518; SI-NEXT: s_mov_b32 s9, s3 1519; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1520; SI-NEXT: s_mov_b32 s4, s0 1521; SI-NEXT: s_mov_b32 s5, s1 1522; SI-NEXT: s_waitcnt vmcnt(0) 1523; SI-NEXT: v_add_i32_e32 v0, vcc, -1, v0 1524; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 1525; SI-NEXT: s_endpgm 1526 %x = load i16, i16 addrspace(1)* %in 1527 %y = add i16 %x, -1 1528 %ybc = bitcast i16 %y to half 1529 store half %ybc, half addrspace(1)* %out 1530 ret void 1531} 1532; The sum is computed as an i16 add of -2 and only bitcast to half for the store, so every run below expects an integer add with the inline operand -2. 1533define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) { 1534; GFX10-LABEL: add_inline_imm_neg_2_f16: 1535; GFX10: ; %bb.0: 1536; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 1537; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 1538; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 1539; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: 
[0x06,0x03,0x8a,0xbe] 1540; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe] 1541; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1542; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 1543; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 1544; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 1545; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 1546; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 1547; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 1548; GFX10-NEXT: v_add_nc_u16 v0, v0, -2 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x85,0x01,0x00] 1549; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1550; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1551; 1552; GFX11-LABEL: add_inline_imm_neg_2_f16: 1553; GFX11: ; %bb.0: 1554; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8] 1555; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1556; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31] 1557; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1558; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1559; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1560; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1561; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1562; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1563; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1564; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1565; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf] 1566; GFX11-NEXT: v_add_nc_u16 v0, v0, -2 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x85,0x01,0x00] 
1567; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80] 1568; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 1569; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1570; 1571; VI-LABEL: add_inline_imm_neg_2_f16: 1572; VI: ; %bb.0: 1573; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 1574; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 1575; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1576; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1577; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1578; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1579; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1580; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1581; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1582; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1583; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1584; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1585; VI-NEXT: v_add_u16_e32 v0, -2, v0 ; encoding: [0xc2,0x00,0x00,0x4c] 1586; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1587; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1588; 1589; SI-LABEL: add_inline_imm_neg_2_f16: 1590; SI: ; %bb.0: 1591; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1592; SI-NEXT: s_mov_b32 s7, 0xf000 1593; SI-NEXT: s_mov_b32 s6, -1 1594; SI-NEXT: s_mov_b32 s10, s6 1595; SI-NEXT: s_mov_b32 s11, s7 1596; SI-NEXT: s_waitcnt lgkmcnt(0) 1597; SI-NEXT: s_mov_b32 s8, s2 1598; SI-NEXT: s_mov_b32 s9, s3 1599; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1600; SI-NEXT: s_mov_b32 s4, s0 1601; SI-NEXT: s_mov_b32 s5, s1 1602; SI-NEXT: s_waitcnt vmcnt(0) 1603; SI-NEXT: 
v_add_i32_e32 v0, vcc, -2, v0 1604; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 1605; SI-NEXT: s_endpgm 1606 %x = load i16, i16 addrspace(1)* %in 1607 %y = add i16 %x, -2 1608 %ybc = bitcast i16 %y to half 1609 store half %ybc, half addrspace(1)* %out 1610 ret void 1611} 1612; The sum is computed as an i16 add of -16 and only bitcast to half for the store, so every run below expects an integer add with the inline operand -16. 1613define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) { 1614; GFX10-LABEL: add_inline_imm_neg_16_f16: 1615; GFX10: ; %bb.0: 1616; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 1617; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 1618; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 1619; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 1620; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe] 1621; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1622; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 1623; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 1624; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 1625; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 1626; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 1627; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 1628; GFX10-NEXT: v_add_nc_u16 v0, v0, -16 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0xa1,0x01,0x00] 1629; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1630; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1631; 1632; GFX11-LABEL: add_inline_imm_neg_16_f16: 1633; GFX11: ; %bb.0: 1634; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 ; encoding: [0x00,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8] 1635; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1636; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: 
[0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31] 1637; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1638; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1639; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1640; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1641; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1642; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1643; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1644; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1645; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf] 1646; GFX11-NEXT: v_add_nc_u16 v0, v0, -16 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0xa1,0x01,0x00] 1647; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80] 1648; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 1649; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1650; 1651; VI-LABEL: add_inline_imm_neg_16_f16: 1652; VI: ; %bb.0: 1653; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 1654; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 1655; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1656; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1657; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1658; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1659; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1660; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1661; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1662; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1663; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1664; VI-NEXT: s_waitcnt 
vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1665; VI-NEXT: v_add_u16_e32 v0, -16, v0 ; encoding: [0xd0,0x00,0x00,0x4c] 1666; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1667; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1668; 1669; SI-LABEL: add_inline_imm_neg_16_f16: 1670; SI: ; %bb.0: 1671; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1672; SI-NEXT: s_mov_b32 s7, 0xf000 1673; SI-NEXT: s_mov_b32 s6, -1 1674; SI-NEXT: s_mov_b32 s10, s6 1675; SI-NEXT: s_mov_b32 s11, s7 1676; SI-NEXT: s_waitcnt lgkmcnt(0) 1677; SI-NEXT: s_mov_b32 s8, s2 1678; SI-NEXT: s_mov_b32 s9, s3 1679; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1680; SI-NEXT: s_mov_b32 s4, s0 1681; SI-NEXT: s_mov_b32 s5, s1 1682; SI-NEXT: s_waitcnt vmcnt(0) 1683; SI-NEXT: v_add_i32_e32 v0, vcc, -16, v0 1684; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 1685; SI-NEXT: s_endpgm 1686 %x = load i16, i16 addrspace(1)* %in 1687 %y = add i16 %x, -16 1688 %ybc = bitcast i16 %y to half 1689 store half %ybc, half addrspace(1)* %out 1690 ret void 1691} 1692; 0xH003F is the half constant whose bit pattern equals integer 63. The f16 targets below expect it as the integer inline operand 63; the SI run adds the f32 literal 0x367c0000 after converting the input. 1693define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) { 1694; GFX10-LABEL: add_inline_imm_63_f16: 1695; GFX10: ; %bb.0: 1696; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1697; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1698; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1699; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1700; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1701; GFX10-NEXT: v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00] 1702; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1703; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1704; GFX10-NEXT: s_endpgm ; encoding: 
[0x00,0x00,0x81,0xbf] 1705; 1706; GFX11-LABEL: add_inline_imm_63_f16: 1707; GFX11: ; %bb.0: 1708; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1709; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1710; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1711; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1712; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1713; GFX11-NEXT: v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00] 1714; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1715; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 1716; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 1717; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1718; 1719; VI-LABEL: add_inline_imm_63_f16: 1720; VI: ; %bb.0: 1721; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1722; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1723; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1724; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1725; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1726; VI-NEXT: v_add_f16_e64 v0, s6, 63 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x7e,0x01,0x00] 1727; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1728; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1729; 1730; SI-LABEL: add_inline_imm_63_f16: 1731; SI: ; %bb.0: 1732; SI-NEXT: s_load_dword s2, s[0:1], 0xb 1733; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1734; SI-NEXT: s_mov_b32 s3, 0xf000 1735; SI-NEXT: s_waitcnt lgkmcnt(0) 1736; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 1737; SI-NEXT: 
s_mov_b32 s2, -1 1738; SI-NEXT: v_add_f32_e32 v0, 0x367c0000, v0 1739; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1740; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1741; SI-NEXT: s_endpgm 1742 %y = fadd half %x, 0xH003F 1743 store half %y, half addrspace(1)* %out 1744 ret void 1745} 1746 1747define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) { 1748; GFX10-LABEL: add_inline_imm_64_f16: 1749; GFX10: ; %bb.0: 1750; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1751; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1752; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1753; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1754; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1755; GFX10-NEXT: v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00] 1756; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1757; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1758; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1759; 1760; GFX11-LABEL: add_inline_imm_64_f16: 1761; GFX11: ; %bb.0: 1762; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1763; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8 ; encoding: [0x80,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1764; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; encoding: [0x00,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1765; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1766; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1767; GFX11-NEXT: v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00] 1768; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1769; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 
1770; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] 1771; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1772; 1773; VI-LABEL: add_inline_imm_64_f16: 1774; VI: ; %bb.0: 1775; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1776; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1777; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1778; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1779; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1780; VI-NEXT: v_add_f16_e64 v0, s6, 64 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x80,0x01,0x00] 1781; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1782; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1783; 1784; SI-LABEL: add_inline_imm_64_f16: 1785; SI: ; %bb.0: 1786; SI-NEXT: s_load_dword s2, s[0:1], 0xb 1787; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1788; SI-NEXT: s_mov_b32 s3, 0xf000 1789; SI-NEXT: s_waitcnt lgkmcnt(0) 1790; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 1791; SI-NEXT: s_mov_b32 s2, -1 1792; SI-NEXT: v_add_f32_e32 v0, 0x36800000, v0 1793; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1794; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1795; SI-NEXT: s_endpgm 1796 %y = fadd half %x, 0xH0040 1797 store half %y, half addrspace(1)* %out 1798 ret void 1799} 1800 1801; This needs to be emitted as a literal constant since the 16-bit 1802; float values do not work for 16-bit integer operations. 
; i16 multiply of %x by the bit pattern of half 0.5, stored to %out.
; Every check block expects the multiplier as the literal 0x3800
; (v_mul_lo_u16 on GFX10, GFX11 and VI; v_mul_u32_u24 on SI).
define void @mul_inline_imm_0.5_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_0.5_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_0.5_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_0.5_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x38,0x00,0x00]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_0.5_i16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s4, s6
; SI-NEXT: s_mov_b32 s5, s6
; SI-NEXT: v_mul_u32_u24_e32 v2, 0x3800, v2
; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT: s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half 0.5 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}

; i16 multiply of %x by the bit pattern of half -0.5, stored to %out.
; Every check block expects the multiplier as the literal 0xb800.
define void @mul_inline_imm_neg_0.5_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_neg_0.5_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_neg_0.5_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_neg_0.5_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT: v_mul_lo_u16_e32 v2, 0xb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_neg_0.5_i16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s4, s6
; SI-NEXT: s_mov_b32 s5, s6
; SI-NEXT: v_mul_u32_u24_e32 v2, 0xb800, v2
; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT: s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half -0.5 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}

; i16 multiply of %x by the bit pattern of half 1.0, stored to %out.
; Every check block expects the multiplier as the literal 0x3c00.
define void @mul_inline_imm_1.0_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_1.0_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_1.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_1.0_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3c00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x3c,0x00,0x00]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_1.0_i16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s4, s6
; SI-NEXT: s_mov_b32 s5, s6
; SI-NEXT: v_mul_u32_u24_e32 v2, 0x3c00, v2
; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT: s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half 1.0 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}

; i16 multiply of %x by the bit pattern of half -1.0, stored to %out.
; Every check block expects the multiplier as the literal 0xbc00.
define void @mul_inline_imm_neg_1.0_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_neg_1.0_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_neg_1.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_neg_1.0_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT: v_mul_lo_u16_e32 v2, 0xbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_neg_1.0_i16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s4, s6
; SI-NEXT: s_mov_b32 s5, s6
; SI-NEXT: v_mul_u32_u24_e32 v2, 0xbc00, v2
; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT: s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half -1.0 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}

; Shifts the bit pattern of half 2.0 left by %x (i16) and stores it to %out.
; The GFX10, GFX11 and SI checks expect the literal 0x4000 as an operand of
; the shift itself; the VI checks expect it to be loaded into s4 with
; s_movk_i32 first.
define void @shl_inline_imm_2.0_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: shl_inline_imm_2.0_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: shl_inline_imm_2.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: shl_inline_imm_2.0_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT: s_movk_i32 s4, 0x4000 ; encoding: [0x00,0x40,0x04,0xb0]
; VI-NEXT: v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: shl_inline_imm_2.0_i16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s4, s6
; SI-NEXT: s_mov_b32 s5, s6
; SI-NEXT: v_lshl_b32_e32 v2, 0x4000, v2
; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT: s_setpc_b64 s[30:31]
  %y = shl i16 bitcast (half 2.0 to i16), %x
  store i16 %y, i16 addrspace(1)* %out
  ret void
}

; Shifts the bit pattern of half -2.0 left by %x (i16) and stores it to %out.
; The GFX10 and GFX11 checks expect the literal 0xc000 as an operand of the
; shift, the SI checks expect 0xffffc000, and the VI checks expect 0xc000 to
; be loaded into s4 with s_movk_i32 first.
define void @shl_inline_imm_neg_2.0_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: shl_inline_imm_neg_2.0_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: shl_inline_imm_neg_2.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: shl_inline_imm_neg_2.0_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT: s_movk_i32 s4, 0xc000 ; encoding: [0x00,0xc0,0x04,0xb0]
; VI-NEXT: v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: shl_inline_imm_neg_2.0_i16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s4, s6
; SI-NEXT: s_mov_b32 s5, s6
; SI-NEXT: v_lshl_b32_e32 v2, 0xffffc000, v2
; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT: s_setpc_b64 s[30:31]
  %y = shl i16 bitcast (half -2.0 to i16), %x
  store i16 %y, i16 addrspace(1)* %out
  ret void
}

; i16 multiply of %x by the bit pattern of half 4.0, stored to %out.
; Every check block expects the multiplier as the literal 0x4400.
define void @mul_inline_imm_4.0_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_4.0_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_4.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_4.0_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT: v_mul_lo_u16_e32 v2, 0x4400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x44,0x00,0x00]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_4.0_i16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s4, s6
; SI-NEXT: s_mov_b32 s5, s6
; SI-NEXT: v_mul_u32_u24_e32 v2, 0x4400, v2
; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT: s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half 4.0 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}

; i16 multiply of %x by the bit pattern of half -4.0, stored to %out.
; Every check block expects the multiplier as the literal 0xc400.
define void @mul_inline_imm_neg_4.0_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_neg_4.0_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_neg_4.0_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_neg_4.0_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT: v_mul_lo_u16_e32 v2, 0xc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_neg_4.0_i16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s4, s6
; SI-NEXT: s_mov_b32 s5, s6
; SI-NEXT: v_mul_u32_u24_e32 v2, 0xc400, v2
; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT: s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half -4.0 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}

; i16 multiply of %x by the bit pattern of half 0xH3118, stored to %out.
; The test name calls this constant inv2pi; presumably it is the half
; encoding of 1/(2*pi) -- TODO confirm.
; Every check block expects the multiplier as the literal 0x3118.
define void @mul_inline_imm_inv2pi_i16(i16 addrspace(1)* %out, i16 %x) {
; GFX10-LABEL: mul_inline_imm_inv2pi_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
;
; GFX11-LABEL: mul_inline_imm_inv2pi_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00]
; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00]
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc]
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
;
; VI-LABEL: mul_inline_imm_inv2pi_i16:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3118, v2 ; encoding: [0xff,0x04,0x04,0x52,0x18,0x31,0x00,0x00]
; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
;
; SI-LABEL: mul_inline_imm_inv2pi_i16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s4, s6
; SI-NEXT: s_mov_b32 s5, s6
; SI-NEXT: v_mul_u32_u24_e32 v2, 0x3118, v2
; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; SI-NEXT: s_setpc_b64 s[30:31]
  %y = mul i16 %x, bitcast (half 0xH3118 to i16)
  store i16 %y, i16 addrspace(1)* %out
  ret void
}