; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; FIXME: Merge into imm.ll

; Exercises 16-bit immediates on three targets (gfx1010, tonga, tahiti):
; constants stored through an addrspace(1) pointer, and half constants used
; as the second operand of an fadd on a kernel argument.

; Volatile i16 store of -32768. The expected v_mov constant carries the bit
; pattern 0x8000 (written as 0xffff8000 in the gfx1010 and tonga checks), and
; the expected code waits on the store before ending the program.
define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_neg_0.0_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_inline_imm_neg_0.0_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_neg_0.0_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x8000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    s_endpgm
  store volatile i16 -32768, i16 addrspace(1)* %out
  ret void
}

; Half-constant stores. Each test below expects the constant's 16-bit pattern
; to be moved into v0 and written out with buffer_store_short.
define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_0.0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_inline_imm_0.0_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_0.0_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  store half 0.0, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_imm_neg_0.0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_imm_neg_0.0_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_imm_neg_0.0_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x8000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  store half -0.0, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_0.5_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_inline_imm_0.5_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_0.5_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x3800
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  store half 0.5, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_m_0.5_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_inline_imm_m_0.5_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_m_0.5_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0xb800
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  store half -0.5, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_1.0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_inline_imm_1.0_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_1.0_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x3c00
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  store half 1.0, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_m_1.0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_inline_imm_m_1.0_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_m_1.0_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0xbc00
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  store half -1.0, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_2.0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_inline_imm_2.0_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_2.0_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x4000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  store half 2.0, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_m_2.0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_inline_imm_m_2.0_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_m_2.0_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0xc000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  store half -2.0, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_4.0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_inline_imm_4.0_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_4.0_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x4400
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  store half 4.0, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_m_4.0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_inline_imm_m_4.0_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_m_4.0_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0xc400
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  store half -4.0, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_inv_2pi_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_inline_imm_inv_2pi_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_inv_2pi_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x3118
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  store half 0xH3118, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_inline_imm_m_inv_2pi_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_inline_imm_m_inv_2pi_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_inline_imm_m_inv_2pi_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0xb118
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  store half 0xHB118, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) {
; GFX10-LABEL: store_literal_imm_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: store_literal_imm_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: store_literal_imm_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x6c00
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  store half 4096.0, half addrspace(1)* %out
  ret void
}

; fadd of the half kernel argument %x with a constant. The gfx1010 and tonga
; checks expect a single v_add_f16_e64 that takes the constant as an operand;
; the tahiti checks expect v_cvt_f32_f16, v_add_f32 and then v_cvt_f16_f32.
define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) {
; GFX10-LABEL: add_inline_imm_0.0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: add_inline_imm_0.0_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    v_add_f16_e64 v0, s6, 0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x00,0x01,0x00]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: add_inline_imm_0.0_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_add_f32_e32 v0, 0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  %y = fadd half %x, 0.0
  store half %y, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) {
; GFX10-LABEL: add_inline_imm_0.5_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: add_inline_imm_0.5_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    v_add_f16_e64 v0, s6, 0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe0,0x01,0x00]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: add_inline_imm_0.5_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_add_f32_e32 v0, 0.5, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  %y = fadd half %x, 0.5
  store half %y, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) {
; GFX10-LABEL: add_inline_imm_neg_0.5_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: add_inline_imm_neg_0.5_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    v_add_f16_e64 v0, s6, -0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe2,0x01,0x00]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: add_inline_imm_neg_0.5_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_add_f32_e32 v0, -0.5, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  %y = fadd half %x, -0.5
  store half %y, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) {
; GFX10-LABEL: add_inline_imm_1.0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: add_inline_imm_1.0_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    v_add_f16_e64 v0, s6, 1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe4,0x01,0x00]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: add_inline_imm_1.0_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_add_f32_e32 v0, 1.0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  %y = fadd half %x, 1.0
  store half %y, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) {
; GFX10-LABEL: add_inline_imm_neg_1.0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; VI-LABEL: add_inline_imm_neg_1.0_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
; VI-NEXT:    v_add_f16_e64 v0, s6, -1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe6,0x01,0x00]
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
;
; SI-LABEL: add_inline_imm_neg_1.0_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_add_f32_e32 v0, -1.0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
  %y = fadd half %x, -1.0
  store half %y, half addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) {
; GFX10-LABEL: add_inline_imm_2.0_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
; GFX10-NEXT:    v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00]
; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
; GFX10-NEXT:    s_endpgm ; encoding:
[0x00,0x00,0x81,0xbf] 704; 705; VI-LABEL: add_inline_imm_2.0_f16: 706; VI: ; %bb.0: 707; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 708; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 709; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 710; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 711; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 712; VI-NEXT: v_add_f16_e64 v0, s6, 2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xe8,0x01,0x00] 713; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 714; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 715; 716; SI-LABEL: add_inline_imm_2.0_f16: 717; SI: ; %bb.0: 718; SI-NEXT: s_load_dword s2, s[0:1], 0xb 719; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 720; SI-NEXT: s_mov_b32 s3, 0xf000 721; SI-NEXT: s_waitcnt lgkmcnt(0) 722; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 723; SI-NEXT: s_mov_b32 s2, -1 724; SI-NEXT: v_add_f32_e32 v0, 2.0, v0 725; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 726; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 727; SI-NEXT: s_endpgm 728 %y = fadd half %x, 2.0 729 store half %y, half addrspace(1)* %out 730 ret void 731} 732 733define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) { 734; GFX10-LABEL: add_inline_imm_neg_2.0_f16: 735; GFX10: ; %bb.0: 736; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 737; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 738; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 739; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 740; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 741; GFX10-NEXT: v_add_f16_e64 v0, s2, -2.0 ; encoding: 
[0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00] 742; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 743; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 744; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 745; 746; VI-LABEL: add_inline_imm_neg_2.0_f16: 747; VI: ; %bb.0: 748; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 749; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 750; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 751; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 752; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 753; VI-NEXT: v_add_f16_e64 v0, s6, -2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xea,0x01,0x00] 754; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 755; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 756; 757; SI-LABEL: add_inline_imm_neg_2.0_f16: 758; SI: ; %bb.0: 759; SI-NEXT: s_load_dword s2, s[0:1], 0xb 760; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 761; SI-NEXT: s_mov_b32 s3, 0xf000 762; SI-NEXT: s_waitcnt lgkmcnt(0) 763; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 764; SI-NEXT: s_mov_b32 s2, -1 765; SI-NEXT: v_add_f32_e32 v0, -2.0, v0 766; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 767; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 768; SI-NEXT: s_endpgm 769 %y = fadd half %x, -2.0 770 store half %y, half addrspace(1)* %out 771 ret void 772} 773 774define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) { 775; GFX10-LABEL: add_inline_imm_4.0_f16: 776; GFX10: ; %bb.0: 777; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 778; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 779; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 
780; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 781; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 782; GFX10-NEXT: v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00] 783; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 784; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 785; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 786; 787; VI-LABEL: add_inline_imm_4.0_f16: 788; VI: ; %bb.0: 789; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 790; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 791; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 792; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 793; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 794; VI-NEXT: v_add_f16_e64 v0, s6, 4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xec,0x01,0x00] 795; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 796; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 797; 798; SI-LABEL: add_inline_imm_4.0_f16: 799; SI: ; %bb.0: 800; SI-NEXT: s_load_dword s2, s[0:1], 0xb 801; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 802; SI-NEXT: s_mov_b32 s3, 0xf000 803; SI-NEXT: s_waitcnt lgkmcnt(0) 804; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 805; SI-NEXT: s_mov_b32 s2, -1 806; SI-NEXT: v_add_f32_e32 v0, 4.0, v0 807; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 808; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 809; SI-NEXT: s_endpgm 810 %y = fadd half %x, 4.0 811 store half %y, half addrspace(1)* %out 812 ret void 813} 814 815define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) { 816; GFX10-LABEL: add_inline_imm_neg_4.0_f16: 817; GFX10: ; %bb.0: 818; GFX10-NEXT: s_clause 0x1 ; encoding: 
[0x01,0x00,0xa1,0xbf] 819; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 820; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 821; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 822; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 823; GFX10-NEXT: v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00] 824; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 825; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 826; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 827; 828; VI-LABEL: add_inline_imm_neg_4.0_f16: 829; VI: ; %bb.0: 830; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 831; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 832; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 833; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 834; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 835; VI-NEXT: v_add_f16_e64 v0, s6, -4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0xee,0x01,0x00] 836; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 837; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 838; 839; SI-LABEL: add_inline_imm_neg_4.0_f16: 840; SI: ; %bb.0: 841; SI-NEXT: s_load_dword s2, s[0:1], 0xb 842; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 843; SI-NEXT: s_mov_b32 s3, 0xf000 844; SI-NEXT: s_waitcnt lgkmcnt(0) 845; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 846; SI-NEXT: s_mov_b32 s2, -1 847; SI-NEXT: v_add_f32_e32 v0, -4.0, v0 848; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 849; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 850; SI-NEXT: s_endpgm 851 %y = fadd half %x, -4.0 852 store half %y, half 
addrspace(1)* %out 853 ret void 854} 855 856define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) { 857; GFX10-LABEL: commute_add_inline_imm_0.5_f16: 858; GFX10: ; %bb.0: 859; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 860; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 861; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 862; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 863; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe] 864; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 865; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 866; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 867; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 868; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 869; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 870; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 871; GFX10-NEXT: v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64] 872; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 873; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 874; 875; VI-LABEL: commute_add_inline_imm_0.5_f16: 876; VI: ; %bb.0: 877; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 878; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 879; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 880; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 881; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 882; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 883; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 
884; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 885; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 886; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 887; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 888; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 889; VI-NEXT: v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x3e] 890; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 891; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 892; 893; SI-LABEL: commute_add_inline_imm_0.5_f16: 894; SI: ; %bb.0: 895; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 896; SI-NEXT: s_mov_b32 s7, 0xf000 897; SI-NEXT: s_mov_b32 s6, -1 898; SI-NEXT: s_mov_b32 s10, s6 899; SI-NEXT: s_mov_b32 s11, s7 900; SI-NEXT: s_waitcnt lgkmcnt(0) 901; SI-NEXT: s_mov_b32 s8, s2 902; SI-NEXT: s_mov_b32 s9, s3 903; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 904; SI-NEXT: s_mov_b32 s4, s0 905; SI-NEXT: s_mov_b32 s5, s1 906; SI-NEXT: s_waitcnt vmcnt(0) 907; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 908; SI-NEXT: v_add_f32_e32 v0, 0.5, v0 909; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 910; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 911; SI-NEXT: s_endpgm 912 %x = load half, half addrspace(1)* %in 913 %y = fadd half %x, 0.5 914 store half %y, half addrspace(1)* %out 915 ret void 916} 917 918define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) { 919; GFX10-LABEL: commute_add_literal_f16: 920; GFX10: ; %bb.0: 921; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 922; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 923; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 924; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 925; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: 
[0x07,0x03,0x8b,0xbe] 926; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 927; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 928; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 929; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 930; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 931; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 932; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 933; GFX10-NEXT: v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00] 934; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 935; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 936; 937; VI-LABEL: commute_add_literal_f16: 938; VI: ; %bb.0: 939; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 940; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 941; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 942; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 943; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 944; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 945; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 946; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 947; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 948; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 949; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 950; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 951; VI-NEXT: v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x3e,0x00,0x64,0x00,0x00] 952; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 953; VI-NEXT: s_endpgm 
; encoding: [0x00,0x00,0x81,0xbf] 954; 955; SI-LABEL: commute_add_literal_f16: 956; SI: ; %bb.0: 957; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 958; SI-NEXT: s_mov_b32 s7, 0xf000 959; SI-NEXT: s_mov_b32 s6, -1 960; SI-NEXT: s_mov_b32 s10, s6 961; SI-NEXT: s_mov_b32 s11, s7 962; SI-NEXT: s_waitcnt lgkmcnt(0) 963; SI-NEXT: s_mov_b32 s8, s2 964; SI-NEXT: s_mov_b32 s9, s3 965; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 966; SI-NEXT: s_mov_b32 s4, s0 967; SI-NEXT: s_mov_b32 s5, s1 968; SI-NEXT: s_waitcnt vmcnt(0) 969; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 970; SI-NEXT: v_add_f32_e32 v0, 0x44800000, v0 971; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 972; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 973; SI-NEXT: s_endpgm 974 %x = load half, half addrspace(1)* %in 975 %y = fadd half %x, 1024.0 976 store half %y, half addrspace(1)* %out 977 ret void 978} 979 980define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) { 981; GFX10-LABEL: add_inline_imm_1_f16: 982; GFX10: ; %bb.0: 983; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 984; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 985; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 986; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 987; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 988; GFX10-NEXT: v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00] 989; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 990; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 991; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 992; 993; VI-LABEL: add_inline_imm_1_f16: 994; VI: ; %bb.0: 995; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 996; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: 
[0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 997; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 998; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 999; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1000; VI-NEXT: v_add_f16_e64 v0, s6, 1 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x02,0x01,0x00] 1001; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1002; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1003; 1004; SI-LABEL: add_inline_imm_1_f16: 1005; SI: ; %bb.0: 1006; SI-NEXT: s_load_dword s2, s[0:1], 0xb 1007; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1008; SI-NEXT: s_mov_b32 s3, 0xf000 1009; SI-NEXT: s_waitcnt lgkmcnt(0) 1010; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 1011; SI-NEXT: s_mov_b32 s2, -1 1012; SI-NEXT: v_add_f32_e32 v0, 0x33800000, v0 1013; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1014; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1015; SI-NEXT: s_endpgm 1016 %y = fadd half %x, 0xH0001 1017 store half %y, half addrspace(1)* %out 1018 ret void 1019} 1020 1021define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) { 1022; GFX10-LABEL: add_inline_imm_2_f16: 1023; GFX10: ; %bb.0: 1024; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1025; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1026; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1027; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1028; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1029; GFX10-NEXT: v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00] 1030; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1031; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1032; GFX10-NEXT: s_endpgm ; 
encoding: [0x00,0x00,0x81,0xbf] 1033; 1034; VI-LABEL: add_inline_imm_2_f16: 1035; VI: ; %bb.0: 1036; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1037; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1038; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1039; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1040; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1041; VI-NEXT: v_add_f16_e64 v0, s6, 2 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x04,0x01,0x00] 1042; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1043; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1044; 1045; SI-LABEL: add_inline_imm_2_f16: 1046; SI: ; %bb.0: 1047; SI-NEXT: s_load_dword s2, s[0:1], 0xb 1048; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1049; SI-NEXT: s_mov_b32 s3, 0xf000 1050; SI-NEXT: s_waitcnt lgkmcnt(0) 1051; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 1052; SI-NEXT: s_mov_b32 s2, -1 1053; SI-NEXT: v_add_f32_e32 v0, 0x34000000, v0 1054; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1055; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1056; SI-NEXT: s_endpgm 1057 %y = fadd half %x, 0xH0002 1058 store half %y, half addrspace(1)* %out 1059 ret void 1060} 1061 1062define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) { 1063; GFX10-LABEL: add_inline_imm_16_f16: 1064; GFX10: ; %bb.0: 1065; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1066; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1067; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1068; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1069; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1070; GFX10-NEXT: v_add_f16_e64 v0, s2, 16 ; 
encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00] 1071; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1072; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1073; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1074; 1075; VI-LABEL: add_inline_imm_16_f16: 1076; VI: ; %bb.0: 1077; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1078; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1079; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1080; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1081; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1082; VI-NEXT: v_add_f16_e64 v0, s6, 16 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x20,0x01,0x00] 1083; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1084; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1085; 1086; SI-LABEL: add_inline_imm_16_f16: 1087; SI: ; %bb.0: 1088; SI-NEXT: s_load_dword s2, s[0:1], 0xb 1089; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1090; SI-NEXT: s_mov_b32 s3, 0xf000 1091; SI-NEXT: s_waitcnt lgkmcnt(0) 1092; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 1093; SI-NEXT: s_mov_b32 s2, -1 1094; SI-NEXT: v_add_f32_e32 v0, 0x35800000, v0 1095; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1096; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1097; SI-NEXT: s_endpgm 1098 %y = fadd half %x, 0xH0010 1099 store half %y, half addrspace(1)* %out 1100 ret void 1101} 1102 1103define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) { 1104; GFX10-LABEL: add_inline_imm_neg_1_f16: 1105; GFX10: ; %bb.0: 1106; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 1107; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 1108; GFX10-NEXT: s_mov_b32 s7, 
0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 1109; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 1110; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe] 1111; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1112; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 1113; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 1114; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 1115; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 1116; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 1117; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 1118; GFX10-NEXT: v_add_nc_u16 v0, v0, -1 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x83,0x01,0x00] 1119; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1120; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1121; 1122; VI-LABEL: add_inline_imm_neg_1_f16: 1123; VI: ; %bb.0: 1124; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 1125; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 1126; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1127; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1128; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1129; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1130; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1131; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1132; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1133; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1134; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1135; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1136; VI-NEXT: 
v_add_u16_e32 v0, -1, v0 ; encoding: [0xc1,0x00,0x00,0x4c] 1137; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1138; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1139; 1140; SI-LABEL: add_inline_imm_neg_1_f16: 1141; SI: ; %bb.0: 1142; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1143; SI-NEXT: s_mov_b32 s7, 0xf000 1144; SI-NEXT: s_mov_b32 s6, -1 1145; SI-NEXT: s_mov_b32 s10, s6 1146; SI-NEXT: s_mov_b32 s11, s7 1147; SI-NEXT: s_waitcnt lgkmcnt(0) 1148; SI-NEXT: s_mov_b32 s8, s2 1149; SI-NEXT: s_mov_b32 s9, s3 1150; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1151; SI-NEXT: s_mov_b32 s4, s0 1152; SI-NEXT: s_mov_b32 s5, s1 1153; SI-NEXT: s_waitcnt vmcnt(0) 1154; SI-NEXT: v_add_i32_e32 v0, vcc, -1, v0 1155; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 1156; SI-NEXT: s_endpgm 1157 %x = load i16, i16 addrspace(1)* %in 1158 %y = add i16 %x, -1 1159 %ybc = bitcast i16 %y to half 1160 store half %ybc, half addrspace(1)* %out 1161 ret void 1162} 1163 1164define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) { 1165; GFX10-LABEL: add_inline_imm_neg_2_f16: 1166; GFX10: ; %bb.0: 1167; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 1168; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 1169; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 1170; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 1171; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe] 1172; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1173; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 1174; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 1175; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 1176; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 
1177; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 1178; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 1179; GFX10-NEXT: v_add_nc_u16 v0, v0, -2 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x85,0x01,0x00] 1180; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1181; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1182; 1183; VI-LABEL: add_inline_imm_neg_2_f16: 1184; VI: ; %bb.0: 1185; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 1186; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 1187; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1188; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1189; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1190; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1191; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1192; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1193; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1194; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1195; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1196; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1197; VI-NEXT: v_add_u16_e32 v0, -2, v0 ; encoding: [0xc2,0x00,0x00,0x4c] 1198; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1199; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1200; 1201; SI-LABEL: add_inline_imm_neg_2_f16: 1202; SI: ; %bb.0: 1203; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1204; SI-NEXT: s_mov_b32 s7, 0xf000 1205; SI-NEXT: s_mov_b32 s6, -1 1206; SI-NEXT: s_mov_b32 s10, s6 1207; SI-NEXT: s_mov_b32 s11, s7 1208; SI-NEXT: s_waitcnt lgkmcnt(0) 1209; SI-NEXT: s_mov_b32 s8, s2 1210; SI-NEXT: s_mov_b32 s9, s3 1211; SI-NEXT: 
buffer_load_ushort v0, off, s[8:11], 0 1212; SI-NEXT: s_mov_b32 s4, s0 1213; SI-NEXT: s_mov_b32 s5, s1 1214; SI-NEXT: s_waitcnt vmcnt(0) 1215; SI-NEXT: v_add_i32_e32 v0, vcc, -2, v0 1216; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 1217; SI-NEXT: s_endpgm 1218 %x = load i16, i16 addrspace(1)* %in 1219 %y = add i16 %x, -2 1220 %ybc = bitcast i16 %y to half 1221 store half %ybc, half addrspace(1)* %out 1222 ret void 1223} 1224 1225define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) { 1226; GFX10-LABEL: add_inline_imm_neg_16_f16: 1227; GFX10: ; %bb.0: 1228; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 1229; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 1230; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 1231; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 1232; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe] 1233; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1234; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 1235; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 1236; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 1237; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 1238; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 1239; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 1240; GFX10-NEXT: v_add_nc_u16 v0, v0, -16 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0xa1,0x01,0x00] 1241; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1242; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1243; 1244; VI-LABEL: add_inline_imm_neg_16_f16: 1245; VI: ; %bb.0: 1246; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 1247; 
VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 1248; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1249; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1250; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1251; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1252; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1253; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1254; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1255; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1256; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1257; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1258; VI-NEXT: v_add_u16_e32 v0, -16, v0 ; encoding: [0xd0,0x00,0x00,0x4c] 1259; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1260; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1261; 1262; SI-LABEL: add_inline_imm_neg_16_f16: 1263; SI: ; %bb.0: 1264; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1265; SI-NEXT: s_mov_b32 s7, 0xf000 1266; SI-NEXT: s_mov_b32 s6, -1 1267; SI-NEXT: s_mov_b32 s10, s6 1268; SI-NEXT: s_mov_b32 s11, s7 1269; SI-NEXT: s_waitcnt lgkmcnt(0) 1270; SI-NEXT: s_mov_b32 s8, s2 1271; SI-NEXT: s_mov_b32 s9, s3 1272; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1273; SI-NEXT: s_mov_b32 s4, s0 1274; SI-NEXT: s_mov_b32 s5, s1 1275; SI-NEXT: s_waitcnt vmcnt(0) 1276; SI-NEXT: v_add_i32_e32 v0, vcc, -16, v0 1277; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 1278; SI-NEXT: s_endpgm 1279 %x = load i16, i16 addrspace(1)* %in 1280 %y = add i16 %x, -16 1281 %ybc = bitcast i16 %y to half 1282 store half %ybc, half addrspace(1)* %out 1283 ret void 1284} 1285 1286define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) { 1287; GFX10-LABEL: add_inline_imm_63_f16: 1288; GFX10: ; %bb.0: 1289; 
GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1290; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1291; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1292; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1293; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1294; GFX10-NEXT: v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00] 1295; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1296; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1297; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1298; 1299; VI-LABEL: add_inline_imm_63_f16: 1300; VI: ; %bb.0: 1301; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1302; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1303; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1304; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1305; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1306; VI-NEXT: v_add_f16_e64 v0, s6, 63 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x7e,0x01,0x00] 1307; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1308; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1309; 1310; SI-LABEL: add_inline_imm_63_f16: 1311; SI: ; %bb.0: 1312; SI-NEXT: s_load_dword s2, s[0:1], 0xb 1313; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1314; SI-NEXT: s_mov_b32 s3, 0xf000 1315; SI-NEXT: s_waitcnt lgkmcnt(0) 1316; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 1317; SI-NEXT: s_mov_b32 s2, -1 1318; SI-NEXT: v_add_f32_e32 v0, 0x367c0000, v0 1319; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1320; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1321; SI-NEXT: s_endpgm 
; 0xH003F is the half with raw bits 0x003f, i.e. the denormal 63 * 2^-24. The
; GFX10 and VI checks above expect v_add_f16 to take it as the inline immediate
; 63; the SI checks expect the add to be done in f32 with the literal 0x367c0000.
1322 %y = fadd half %x, 0xH003F 1323 store half %y, half addrspace(1)* %out 1324 ret void 1325} 1326 1327define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) { 1328; GFX10-LABEL: add_inline_imm_64_f16: 1329; GFX10: ; %bb.0: 1330; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1331; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1332; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1333; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1334; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1335; GFX10-NEXT: v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00] 1336; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1337; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1338; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1339; 1340; VI-LABEL: add_inline_imm_64_f16: 1341; VI: ; %bb.0: 1342; VI-NEXT: s_load_dword s6, s[4:5], 0x8 ; encoding: [0x82,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1343; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1344; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1345; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1346; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1347; VI-NEXT: v_add_f16_e64 v0, s6, 64 ; encoding: [0x00,0x00,0x1f,0xd1,0x06,0x80,0x01,0x00] 1348; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1349; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1350; 1351; SI-LABEL: add_inline_imm_64_f16: 1352; SI: ; %bb.0: 1353; SI-NEXT: s_load_dword s2, s[0:1], 0xb 1354; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1355; SI-NEXT: s_mov_b32 s3, 0xf000 1356; SI-NEXT: 
s_waitcnt lgkmcnt(0) 1357; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 1358; SI-NEXT: s_mov_b32 s2, -1 1359; SI-NEXT: v_add_f32_e32 v0, 0x36800000, v0 1360; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1361; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1362; SI-NEXT: s_endpgm 1363 %y = fadd half %x, 0xH0040 1364 store half %y, half addrspace(1)* %out 1365 ret void 1366} 1367 1368; This multiplier has to be emitted as a literal constant because the 16-bit 1369; floating-point inline immediates cannot be used by 16-bit integer operations. 1370define void @mul_inline_imm_0.5_i16(i16 addrspace(1)* %out, i16 %x) { 1371; GFX10-LABEL: mul_inline_imm_0.5_i16: 1372; GFX10: ; %bb.0: 1373; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1374; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1375; GFX10-NEXT: v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] 1376; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1377; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1378; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1379; 1380; VI-LABEL: mul_inline_imm_0.5_i16: 1381; VI: ; %bb.0: 1382; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1383; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x38,0x00,0x00] 1384; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1385; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1386; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1387; 1388; SI-LABEL: mul_inline_imm_0.5_i16: 1389; SI: ; %bb.0: 1390; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1391; SI-NEXT: s_mov_b32 s6, 0 1392; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1393; SI-NEXT: s_mov_b32 s7, 0xf000 1394; SI-NEXT: s_mov_b32 s4, s6 1395; SI-NEXT: s_mov_b32 s5, s6 1396; SI-NEXT: v_mul_u32_u24_e32 v2, 
0x3800, v2 1397; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1398; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1399; SI-NEXT: s_setpc_b64 s[30:31] 1400 %y = mul i16 %x, bitcast (half 0.5 to i16) 1401 store i16 %y, i16 addrspace(1)* %out 1402 ret void 1403} 1404 1405define void @mul_inline_imm_neg_0.5_i16(i16 addrspace(1)* %out, i16 %x) { 1406; GFX10-LABEL: mul_inline_imm_neg_0.5_i16: 1407; GFX10: ; %bb.0: 1408; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1409; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1410; GFX10-NEXT: v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff] 1411; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1412; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1413; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1414; 1415; VI-LABEL: mul_inline_imm_neg_0.5_i16: 1416; VI: ; %bb.0: 1417; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1418; VI-NEXT: v_mul_lo_u16_e32 v2, 0xb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff] 1419; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1420; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1421; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1422; 1423; SI-LABEL: mul_inline_imm_neg_0.5_i16: 1424; SI: ; %bb.0: 1425; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1426; SI-NEXT: s_mov_b32 s6, 0 1427; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1428; SI-NEXT: s_mov_b32 s7, 0xf000 1429; SI-NEXT: s_mov_b32 s4, s6 1430; SI-NEXT: s_mov_b32 s5, s6 1431; SI-NEXT: v_mul_u32_u24_e32 v2, 0xb800, v2 1432; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1433; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1434; SI-NEXT: s_setpc_b64 s[30:31] 1435 %y = mul i16 %x, bitcast (half -0.5 to i16) 
1436 store i16 %y, i16 addrspace(1)* %out 1437 ret void 1438} 1439 1440define void @mul_inline_imm_1.0_i16(i16 addrspace(1)* %out, i16 %x) { 1441; GFX10-LABEL: mul_inline_imm_1.0_i16: 1442; GFX10: ; %bb.0: 1443; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1444; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1445; GFX10-NEXT: v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00] 1446; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1447; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1448; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1449; 1450; VI-LABEL: mul_inline_imm_1.0_i16: 1451; VI: ; %bb.0: 1452; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1453; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3c00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x3c,0x00,0x00] 1454; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1455; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1456; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1457; 1458; SI-LABEL: mul_inline_imm_1.0_i16: 1459; SI: ; %bb.0: 1460; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1461; SI-NEXT: s_mov_b32 s6, 0 1462; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1463; SI-NEXT: s_mov_b32 s7, 0xf000 1464; SI-NEXT: s_mov_b32 s4, s6 1465; SI-NEXT: s_mov_b32 s5, s6 1466; SI-NEXT: v_mul_u32_u24_e32 v2, 0x3c00, v2 1467; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1468; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1469; SI-NEXT: s_setpc_b64 s[30:31] 1470 %y = mul i16 %x, bitcast (half 1.0 to i16) 1471 store i16 %y, i16 addrspace(1)* %out 1472 ret void 1473} 1474 1475define void @mul_inline_imm_neg_1.0_i16(i16 addrspace(1)* %out, i16 %x) { 1476; GFX10-LABEL: mul_inline_imm_neg_1.0_i16: 1477; GFX10: ; %bb.0: 1478; 
GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1479; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1480; GFX10-NEXT: v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff] 1481; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1482; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1483; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1484; 1485; VI-LABEL: mul_inline_imm_neg_1.0_i16: 1486; VI: ; %bb.0: 1487; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1488; VI-NEXT: v_mul_lo_u16_e32 v2, 0xbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff] 1489; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1490; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1491; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1492; 1493; SI-LABEL: mul_inline_imm_neg_1.0_i16: 1494; SI: ; %bb.0: 1495; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1496; SI-NEXT: s_mov_b32 s6, 0 1497; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1498; SI-NEXT: s_mov_b32 s7, 0xf000 1499; SI-NEXT: s_mov_b32 s4, s6 1500; SI-NEXT: s_mov_b32 s5, s6 1501; SI-NEXT: v_mul_u32_u24_e32 v2, 0xbc00, v2 1502; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1503; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1504; SI-NEXT: s_setpc_b64 s[30:31] 1505 %y = mul i16 %x, bitcast (half -1.0 to i16) 1506 store i16 %y, i16 addrspace(1)* %out 1507 ret void 1508} 1509 1510define void @shl_inline_imm_2.0_i16(i16 addrspace(1)* %out, i16 %x) { 1511; GFX10-LABEL: shl_inline_imm_2.0_i16: 1512; GFX10: ; %bb.0: 1513; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1514; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1515; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0x4000 ; 
encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00] 1516; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1517; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1518; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1519; 1520; VI-LABEL: shl_inline_imm_2.0_i16: 1521; VI: ; %bb.0: 1522; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1523; VI-NEXT: s_movk_i32 s4, 0x4000 ; encoding: [0x00,0x40,0x04,0xb0] 1524; VI-NEXT: v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00] 1525; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1526; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1527; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1528; 1529; SI-LABEL: shl_inline_imm_2.0_i16: 1530; SI: ; %bb.0: 1531; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1532; SI-NEXT: s_mov_b32 s6, 0 1533; SI-NEXT: s_mov_b32 s7, 0xf000 1534; SI-NEXT: s_mov_b32 s4, s6 1535; SI-NEXT: s_mov_b32 s5, s6 1536; SI-NEXT: v_lshl_b32_e32 v2, 0x4000, v2 1537; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1538; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1539; SI-NEXT: s_setpc_b64 s[30:31] 1540 %y = shl i16 bitcast (half 2.0 to i16), %x 1541 store i16 %y, i16 addrspace(1)* %out 1542 ret void 1543} 1544 1545define void @shl_inline_imm_neg_2.0_i16(i16 addrspace(1)* %out, i16 %x) { 1546; GFX10-LABEL: shl_inline_imm_neg_2.0_i16: 1547; GFX10: ; %bb.0: 1548; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1549; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1550; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff] 1551; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1552; 
GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1553; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1554; 1555; VI-LABEL: shl_inline_imm_neg_2.0_i16: 1556; VI: ; %bb.0: 1557; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1558; VI-NEXT: s_movk_i32 s4, 0xc000 ; encoding: [0x00,0xc0,0x04,0xb0] 1559; VI-NEXT: v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00] 1560; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1561; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1562; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1563; 1564; SI-LABEL: shl_inline_imm_neg_2.0_i16: 1565; SI: ; %bb.0: 1566; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1567; SI-NEXT: s_mov_b32 s6, 0 1568; SI-NEXT: s_mov_b32 s7, 0xf000 1569; SI-NEXT: s_mov_b32 s4, s6 1570; SI-NEXT: s_mov_b32 s5, s6 1571; SI-NEXT: v_lshl_b32_e32 v2, 0xffffc000, v2 1572; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1573; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1574; SI-NEXT: s_setpc_b64 s[30:31] 1575 %y = shl i16 bitcast (half -2.0 to i16), %x 1576 store i16 %y, i16 addrspace(1)* %out 1577 ret void 1578} 1579 1580define void @mul_inline_imm_4.0_i16(i16 addrspace(1)* %out, i16 %x) { 1581; GFX10-LABEL: mul_inline_imm_4.0_i16: 1582; GFX10: ; %bb.0: 1583; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1584; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1585; GFX10-NEXT: v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00] 1586; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1587; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1588; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1589; 1590; VI-LABEL: 
mul_inline_imm_4.0_i16: 1591; VI: ; %bb.0: 1592; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1593; VI-NEXT: v_mul_lo_u16_e32 v2, 0x4400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x44,0x00,0x00] 1594; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1595; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1596; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1597; 1598; SI-LABEL: mul_inline_imm_4.0_i16: 1599; SI: ; %bb.0: 1600; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1601; SI-NEXT: s_mov_b32 s6, 0 1602; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1603; SI-NEXT: s_mov_b32 s7, 0xf000 1604; SI-NEXT: s_mov_b32 s4, s6 1605; SI-NEXT: s_mov_b32 s5, s6 1606; SI-NEXT: v_mul_u32_u24_e32 v2, 0x4400, v2 1607; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1608; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1609; SI-NEXT: s_setpc_b64 s[30:31] 1610 %y = mul i16 %x, bitcast (half 4.0 to i16) 1611 store i16 %y, i16 addrspace(1)* %out 1612 ret void 1613} 1614 1615define void @mul_inline_imm_neg_4.0_i16(i16 addrspace(1)* %out, i16 %x) { 1616; GFX10-LABEL: mul_inline_imm_neg_4.0_i16: 1617; GFX10: ; %bb.0: 1618; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1619; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1620; GFX10-NEXT: v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff] 1621; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1622; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1623; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1624; 1625; VI-LABEL: mul_inline_imm_neg_4.0_i16: 1626; VI: ; %bb.0: 1627; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1628; VI-NEXT: v_mul_lo_u16_e32 v2, 0xc400, v2 ; encoding: 
[0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff] 1629; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1630; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1631; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1632; 1633; SI-LABEL: mul_inline_imm_neg_4.0_i16: 1634; SI: ; %bb.0: 1635; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1636; SI-NEXT: s_mov_b32 s6, 0 1637; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1638; SI-NEXT: s_mov_b32 s7, 0xf000 1639; SI-NEXT: s_mov_b32 s4, s6 1640; SI-NEXT: s_mov_b32 s5, s6 1641; SI-NEXT: v_mul_u32_u24_e32 v2, 0xc400, v2 1642; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1643; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1644; SI-NEXT: s_setpc_b64 s[30:31] 1645 %y = mul i16 %x, bitcast (half -4.0 to i16) 1646 store i16 %y, i16 addrspace(1)* %out 1647 ret void 1648} 1649 1650define void @mul_inline_imm_inv2pi_i16(i16 addrspace(1)* %out, i16 %x) { 1651; GFX10-LABEL: mul_inline_imm_inv2pi_i16: 1652; GFX10: ; %bb.0: 1653; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1654; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1655; GFX10-NEXT: v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00] 1656; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1657; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 1658; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1659; 1660; VI-LABEL: mul_inline_imm_inv2pi_i16: 1661; VI: ; %bb.0: 1662; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1663; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3118, v2 ; encoding: [0xff,0x04,0x04,0x52,0x18,0x31,0x00,0x00] 1664; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1665; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 
1666; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1667; 1668; SI-LABEL: mul_inline_imm_inv2pi_i16: 1669; SI: ; %bb.0: 1670; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1671; SI-NEXT: s_mov_b32 s6, 0 1672; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1673; SI-NEXT: s_mov_b32 s7, 0xf000 1674; SI-NEXT: s_mov_b32 s4, s6 1675; SI-NEXT: s_mov_b32 s5, s6 1676; SI-NEXT: v_mul_u32_u24_e32 v2, 0x3118, v2 1677; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1678; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1679; SI-NEXT: s_setpc_b64 s[30:31] 1680 %y = mul i16 %x, bitcast (half 0xH3118 to i16) 1681 store i16 %y, i16 addrspace(1)* %out 1682 ret void 1683} 1684