; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Test splitting global instruction offsets into the low and high bits
; when the offset doesn't fit in the offset field.
;
; Every function loads one byte from %p plus a constant offset. In the
; checks, gfx900 folds offsets as large as 4095 and as small as -4096
; straight into the load, while gfx1010 stops at 2047 and -2048. Beyond
; that, part of the offset is added to the pointer before the load.
;
; Do not hand-edit the assertion lines; regenerate them with the script
; named on the first line.

; valu cases: the pointer argument is used directly from v[0:1], and any
; part of the offset that is split off is added with a VALU add/add-carry
; pair.

define i8 @global_inst_valu_offset_1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

define i8 @global_inst_valu_offset_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

define i8 @global_inst_valu_offset_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

define i8 @global_inst_valu_offset_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

define i8 @global_inst_valu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

define i8 @global_inst_valu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

define i8 @global_inst_valu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; The 2x_* cases use the offsets 4095, 8191, 16383, -4096, -8192 and -16384.

define i8 @global_inst_valu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

define i8 @global_inst_valu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

define i8 @global_inst_valu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x3800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

define i8 @global_inst_valu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

define i8 @global_inst_valu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

define i8 @global_inst_valu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xffffc000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; 64-bit offsets: bit 33 is set on top of the low bits, so the high dword of
; the pointer gets a non-zero addend (2) in every check below.

; Fill 11-bit low-bits (1ull << 33) | 2047
define i8 @global_inst_valu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 11-bit low-bits (1ull << 33) | 2048
define i8 @global_inst_valu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2048
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits (1ull << 33) | 4095
define i8 @global_inst_valu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits (1ull << 33) | 4096
define i8 @global_inst_valu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits (1ull << 33) | 8191
define i8 @global_inst_valu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits (1ull << 33) | 8192
define i8 @global_inst_valu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; neg_high cases: bit 63 is set instead of bit 33, so the high-dword addend
; in the checks is 0x80000000 (built with v_bfrev_b32 of 1 on gfx900).

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2049
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; salu cases: the same offsets, but in amdgpu_kernel functions where %p is
; loaded into s[0:1] with s_load_dwordx2. The load is volatile and its
; result is stored to an undef address.

define amdgpu_kernel void @global_inst_salu_offset_1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @global_inst_salu_offset_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @global_inst_salu_offset_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @global_inst_salu_offset_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x1800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0xfffff000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0xffffe000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x1800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x3800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL:
global_inst_salu_offset_2x_neg_11bit_max: 892; GFX10: ; %bb.0: 893; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 894; GFX10-NEXT: ; implicit-def: $vcc_hi 895; GFX10-NEXT: s_waitcnt lgkmcnt(0) 896; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xfffff000, s0 897; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 898; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 899; GFX10-NEXT: s_waitcnt vmcnt(0) 900; GFX10-NEXT: global_store_byte v[0:1], v0, off 901; GFX10-NEXT: s_endpgm 902 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096 903 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 904 store i8 %load, i8 addrspace(1)* undef 905 ret void 906} 907 908define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) { 909; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max: 910; GFX9: ; %bb.0: 911; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 912; GFX9-NEXT: s_waitcnt lgkmcnt(0) 913; GFX9-NEXT: v_mov_b32_e32 v0, s0 914; GFX9-NEXT: v_mov_b32_e32 v1, s1 915; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 916; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 917; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 918; GFX9-NEXT: s_waitcnt vmcnt(0) 919; GFX9-NEXT: global_store_byte v[0:1], v0, off 920; GFX9-NEXT: s_endpgm 921; 922; GFX10-LABEL: global_inst_salu_offset_2x_neg_12bit_max: 923; GFX10: ; %bb.0: 924; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 925; GFX10-NEXT: ; implicit-def: $vcc_hi 926; GFX10-NEXT: s_waitcnt lgkmcnt(0) 927; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffe000, s0 928; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 929; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 930; GFX10-NEXT: s_waitcnt vmcnt(0) 931; GFX10-NEXT: global_store_byte v[0:1], v0, off 932; GFX10-NEXT: s_endpgm 933 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192 934 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 935 store i8 %load, i8 addrspace(1)* undef 936 ret void 937} 938 939define amdgpu_kernel void 
@global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) { 940; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max: 941; GFX9: ; %bb.0: 942; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 943; GFX9-NEXT: s_waitcnt lgkmcnt(0) 944; GFX9-NEXT: v_mov_b32_e32 v0, s0 945; GFX9-NEXT: v_mov_b32_e32 v1, s1 946; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0 947; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 948; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 949; GFX9-NEXT: s_waitcnt vmcnt(0) 950; GFX9-NEXT: global_store_byte v[0:1], v0, off 951; GFX9-NEXT: s_endpgm 952; 953; GFX10-LABEL: global_inst_salu_offset_2x_neg_13bit_max: 954; GFX10: ; %bb.0: 955; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 956; GFX10-NEXT: ; implicit-def: $vcc_hi 957; GFX10-NEXT: s_waitcnt lgkmcnt(0) 958; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffc000, s0 959; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 960; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 961; GFX10-NEXT: s_waitcnt vmcnt(0) 962; GFX10-NEXT: global_store_byte v[0:1], v0, off 963; GFX10-NEXT: s_endpgm 964 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384 965 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 966 store i8 %load, i8 addrspace(1)* undef 967 ret void 968} 969 970; Fill 11-bit low-bits (1ull << 33) | 2047 971define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) { 972; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0: 973; GFX9: ; %bb.0: 974; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 975; GFX9-NEXT: s_waitcnt lgkmcnt(0) 976; GFX9-NEXT: v_mov_b32_e32 v1, s1 977; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 978; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 979; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 980; GFX9-NEXT: s_waitcnt vmcnt(0) 981; GFX9-NEXT: global_store_byte v[0:1], v0, off 982; GFX9-NEXT: s_endpgm 983; 984; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split0: 985; GFX10: ; %bb.0: 986; GFX10-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x24 987; GFX10-NEXT: ; implicit-def: $vcc_hi 988; GFX10-NEXT: s_waitcnt lgkmcnt(0) 989; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0, s0 990; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 991; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 992; GFX10-NEXT: s_waitcnt vmcnt(0) 993; GFX10-NEXT: global_store_byte v[0:1], v0, off 994; GFX10-NEXT: s_endpgm 995 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639 996 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 997 store i8 %load, i8 addrspace(1)* undef 998 ret void 999} 1000 1001; Fill 11-bit low-bits (1ull << 33) | 2048 1002define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) { 1003; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1: 1004; GFX9: ; %bb.0: 1005; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1006; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1007; GFX9-NEXT: v_mov_b32_e32 v1, s1 1008; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 1009; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1010; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048 1011; GFX9-NEXT: s_waitcnt vmcnt(0) 1012; GFX9-NEXT: global_store_byte v[0:1], v0, off 1013; GFX9-NEXT: s_endpgm 1014; 1015; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split1: 1016; GFX10: ; %bb.0: 1017; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1018; GFX10-NEXT: ; implicit-def: $vcc_hi 1019; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1020; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0 1021; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 1022; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 1023; GFX10-NEXT: s_waitcnt vmcnt(0) 1024; GFX10-NEXT: global_store_byte v[0:1], v0, off 1025; GFX10-NEXT: s_endpgm 1026 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640 1027 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1028 store i8 %load, i8 addrspace(1)* undef 1029 ret void 1030} 1031 1032; Fill 12-bit low-bits (1ull << 33) | 4095 1033define amdgpu_kernel void 
@global_inst_salu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) { 1034; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0: 1035; GFX9: ; %bb.0: 1036; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1037; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1038; GFX9-NEXT: v_mov_b32_e32 v1, s1 1039; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 1040; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1041; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 1042; GFX9-NEXT: s_waitcnt vmcnt(0) 1043; GFX9-NEXT: global_store_byte v[0:1], v0, off 1044; GFX9-NEXT: s_endpgm 1045; 1046; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split0: 1047; GFX10: ; %bb.0: 1048; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1049; GFX10-NEXT: ; implicit-def: $vcc_hi 1050; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1051; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0 1052; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 1053; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 1054; GFX10-NEXT: s_waitcnt vmcnt(0) 1055; GFX10-NEXT: global_store_byte v[0:1], v0, off 1056; GFX10-NEXT: s_endpgm 1057 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687 1058 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1059 store i8 %load, i8 addrspace(1)* undef 1060 ret void 1061} 1062 1063; Fill 12-bit low-bits (1ull << 33) | 4096 1064define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) { 1065; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1: 1066; GFX9: ; %bb.0: 1067; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1068; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1069; GFX9-NEXT: v_mov_b32_e32 v0, s0 1070; GFX9-NEXT: v_mov_b32_e32 v1, s1 1071; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1072; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1073; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 1074; GFX9-NEXT: s_waitcnt vmcnt(0) 1075; GFX9-NEXT: global_store_byte v[0:1], v0, off 1076; GFX9-NEXT: s_endpgm 1077; 1078; GFX10-LABEL: 
global_inst_salu_offset_64bit_12bit_split1: 1079; GFX10: ; %bb.0: 1080; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1081; GFX10-NEXT: ; implicit-def: $vcc_hi 1082; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1083; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1000, s0 1084; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 1085; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 1086; GFX10-NEXT: s_waitcnt vmcnt(0) 1087; GFX10-NEXT: global_store_byte v[0:1], v0, off 1088; GFX10-NEXT: s_endpgm 1089 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688 1090 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1091 store i8 %load, i8 addrspace(1)* undef 1092 ret void 1093} 1094 1095; Fill 13-bit low-bits (1ull << 33) | 8191 1096define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) { 1097; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0: 1098; GFX9: ; %bb.0: 1099; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1100; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1101; GFX9-NEXT: v_mov_b32_e32 v0, s0 1102; GFX9-NEXT: v_mov_b32_e32 v1, s1 1103; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1104; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1105; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 1106; GFX9-NEXT: s_waitcnt vmcnt(0) 1107; GFX9-NEXT: global_store_byte v[0:1], v0, off 1108; GFX9-NEXT: s_endpgm 1109; 1110; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split0: 1111; GFX10: ; %bb.0: 1112; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1113; GFX10-NEXT: ; implicit-def: $vcc_hi 1114; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1115; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1800, s0 1116; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 1117; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 1118; GFX10-NEXT: s_waitcnt vmcnt(0) 1119; GFX10-NEXT: global_store_byte v[0:1], v0, off 1120; GFX10-NEXT: s_endpgm 1121 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783 1122 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 
1123 store i8 %load, i8 addrspace(1)* undef 1124 ret void 1125} 1126 1127; Fill 13-bit low-bits (1ull << 33) | 8192 1128define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) { 1129; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1: 1130; GFX9: ; %bb.0: 1131; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1132; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1133; GFX9-NEXT: v_mov_b32_e32 v0, s0 1134; GFX9-NEXT: v_mov_b32_e32 v1, s1 1135; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 1136; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1137; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 1138; GFX9-NEXT: s_waitcnt vmcnt(0) 1139; GFX9-NEXT: global_store_byte v[0:1], v0, off 1140; GFX9-NEXT: s_endpgm 1141; 1142; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split1: 1143; GFX10: ; %bb.0: 1144; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1145; GFX10-NEXT: ; implicit-def: $vcc_hi 1146; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1147; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x2000, s0 1148; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 1149; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 1150; GFX10-NEXT: s_waitcnt vmcnt(0) 1151; GFX10-NEXT: global_store_byte v[0:1], v0, off 1152; GFX10-NEXT: s_endpgm 1153 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784 1154 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1155 store i8 %load, i8 addrspace(1)* undef 1156 ret void 1157} 1158 1159; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047 1160define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) { 1161; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: 1162; GFX9: ; %bb.0: 1163; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1164; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 1165; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1166; GFX9-NEXT: v_mov_b32_e32 v0, s0 1167; GFX9-NEXT: v_mov_b32_e32 v2, s1 1168; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1169; GFX9-NEXT: v_addc_co_u32_e32 
v1, vcc, v1, v2, vcc 1170; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2049 1171; GFX9-NEXT: s_waitcnt vmcnt(0) 1172; GFX9-NEXT: global_store_byte v[0:1], v0, off 1173; GFX9-NEXT: s_endpgm 1174; 1175; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: 1176; GFX10: ; %bb.0: 1177; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1178; GFX10-NEXT: ; implicit-def: $vcc_hi 1179; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1180; GFX10-NEXT: v_mov_b32_e32 v1, s1 1181; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, s0 1182; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1183; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 1184; GFX10-NEXT: s_waitcnt vmcnt(0) 1185; GFX10-NEXT: global_store_byte v[0:1], v0, off 1186; GFX10-NEXT: s_endpgm 1187 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761 1188 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1189 store i8 %load, i8 addrspace(1)* undef 1190 ret void 1191} 1192 1193; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048 1194define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) { 1195; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: 1196; GFX9: ; %bb.0: 1197; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1198; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 1199; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1200; GFX9-NEXT: v_mov_b32_e32 v0, s0 1201; GFX9-NEXT: v_mov_b32_e32 v2, s1 1202; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1203; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1204; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 1205; GFX9-NEXT: s_waitcnt vmcnt(0) 1206; GFX9-NEXT: global_store_byte v[0:1], v0, off 1207; GFX9-NEXT: s_endpgm 1208; 1209; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: 1210; GFX10: ; %bb.0: 1211; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1212; GFX10-NEXT: ; implicit-def: $vcc_hi 1213; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1214; GFX10-NEXT: 
v_mov_b32_e32 v1, s1 1215; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, s0 1216; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1217; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 1218; GFX10-NEXT: s_waitcnt vmcnt(0) 1219; GFX10-NEXT: global_store_byte v[0:1], v0, off 1220; GFX10-NEXT: s_endpgm 1221 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760 1222 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1223 store i8 %load, i8 addrspace(1)* undef 1224 ret void 1225} 1226 1227; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095 1228define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) { 1229; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: 1230; GFX9: ; %bb.0: 1231; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1232; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 1233; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1234; GFX9-NEXT: v_mov_b32_e32 v0, s0 1235; GFX9-NEXT: v_mov_b32_e32 v2, s1 1236; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1237; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1238; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 1239; GFX9-NEXT: s_waitcnt vmcnt(0) 1240; GFX9-NEXT: global_store_byte v[0:1], v0, off 1241; GFX9-NEXT: s_endpgm 1242; 1243; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: 1244; GFX10: ; %bb.0: 1245; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1246; GFX10-NEXT: ; implicit-def: $vcc_hi 1247; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1248; GFX10-NEXT: v_mov_b32_e32 v1, s1 1249; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0 1250; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1251; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 1252; GFX10-NEXT: s_waitcnt vmcnt(0) 1253; GFX10-NEXT: global_store_byte v[0:1], v0, off 1254; GFX10-NEXT: s_endpgm 1255 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713 1256 %load = load volatile i8, i8 addrspace(1)* %gep, align 
1 1257 store i8 %load, i8 addrspace(1)* undef 1258 ret void 1259} 1260 1261; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096 1262define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) { 1263; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: 1264; GFX9: ; %bb.0: 1265; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1266; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 1267; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1268; GFX9-NEXT: v_mov_b32_e32 v0, s0 1269; GFX9-NEXT: v_mov_b32_e32 v2, s1 1270; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1271; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1272; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 1273; GFX9-NEXT: s_waitcnt vmcnt(0) 1274; GFX9-NEXT: global_store_byte v[0:1], v0, off 1275; GFX9-NEXT: s_endpgm 1276; 1277; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: 1278; GFX10: ; %bb.0: 1279; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1280; GFX10-NEXT: ; implicit-def: $vcc_hi 1281; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1282; GFX10-NEXT: v_mov_b32_e32 v1, s1 1283; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0 1284; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1285; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 1286; GFX10-NEXT: s_waitcnt vmcnt(0) 1287; GFX10-NEXT: global_store_byte v[0:1], v0, off 1288; GFX10-NEXT: s_endpgm 1289 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712 1290 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1291 store i8 %load, i8 addrspace(1)* undef 1292 ret void 1293} 1294 1295; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191 1296define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) { 1297; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0: 1298; GFX9: ; %bb.0: 1299; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1300; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 1301; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1302; 
GFX9-NEXT: v_mov_b32_e32 v0, s0 1303; GFX9-NEXT: v_mov_b32_e32 v2, s1 1304; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 1305; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1306; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 1307; GFX9-NEXT: s_waitcnt vmcnt(0) 1308; GFX9-NEXT: global_store_byte v[0:1], v0, off 1309; GFX9-NEXT: s_endpgm 1310; 1311; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0: 1312; GFX10: ; %bb.0: 1313; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1314; GFX10-NEXT: ; implicit-def: $vcc_hi 1315; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1316; GFX10-NEXT: v_mov_b32_e32 v1, s1 1317; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0 1318; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1319; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 1320; GFX10-NEXT: s_waitcnt vmcnt(0) 1321; GFX10-NEXT: global_store_byte v[0:1], v0, off 1322; GFX10-NEXT: s_endpgm 1323 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617 1324 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1325 store i8 %load, i8 addrspace(1)* undef 1326 ret void 1327} 1328 1329; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192 1330define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) { 1331; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1: 1332; GFX9: ; %bb.0: 1333; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1334; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 1335; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1336; GFX9-NEXT: v_mov_b32_e32 v0, s0 1337; GFX9-NEXT: v_mov_b32_e32 v2, s1 1338; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 1339; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1340; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 1341; GFX9-NEXT: s_waitcnt vmcnt(0) 1342; GFX9-NEXT: global_store_byte v[0:1], v0, off 1343; GFX9-NEXT: s_endpgm 1344; 1345; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1: 1346; GFX10: ; %bb.0: 1347; 
GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1348; GFX10-NEXT: ; implicit-def: $vcc_hi 1349; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1350; GFX10-NEXT: v_mov_b32_e32 v1, s1 1351; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0 1352; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1353; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 1354; GFX10-NEXT: s_waitcnt vmcnt(0) 1355; GFX10-NEXT: global_store_byte v[0:1], v0, off 1356; GFX10-NEXT: s_endpgm 1357 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616 1358 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1359 store i8 %load, i8 addrspace(1)* undef 1360 ret void 1361} 1362