1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s 3; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s 4; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s 5 6; Test splitting flat instruction offsets into the low and high bits 7; when the offset doesn't fit in the offset field. 8 9define i8 @global_inst_valu_offset_1(i8 addrspace(1)* %p) { 10; GFX9-LABEL: global_inst_valu_offset_1: 11; GFX9: ; %bb.0: 12; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1 14; GFX9-NEXT: s_waitcnt vmcnt(0) 15; GFX9-NEXT: s_setpc_b64 s[30:31] 16; 17; GFX10-LABEL: global_inst_valu_offset_1: 18; GFX10: ; %bb.0: 19; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 21; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1 22; GFX10-NEXT: s_waitcnt vmcnt(0) 23; GFX10-NEXT: s_setpc_b64 s[30:31] 24; 25; GFX11-LABEL: global_inst_valu_offset_1: 26; GFX11: ; %bb.0: 27; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 28; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 29; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1 30; GFX11-NEXT: s_waitcnt vmcnt(0) 31; GFX11-NEXT: s_setpc_b64 s[30:31] 32 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1 33 %load = load i8, i8 addrspace(1)* %gep, align 4 34 ret i8 %load 35} 36 37define i8 @global_inst_valu_offset_11bit_max(i8 addrspace(1)* %p) { 38; GFX9-LABEL: global_inst_valu_offset_11bit_max: 39; GFX9: ; %bb.0: 40; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 41; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 42; GFX9-NEXT: s_waitcnt vmcnt(0) 43; GFX9-NEXT: s_setpc_b64 s[30:31] 44; 45; GFX10-LABEL: global_inst_valu_offset_11bit_max: 46; GFX10: ; %bb.0: 47; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 48; 
GFX10-NEXT: s_waitcnt_vscnt null, 0x0 49; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 50; GFX10-NEXT: s_waitcnt vmcnt(0) 51; GFX10-NEXT: s_setpc_b64 s[30:31] 52; 53; GFX11-LABEL: global_inst_valu_offset_11bit_max: 54; GFX11: ; %bb.0: 55; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 56; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 57; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2047 58; GFX11-NEXT: s_waitcnt vmcnt(0) 59; GFX11-NEXT: s_setpc_b64 s[30:31] 60 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047 61 %load = load i8, i8 addrspace(1)* %gep, align 4 62 ret i8 %load 63} 64 65define i8 @global_inst_valu_offset_12bit_max(i8 addrspace(1)* %p) { 66; GFX9-LABEL: global_inst_valu_offset_12bit_max: 67; GFX9: ; %bb.0: 68; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 69; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 70; GFX9-NEXT: s_waitcnt vmcnt(0) 71; GFX9-NEXT: s_setpc_b64 s[30:31] 72; 73; GFX10-LABEL: global_inst_valu_offset_12bit_max: 74; GFX10: ; %bb.0: 75; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 76; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 77; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 78; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 79; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 80; GFX10-NEXT: s_waitcnt vmcnt(0) 81; GFX10-NEXT: s_setpc_b64 s[30:31] 82; 83; GFX11-LABEL: global_inst_valu_offset_12bit_max: 84; GFX11: ; %bb.0: 85; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 86; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 87; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095 88; GFX11-NEXT: s_waitcnt vmcnt(0) 89; GFX11-NEXT: s_setpc_b64 s[30:31] 90 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095 91 %load = load i8, i8 addrspace(1)* %gep, align 4 92 ret i8 %load 93} 94 95define i8 @global_inst_valu_offset_13bit_max(i8 addrspace(1)* %p) { 96; GFX9-LABEL: global_inst_valu_offset_13bit_max: 97; GFX9: ; %bb.0: 98; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 99; 
GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 100; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 101; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 102; GFX9-NEXT: s_waitcnt vmcnt(0) 103; GFX9-NEXT: s_setpc_b64 s[30:31] 104; 105; GFX10-LABEL: global_inst_valu_offset_13bit_max: 106; GFX10: ; %bb.0: 107; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 108; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 109; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0 110; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 111; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 112; GFX10-NEXT: s_waitcnt vmcnt(0) 113; GFX10-NEXT: s_setpc_b64 s[30:31] 114; 115; GFX11-LABEL: global_inst_valu_offset_13bit_max: 116; GFX11: ; %bb.0: 117; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 118; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 119; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 120; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 121; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095 122; GFX11-NEXT: s_waitcnt vmcnt(0) 123; GFX11-NEXT: s_setpc_b64 s[30:31] 124 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191 125 %load = load i8, i8 addrspace(1)* %gep, align 4 126 ret i8 %load 127} 128 129define i8 @global_inst_valu_offset_neg_11bit_max(i8 addrspace(1)* %p) { 130; GFX9-LABEL: global_inst_valu_offset_neg_11bit_max: 131; GFX9: ; %bb.0: 132; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 133; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 134; GFX9-NEXT: s_waitcnt vmcnt(0) 135; GFX9-NEXT: s_setpc_b64 s[30:31] 136; 137; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max: 138; GFX10: ; %bb.0: 139; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 140; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 141; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 142; GFX10-NEXT: s_waitcnt vmcnt(0) 143; GFX10-NEXT: s_setpc_b64 s[30:31] 144; 145; GFX11-LABEL: global_inst_valu_offset_neg_11bit_max: 146; GFX11: ; %bb.0: 147; GFX11-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 148; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 149; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-2048 150; GFX11-NEXT: s_waitcnt vmcnt(0) 151; GFX11-NEXT: s_setpc_b64 s[30:31] 152 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048 153 %load = load i8, i8 addrspace(1)* %gep, align 4 154 ret i8 %load 155} 156 157define i8 @global_inst_valu_offset_neg_12bit_max(i8 addrspace(1)* %p) { 158; GFX9-LABEL: global_inst_valu_offset_neg_12bit_max: 159; GFX9: ; %bb.0: 160; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 161; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096 162; GFX9-NEXT: s_waitcnt vmcnt(0) 163; GFX9-NEXT: s_setpc_b64 s[30:31] 164; 165; GFX10-LABEL: global_inst_valu_offset_neg_12bit_max: 166; GFX10: ; %bb.0: 167; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 168; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 169; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 170; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 171; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 172; GFX10-NEXT: s_waitcnt vmcnt(0) 173; GFX10-NEXT: s_setpc_b64 s[30:31] 174; 175; GFX11-LABEL: global_inst_valu_offset_neg_12bit_max: 176; GFX11: ; %bb.0: 177; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 178; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 179; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096 180; GFX11-NEXT: s_waitcnt vmcnt(0) 181; GFX11-NEXT: s_setpc_b64 s[30:31] 182 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096 183 %load = load i8, i8 addrspace(1)* %gep, align 4 184 ret i8 %load 185} 186 187define i8 @global_inst_valu_offset_neg_13bit_max(i8 addrspace(1)* %p) { 188; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max: 189; GFX9: ; %bb.0: 190; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 191; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 192; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 193; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 194; GFX9-NEXT: s_waitcnt vmcnt(0) 195; 
GFX9-NEXT: s_setpc_b64 s[30:31] 196; 197; GFX10-LABEL: global_inst_valu_offset_neg_13bit_max: 198; GFX10: ; %bb.0: 199; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 200; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 201; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 202; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 203; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 204; GFX10-NEXT: s_waitcnt vmcnt(0) 205; GFX10-NEXT: s_setpc_b64 s[30:31] 206; 207; GFX11-LABEL: global_inst_valu_offset_neg_13bit_max: 208; GFX11: ; %bb.0: 209; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 210; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 211; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 212; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 213; GFX11-NEXT: global_load_u8 v0, v[0:1], off 214; GFX11-NEXT: s_waitcnt vmcnt(0) 215; GFX11-NEXT: s_setpc_b64 s[30:31] 216 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192 217 %load = load i8, i8 addrspace(1)* %gep, align 4 218 ret i8 %load 219} 220 221define i8 @global_inst_valu_offset_2x_11bit_max(i8 addrspace(1)* %p) { 222; GFX9-LABEL: global_inst_valu_offset_2x_11bit_max: 223; GFX9: ; %bb.0: 224; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 225; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 226; GFX9-NEXT: s_waitcnt vmcnt(0) 227; GFX9-NEXT: s_setpc_b64 s[30:31] 228; 229; GFX10-LABEL: global_inst_valu_offset_2x_11bit_max: 230; GFX10: ; %bb.0: 231; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 232; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 233; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 234; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 235; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 236; GFX10-NEXT: s_waitcnt vmcnt(0) 237; GFX10-NEXT: s_setpc_b64 s[30:31] 238; 239; GFX11-LABEL: global_inst_valu_offset_2x_11bit_max: 240; GFX11: ; %bb.0: 241; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 242; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 243; GFX11-NEXT: 
global_load_u8 v0, v[0:1], off offset:4095 244; GFX11-NEXT: s_waitcnt vmcnt(0) 245; GFX11-NEXT: s_setpc_b64 s[30:31] 246 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095 247 %load = load i8, i8 addrspace(1)* %gep, align 4 248 ret i8 %load 249} 250 251define i8 @global_inst_valu_offset_2x_12bit_max(i8 addrspace(1)* %p) { 252; GFX9-LABEL: global_inst_valu_offset_2x_12bit_max: 253; GFX9: ; %bb.0: 254; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 255; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 256; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 257; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 258; GFX9-NEXT: s_waitcnt vmcnt(0) 259; GFX9-NEXT: s_setpc_b64 s[30:31] 260; 261; GFX10-LABEL: global_inst_valu_offset_2x_12bit_max: 262; GFX10: ; %bb.0: 263; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 264; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 265; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0 266; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 267; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 268; GFX10-NEXT: s_waitcnt vmcnt(0) 269; GFX10-NEXT: s_setpc_b64 s[30:31] 270; 271; GFX11-LABEL: global_inst_valu_offset_2x_12bit_max: 272; GFX11: ; %bb.0: 273; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 274; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 275; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 276; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 277; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095 278; GFX11-NEXT: s_waitcnt vmcnt(0) 279; GFX11-NEXT: s_setpc_b64 s[30:31] 280 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191 281 %load = load i8, i8 addrspace(1)* %gep, align 4 282 ret i8 %load 283} 284 285define i8 @global_inst_valu_offset_2x_13bit_max(i8 addrspace(1)* %p) { 286; GFX9-LABEL: global_inst_valu_offset_2x_13bit_max: 287; GFX9: ; %bb.0: 288; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 289; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0 290; GFX9-NEXT: v_addc_co_u32_e32 v1, 
vcc, 0, v1, vcc 291; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 292; GFX9-NEXT: s_waitcnt vmcnt(0) 293; GFX9-NEXT: s_setpc_b64 s[30:31] 294; 295; GFX10-LABEL: global_inst_valu_offset_2x_13bit_max: 296; GFX10: ; %bb.0: 297; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 298; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 299; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3800, v0 300; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 301; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 302; GFX10-NEXT: s_waitcnt vmcnt(0) 303; GFX10-NEXT: s_setpc_b64 s[30:31] 304; 305; GFX11-LABEL: global_inst_valu_offset_2x_13bit_max: 306; GFX11: ; %bb.0: 307; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 308; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 309; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0 310; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 311; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095 312; GFX11-NEXT: s_waitcnt vmcnt(0) 313; GFX11-NEXT: s_setpc_b64 s[30:31] 314 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383 315 %load = load i8, i8 addrspace(1)* %gep, align 4 316 ret i8 %load 317} 318 319define i8 @global_inst_valu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) { 320; GFX9-LABEL: global_inst_valu_offset_2x_neg_11bit_max: 321; GFX9: ; %bb.0: 322; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 323; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096 324; GFX9-NEXT: s_waitcnt vmcnt(0) 325; GFX9-NEXT: s_setpc_b64 s[30:31] 326; 327; GFX10-LABEL: global_inst_valu_offset_2x_neg_11bit_max: 328; GFX10: ; %bb.0: 329; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 330; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 331; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 332; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 333; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 334; GFX10-NEXT: s_waitcnt vmcnt(0) 335; GFX10-NEXT: s_setpc_b64 s[30:31] 336; 337; GFX11-LABEL: global_inst_valu_offset_2x_neg_11bit_max: 
338; GFX11: ; %bb.0: 339; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 340; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 341; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096 342; GFX11-NEXT: s_waitcnt vmcnt(0) 343; GFX11-NEXT: s_setpc_b64 s[30:31] 344 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096 345 %load = load i8, i8 addrspace(1)* %gep, align 4 346 ret i8 %load 347} 348 349define i8 @global_inst_valu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) { 350; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max: 351; GFX9: ; %bb.0: 352; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 353; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 354; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 355; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 356; GFX9-NEXT: s_waitcnt vmcnt(0) 357; GFX9-NEXT: s_setpc_b64 s[30:31] 358; 359; GFX10-LABEL: global_inst_valu_offset_2x_neg_12bit_max: 360; GFX10: ; %bb.0: 361; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 362; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 363; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 364; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 365; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 366; GFX10-NEXT: s_waitcnt vmcnt(0) 367; GFX10-NEXT: s_setpc_b64 s[30:31] 368; 369; GFX11-LABEL: global_inst_valu_offset_2x_neg_12bit_max: 370; GFX11: ; %bb.0: 371; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 372; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 373; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 374; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 375; GFX11-NEXT: global_load_u8 v0, v[0:1], off 376; GFX11-NEXT: s_waitcnt vmcnt(0) 377; GFX11-NEXT: s_setpc_b64 s[30:31] 378 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192 379 %load = load i8, i8 addrspace(1)* %gep, align 4 380 ret i8 %load 381} 382 383define i8 @global_inst_valu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) { 384; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max: 385; GFX9: ; %bb.0: 
386; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 387; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0 388; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 389; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 390; GFX9-NEXT: s_waitcnt vmcnt(0) 391; GFX9-NEXT: s_setpc_b64 s[30:31] 392; 393; GFX10-LABEL: global_inst_valu_offset_2x_neg_13bit_max: 394; GFX10: ; %bb.0: 395; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 396; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 397; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 398; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 399; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 400; GFX10-NEXT: s_waitcnt vmcnt(0) 401; GFX10-NEXT: s_setpc_b64 s[30:31] 402; 403; GFX11-LABEL: global_inst_valu_offset_2x_neg_13bit_max: 404; GFX11: ; %bb.0: 405; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 406; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 407; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 408; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 409; GFX11-NEXT: global_load_u8 v0, v[0:1], off 410; GFX11-NEXT: s_waitcnt vmcnt(0) 411; GFX11-NEXT: s_setpc_b64 s[30:31] 412 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384 413 %load = load i8, i8 addrspace(1)* %gep, align 4 414 ret i8 %load 415} 416 417; Fill 11-bit low-bits (1ull << 33) | 2047 418define i8 @global_inst_valu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) { 419; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split0: 420; GFX9: ; %bb.0: 421; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 422; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 423; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 424; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 425; GFX9-NEXT: s_waitcnt vmcnt(0) 426; GFX9-NEXT: s_setpc_b64 s[30:31] 427; 428; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split0: 429; GFX10: ; %bb.0: 430; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 431; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 432; GFX10-NEXT: 
v_add_co_u32 v0, vcc_lo, 0, v0 433; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 434; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 435; GFX10-NEXT: s_waitcnt vmcnt(0) 436; GFX10-NEXT: s_setpc_b64 s[30:31] 437; 438; GFX11-LABEL: global_inst_valu_offset_64bit_11bit_split0: 439; GFX11: ; %bb.0: 440; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 441; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 442; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 443; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 444; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2047 445; GFX11-NEXT: s_waitcnt vmcnt(0) 446; GFX11-NEXT: s_setpc_b64 s[30:31] 447 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639 448 %load = load i8, i8 addrspace(1)* %gep, align 4 449 ret i8 %load 450} 451 452; Fill 11-bit low-bits (1ull << 33) | 2048 453define i8 @global_inst_valu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) { 454; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split1: 455; GFX9: ; %bb.0: 456; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 457; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 458; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 459; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048 460; GFX9-NEXT: s_waitcnt vmcnt(0) 461; GFX9-NEXT: s_setpc_b64 s[30:31] 462; 463; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1: 464; GFX10: ; %bb.0: 465; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 466; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 467; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 468; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 469; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 470; GFX10-NEXT: s_waitcnt vmcnt(0) 471; GFX10-NEXT: s_setpc_b64 s[30:31] 472; 473; GFX11-LABEL: global_inst_valu_offset_64bit_11bit_split1: 474; GFX11: ; %bb.0: 475; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 476; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 477; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 478; GFX11-NEXT: 
v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 479; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2048 480; GFX11-NEXT: s_waitcnt vmcnt(0) 481; GFX11-NEXT: s_setpc_b64 s[30:31] 482 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640 483 %load = load i8, i8 addrspace(1)* %gep, align 4 484 ret i8 %load 485} 486 487; Fill 12-bit low-bits (1ull << 33) | 4095 488define i8 @global_inst_valu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) { 489; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split0: 490; GFX9: ; %bb.0: 491; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 492; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 493; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 494; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 495; GFX9-NEXT: s_waitcnt vmcnt(0) 496; GFX9-NEXT: s_setpc_b64 s[30:31] 497; 498; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split0: 499; GFX10: ; %bb.0: 500; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 501; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 502; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 503; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 504; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 505; GFX10-NEXT: s_waitcnt vmcnt(0) 506; GFX10-NEXT: s_setpc_b64 s[30:31] 507; 508; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_split0: 509; GFX11: ; %bb.0: 510; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 511; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 512; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 513; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 514; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095 515; GFX11-NEXT: s_waitcnt vmcnt(0) 516; GFX11-NEXT: s_setpc_b64 s[30:31] 517 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687 518 %load = load i8, i8 addrspace(1)* %gep, align 4 519 ret i8 %load 520} 521 522; Fill 12-bit low-bits (1ull << 33) | 4096 523define i8 @global_inst_valu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) { 524; GFX9-LABEL: 
global_inst_valu_offset_64bit_12bit_split1: 525; GFX9: ; %bb.0: 526; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 527; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 528; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 529; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 530; GFX9-NEXT: s_waitcnt vmcnt(0) 531; GFX9-NEXT: s_setpc_b64 s[30:31] 532; 533; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1: 534; GFX10: ; %bb.0: 535; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 536; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 537; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 538; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 539; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 540; GFX10-NEXT: s_waitcnt vmcnt(0) 541; GFX10-NEXT: s_setpc_b64 s[30:31] 542; 543; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_split1: 544; GFX11: ; %bb.0: 545; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 546; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 547; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 548; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 549; GFX11-NEXT: global_load_u8 v0, v[0:1], off 550; GFX11-NEXT: s_waitcnt vmcnt(0) 551; GFX11-NEXT: s_setpc_b64 s[30:31] 552 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688 553 %load = load i8, i8 addrspace(1)* %gep, align 4 554 ret i8 %load 555} 556 557; Fill 13-bit low-bits (1ull << 33) | 8191 558define i8 @global_inst_valu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) { 559; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split0: 560; GFX9: ; %bb.0: 561; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 562; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 563; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 564; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 565; GFX9-NEXT: s_waitcnt vmcnt(0) 566; GFX9-NEXT: s_setpc_b64 s[30:31] 567; 568; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split0: 569; GFX10: ; %bb.0: 570; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
571; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 572; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0 573; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 574; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 575; GFX10-NEXT: s_waitcnt vmcnt(0) 576; GFX10-NEXT: s_setpc_b64 s[30:31] 577; 578; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_split0: 579; GFX11: ; %bb.0: 580; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 581; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 582; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 583; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 584; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095 585; GFX11-NEXT: s_waitcnt vmcnt(0) 586; GFX11-NEXT: s_setpc_b64 s[30:31] 587 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783 588 %load = load i8, i8 addrspace(1)* %gep, align 4 589 ret i8 %load 590} 591 592; Fill 13-bit low-bits (1ull << 33) | 8192 593define i8 @global_inst_valu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) { 594; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1: 595; GFX9: ; %bb.0: 596; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 597; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 598; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 599; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 600; GFX9-NEXT: s_waitcnt vmcnt(0) 601; GFX9-NEXT: s_setpc_b64 s[30:31] 602; 603; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1: 604; GFX10: ; %bb.0: 605; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 606; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 607; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 608; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 609; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 610; GFX10-NEXT: s_waitcnt vmcnt(0) 611; GFX10-NEXT: s_setpc_b64 s[30:31] 612; 613; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_split1: 614; GFX11: ; %bb.0: 615; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 616; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 617; 
GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 618; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 619; GFX11-NEXT: global_load_u8 v0, v[0:1], off 620; GFX11-NEXT: s_waitcnt vmcnt(0) 621; GFX11-NEXT: s_setpc_b64 s[30:31] 622 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784 623 %load = load i8, i8 addrspace(1)* %gep, align 4 624 ret i8 %load 625} 626 627; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047 628define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) { 629; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: 630; GFX9: ; %bb.0: 631; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 632; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 633; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 634; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 635; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2049 636; GFX9-NEXT: s_waitcnt vmcnt(0) 637; GFX9-NEXT: s_setpc_b64 s[30:31] 638; 639; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: 640; GFX10: ; %bb.0: 641; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 642; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 643; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 644; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 645; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 646; GFX10-NEXT: s_waitcnt vmcnt(0) 647; GFX10-NEXT: s_setpc_b64 s[30:31] 648; 649; GFX11-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: 650; GFX11: ; %bb.0: 651; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 652; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 653; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 654; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 655; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-2049 656; GFX11-NEXT: s_waitcnt vmcnt(0) 657; GFX11-NEXT: s_setpc_b64 s[30:31] 658 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761 659 %load = load i8, i8 addrspace(1)* %gep, align 4 
660 ret i8 %load 661} 662 663; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048 664define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) { 665; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: 666; GFX9: ; %bb.0: 667; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 668; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 669; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 670; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 671; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 672; GFX9-NEXT: s_waitcnt vmcnt(0) 673; GFX9-NEXT: s_setpc_b64 s[30:31] 674; 675; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: 676; GFX10: ; %bb.0: 677; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 678; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 679; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 680; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 681; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 682; GFX10-NEXT: s_waitcnt vmcnt(0) 683; GFX10-NEXT: s_setpc_b64 s[30:31] 684; 685; GFX11-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: 686; GFX11: ; %bb.0: 687; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 688; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 689; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 690; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 691; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-2048 692; GFX11-NEXT: s_waitcnt vmcnt(0) 693; GFX11-NEXT: s_setpc_b64 s[30:31] 694 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760 695 %load = load i8, i8 addrspace(1)* %gep, align 4 696 ret i8 %load 697} 698 699; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095 700define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) { 701; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: 702; GFX9: ; %bb.0: 703; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 704; GFX9-NEXT: v_add_co_u32_e32 
v0, vcc, 0x1000, v0 705; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 706; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 707; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 708; GFX9-NEXT: s_waitcnt vmcnt(0) 709; GFX9-NEXT: s_setpc_b64 s[30:31] 710; 711; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: 712; GFX10: ; %bb.0: 713; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 714; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 715; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 716; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 717; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 718; GFX10-NEXT: s_waitcnt vmcnt(0) 719; GFX10-NEXT: s_setpc_b64 s[30:31] 720; 721; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: 722; GFX11: ; %bb.0: 723; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 724; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 725; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 726; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 727; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-1 728; GFX11-NEXT: s_waitcnt vmcnt(0) 729; GFX11-NEXT: s_setpc_b64 s[30:31] 730 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713 731 %load = load i8, i8 addrspace(1)* %gep, align 4 732 ret i8 %load 733} 734 735; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096 736define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) { 737; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: 738; GFX9: ; %bb.0: 739; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 740; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 741; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 742; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 743; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 744; GFX9-NEXT: s_waitcnt vmcnt(0) 745; GFX9-NEXT: s_setpc_b64 s[30:31] 746; 747; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: 748; GFX10: ; %bb.0: 749; GFX10-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 750; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 751; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 752; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 753; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 754; GFX10-NEXT: s_waitcnt vmcnt(0) 755; GFX10-NEXT: s_setpc_b64 s[30:31] 756; 757; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: 758; GFX11: ; %bb.0: 759; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 760; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 761; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 762; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 763; GFX11-NEXT: global_load_u8 v0, v[0:1], off 764; GFX11-NEXT: s_waitcnt vmcnt(0) 765; GFX11-NEXT: s_setpc_b64 s[30:31] 766 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712 767 %load = load i8, i8 addrspace(1)* %gep, align 4 768 ret i8 %load 769} 770 771; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191 772define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) { 773; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: 774; GFX9: ; %bb.0: 775; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 776; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 777; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 778; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 779; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 780; GFX9-NEXT: s_waitcnt vmcnt(0) 781; GFX9-NEXT: s_setpc_b64 s[30:31] 782; 783; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: 784; GFX10: ; %bb.0: 785; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 786; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 787; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 788; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 789; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 790; GFX10-NEXT: s_waitcnt vmcnt(0) 791; GFX10-NEXT: s_setpc_b64 s[30:31] 792; 793; GFX11-LABEL: 
global_inst_valu_offset_64bit_13bit_neg_high_split0: 794; GFX11: ; %bb.0: 795; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 796; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 797; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 798; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 799; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-1 800; GFX11-NEXT: s_waitcnt vmcnt(0) 801; GFX11-NEXT: s_setpc_b64 s[30:31] 802 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617 803 %load = load i8, i8 addrspace(1)* %gep, align 4 804 ret i8 %load 805} 806 807; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192 808define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) { 809; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: 810; GFX9: ; %bb.0: 811; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 812; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 813; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 814; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 815; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 816; GFX9-NEXT: s_waitcnt vmcnt(0) 817; GFX9-NEXT: s_setpc_b64 s[30:31] 818; 819; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: 820; GFX10: ; %bb.0: 821; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 822; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 823; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 824; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 825; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 826; GFX10-NEXT: s_waitcnt vmcnt(0) 827; GFX10-NEXT: s_setpc_b64 s[30:31] 828; 829; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: 830; GFX11: ; %bb.0: 831; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 832; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 833; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 834; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 835; GFX11-NEXT: global_load_u8 v0, v[0:1], off 836; GFX11-NEXT: s_waitcnt 
vmcnt(0) 837; GFX11-NEXT: s_setpc_b64 s[30:31] 838 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616 839 %load = load i8, i8 addrspace(1)* %gep, align 4 840 ret i8 %load 841} 842 843define amdgpu_kernel void @global_inst_salu_offset_1(i8 addrspace(1)* %p) { 844; GFX9-LABEL: global_inst_salu_offset_1: 845; GFX9: ; %bb.0: 846; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 847; GFX9-NEXT: v_mov_b32_e32 v0, 0 848; GFX9-NEXT: s_waitcnt lgkmcnt(0) 849; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:1 glc 850; GFX9-NEXT: s_waitcnt vmcnt(0) 851; GFX9-NEXT: global_store_byte v[0:1], v0, off 852; GFX9-NEXT: s_endpgm 853; 854; GFX10-LABEL: global_inst_salu_offset_1: 855; GFX10: ; %bb.0: 856; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 857; GFX10-NEXT: v_mov_b32_e32 v0, 0 858; GFX10-NEXT: s_waitcnt lgkmcnt(0) 859; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:1 glc dlc 860; GFX10-NEXT: s_waitcnt vmcnt(0) 861; GFX10-NEXT: global_store_byte v[0:1], v0, off 862; GFX10-NEXT: s_endpgm 863; 864; GFX11-LABEL: global_inst_salu_offset_1: 865; GFX11: ; %bb.0: 866; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 867; GFX11-NEXT: v_mov_b32_e32 v0, 0 868; GFX11-NEXT: s_waitcnt lgkmcnt(0) 869; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:1 glc dlc 870; GFX11-NEXT: s_waitcnt vmcnt(0) 871; GFX11-NEXT: global_store_b8 v[0:1], v0, off 872; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 873; GFX11-NEXT: s_endpgm 874 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1 875 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 876 store i8 %load, i8 addrspace(1)* undef 877 ret void 878} 879 880define amdgpu_kernel void @global_inst_salu_offset_11bit_max(i8 addrspace(1)* %p) { 881; GFX9-LABEL: global_inst_salu_offset_11bit_max: 882; GFX9: ; %bb.0: 883; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 884; GFX9-NEXT: v_mov_b32_e32 v0, 0 885; GFX9-NEXT: s_waitcnt lgkmcnt(0) 886; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc 887; GFX9-NEXT: s_waitcnt 
vmcnt(0) 888; GFX9-NEXT: global_store_byte v[0:1], v0, off 889; GFX9-NEXT: s_endpgm 890; 891; GFX10-LABEL: global_inst_salu_offset_11bit_max: 892; GFX10: ; %bb.0: 893; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 894; GFX10-NEXT: v_mov_b32_e32 v0, 0 895; GFX10-NEXT: s_waitcnt lgkmcnt(0) 896; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc 897; GFX10-NEXT: s_waitcnt vmcnt(0) 898; GFX10-NEXT: global_store_byte v[0:1], v0, off 899; GFX10-NEXT: s_endpgm 900; 901; GFX11-LABEL: global_inst_salu_offset_11bit_max: 902; GFX11: ; %bb.0: 903; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 904; GFX11-NEXT: v_mov_b32_e32 v0, 0 905; GFX11-NEXT: s_waitcnt lgkmcnt(0) 906; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:2047 glc dlc 907; GFX11-NEXT: s_waitcnt vmcnt(0) 908; GFX11-NEXT: global_store_b8 v[0:1], v0, off 909; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 910; GFX11-NEXT: s_endpgm 911 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047 912 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 913 store i8 %load, i8 addrspace(1)* undef 914 ret void 915} 916 917define amdgpu_kernel void @global_inst_salu_offset_12bit_max(i8 addrspace(1)* %p) { 918; GFX9-LABEL: global_inst_salu_offset_12bit_max: 919; GFX9: ; %bb.0: 920; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 921; GFX9-NEXT: v_mov_b32_e32 v0, 0 922; GFX9-NEXT: s_waitcnt lgkmcnt(0) 923; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc 924; GFX9-NEXT: s_waitcnt vmcnt(0) 925; GFX9-NEXT: global_store_byte v[0:1], v0, off 926; GFX9-NEXT: s_endpgm 927; 928; GFX10-LABEL: global_inst_salu_offset_12bit_max: 929; GFX10: ; %bb.0: 930; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 931; GFX10-NEXT: v_mov_b32_e32 v0, 0x800 932; GFX10-NEXT: s_waitcnt lgkmcnt(0) 933; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc 934; GFX10-NEXT: s_waitcnt vmcnt(0) 935; GFX10-NEXT: global_store_byte v[0:1], v0, off 936; GFX10-NEXT: s_endpgm 937; 938; GFX11-LABEL: 
global_inst_salu_offset_12bit_max: 939; GFX11: ; %bb.0: 940; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 941; GFX11-NEXT: v_mov_b32_e32 v0, 0 942; GFX11-NEXT: s_waitcnt lgkmcnt(0) 943; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc 944; GFX11-NEXT: s_waitcnt vmcnt(0) 945; GFX11-NEXT: global_store_b8 v[0:1], v0, off 946; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 947; GFX11-NEXT: s_endpgm 948 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095 949 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 950 store i8 %load, i8 addrspace(1)* undef 951 ret void 952} 953 954define amdgpu_kernel void @global_inst_salu_offset_13bit_max(i8 addrspace(1)* %p) { 955; GFX9-LABEL: global_inst_salu_offset_13bit_max: 956; GFX9: ; %bb.0: 957; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 958; GFX9-NEXT: v_mov_b32_e32 v0, 0x1000 959; GFX9-NEXT: s_waitcnt lgkmcnt(0) 960; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc 961; GFX9-NEXT: s_waitcnt vmcnt(0) 962; GFX9-NEXT: global_store_byte v[0:1], v0, off 963; GFX9-NEXT: s_endpgm 964; 965; GFX10-LABEL: global_inst_salu_offset_13bit_max: 966; GFX10: ; %bb.0: 967; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 968; GFX10-NEXT: v_mov_b32_e32 v0, 0x1800 969; GFX10-NEXT: s_waitcnt lgkmcnt(0) 970; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc 971; GFX10-NEXT: s_waitcnt vmcnt(0) 972; GFX10-NEXT: global_store_byte v[0:1], v0, off 973; GFX10-NEXT: s_endpgm 974; 975; GFX11-LABEL: global_inst_salu_offset_13bit_max: 976; GFX11: ; %bb.0: 977; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 978; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000 979; GFX11-NEXT: s_waitcnt lgkmcnt(0) 980; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc 981; GFX11-NEXT: s_waitcnt vmcnt(0) 982; GFX11-NEXT: global_store_b8 v[0:1], v0, off 983; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 984; GFX11-NEXT: s_endpgm 985 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191 986 %load = load volatile i8, i8 
addrspace(1)* %gep, align 1 987 store i8 %load, i8 addrspace(1)* undef 988 ret void 989} 990 991define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(i8 addrspace(1)* %p) { 992; GFX9-LABEL: global_inst_salu_offset_neg_11bit_max: 993; GFX9: ; %bb.0: 994; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 995; GFX9-NEXT: v_mov_b32_e32 v0, 0 996; GFX9-NEXT: s_waitcnt lgkmcnt(0) 997; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-2048 glc 998; GFX9-NEXT: s_waitcnt vmcnt(0) 999; GFX9-NEXT: global_store_byte v[0:1], v0, off 1000; GFX9-NEXT: s_endpgm 1001; 1002; GFX10-LABEL: global_inst_salu_offset_neg_11bit_max: 1003; GFX10: ; %bb.0: 1004; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1005; GFX10-NEXT: v_mov_b32_e32 v0, 0 1006; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1007; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-2048 glc dlc 1008; GFX10-NEXT: s_waitcnt vmcnt(0) 1009; GFX10-NEXT: global_store_byte v[0:1], v0, off 1010; GFX10-NEXT: s_endpgm 1011; 1012; GFX11-LABEL: global_inst_salu_offset_neg_11bit_max: 1013; GFX11: ; %bb.0: 1014; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1015; GFX11-NEXT: v_mov_b32_e32 v0, 0 1016; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1017; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-2048 glc dlc 1018; GFX11-NEXT: s_waitcnt vmcnt(0) 1019; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1020; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1021; GFX11-NEXT: s_endpgm 1022 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048 1023 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1024 store i8 %load, i8 addrspace(1)* undef 1025 ret void 1026} 1027 1028define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)* %p) { 1029; GFX9-LABEL: global_inst_salu_offset_neg_12bit_max: 1030; GFX9: ; %bb.0: 1031; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1032; GFX9-NEXT: v_mov_b32_e32 v0, 0 1033; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1034; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-4096 glc 1035; GFX9-NEXT: 
s_waitcnt vmcnt(0) 1036; GFX9-NEXT: global_store_byte v[0:1], v0, off 1037; GFX9-NEXT: s_endpgm 1038; 1039; GFX10-LABEL: global_inst_salu_offset_neg_12bit_max: 1040; GFX10: ; %bb.0: 1041; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1042; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1043; GFX10-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 1044; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 1045; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 1046; GFX10-NEXT: s_waitcnt vmcnt(0) 1047; GFX10-NEXT: global_store_byte v[0:1], v0, off 1048; GFX10-NEXT: s_endpgm 1049; 1050; GFX11-LABEL: global_inst_salu_offset_neg_12bit_max: 1051; GFX11: ; %bb.0: 1052; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1053; GFX11-NEXT: v_mov_b32_e32 v0, 0 1054; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1055; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc 1056; GFX11-NEXT: s_waitcnt vmcnt(0) 1057; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1058; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1059; GFX11-NEXT: s_endpgm 1060 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096 1061 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1062 store i8 %load, i8 addrspace(1)* undef 1063 ret void 1064} 1065 1066define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)* %p) { 1067; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max: 1068; GFX9: ; %bb.0: 1069; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1070; GFX9-NEXT: v_mov_b32_e32 v0, 0 1071; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1072; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000 1073; GFX9-NEXT: s_addc_u32 s1, s1, -1 1074; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1075; GFX9-NEXT: s_waitcnt vmcnt(0) 1076; GFX9-NEXT: global_store_byte v[0:1], v0, off 1077; GFX9-NEXT: s_endpgm 1078; 1079; GFX10-LABEL: global_inst_salu_offset_neg_13bit_max: 1080; GFX10: ; %bb.0: 1081; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1082; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1083; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 1084; 
GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 1085; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 1086; GFX10-NEXT: s_waitcnt vmcnt(0) 1087; GFX10-NEXT: global_store_byte v[0:1], v0, off 1088; GFX10-NEXT: s_endpgm 1089; 1090; GFX11-LABEL: global_inst_salu_offset_neg_13bit_max: 1091; GFX11: ; %bb.0: 1092; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1093; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1094; GFX11-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 1095; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1096; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 1097; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc 1098; GFX11-NEXT: s_waitcnt vmcnt(0) 1099; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1100; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1101; GFX11-NEXT: s_endpgm 1102 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192 1103 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1104 store i8 %load, i8 addrspace(1)* undef 1105 ret void 1106} 1107 1108define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(i8 addrspace(1)* %p) { 1109; GFX9-LABEL: global_inst_salu_offset_2x_11bit_max: 1110; GFX9: ; %bb.0: 1111; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1112; GFX9-NEXT: v_mov_b32_e32 v0, 0 1113; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1114; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc 1115; GFX9-NEXT: s_waitcnt vmcnt(0) 1116; GFX9-NEXT: global_store_byte v[0:1], v0, off 1117; GFX9-NEXT: s_endpgm 1118; 1119; GFX10-LABEL: global_inst_salu_offset_2x_11bit_max: 1120; GFX10: ; %bb.0: 1121; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1122; GFX10-NEXT: v_mov_b32_e32 v0, 0x800 1123; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1124; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc 1125; GFX10-NEXT: s_waitcnt vmcnt(0) 1126; GFX10-NEXT: global_store_byte v[0:1], v0, off 1127; GFX10-NEXT: s_endpgm 1128; 1129; GFX11-LABEL: global_inst_salu_offset_2x_11bit_max: 1130; GFX11: ; %bb.0: 1131; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 
1132; GFX11-NEXT: v_mov_b32_e32 v0, 0 1133; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1134; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc 1135; GFX11-NEXT: s_waitcnt vmcnt(0) 1136; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1137; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1138; GFX11-NEXT: s_endpgm 1139 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095 1140 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1141 store i8 %load, i8 addrspace(1)* undef 1142 ret void 1143} 1144 1145define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(i8 addrspace(1)* %p) { 1146; GFX9-LABEL: global_inst_salu_offset_2x_12bit_max: 1147; GFX9: ; %bb.0: 1148; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1149; GFX9-NEXT: v_mov_b32_e32 v0, 0x1000 1150; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1151; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc 1152; GFX9-NEXT: s_waitcnt vmcnt(0) 1153; GFX9-NEXT: global_store_byte v[0:1], v0, off 1154; GFX9-NEXT: s_endpgm 1155; 1156; GFX10-LABEL: global_inst_salu_offset_2x_12bit_max: 1157; GFX10: ; %bb.0: 1158; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1159; GFX10-NEXT: v_mov_b32_e32 v0, 0x1800 1160; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1161; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc 1162; GFX10-NEXT: s_waitcnt vmcnt(0) 1163; GFX10-NEXT: global_store_byte v[0:1], v0, off 1164; GFX10-NEXT: s_endpgm 1165; 1166; GFX11-LABEL: global_inst_salu_offset_2x_12bit_max: 1167; GFX11: ; %bb.0: 1168; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1169; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000 1170; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1171; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc 1172; GFX11-NEXT: s_waitcnt vmcnt(0) 1173; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1174; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1175; GFX11-NEXT: s_endpgm 1176 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191 1177 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1178 store i8 %load, i8 
addrspace(1)* undef 1179 ret void 1180} 1181 1182define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(i8 addrspace(1)* %p) { 1183; GFX9-LABEL: global_inst_salu_offset_2x_13bit_max: 1184; GFX9: ; %bb.0: 1185; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1186; GFX9-NEXT: v_mov_b32_e32 v0, 0x3000 1187; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1188; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc 1189; GFX9-NEXT: s_waitcnt vmcnt(0) 1190; GFX9-NEXT: global_store_byte v[0:1], v0, off 1191; GFX9-NEXT: s_endpgm 1192; 1193; GFX10-LABEL: global_inst_salu_offset_2x_13bit_max: 1194; GFX10: ; %bb.0: 1195; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1196; GFX10-NEXT: v_mov_b32_e32 v0, 0x3800 1197; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1198; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc 1199; GFX10-NEXT: s_waitcnt vmcnt(0) 1200; GFX10-NEXT: global_store_byte v[0:1], v0, off 1201; GFX10-NEXT: s_endpgm 1202; 1203; GFX11-LABEL: global_inst_salu_offset_2x_13bit_max: 1204; GFX11: ; %bb.0: 1205; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1206; GFX11-NEXT: v_mov_b32_e32 v0, 0x3000 1207; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1208; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc 1209; GFX11-NEXT: s_waitcnt vmcnt(0) 1210; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1211; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1212; GFX11-NEXT: s_endpgm 1213 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383 1214 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1215 store i8 %load, i8 addrspace(1)* undef 1216 ret void 1217} 1218 1219define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) { 1220; GFX9-LABEL: global_inst_salu_offset_2x_neg_11bit_max: 1221; GFX9: ; %bb.0: 1222; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1223; GFX9-NEXT: v_mov_b32_e32 v0, 0 1224; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1225; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-4096 glc 1226; GFX9-NEXT: s_waitcnt vmcnt(0) 1227; 
GFX9-NEXT: global_store_byte v[0:1], v0, off 1228; GFX9-NEXT: s_endpgm 1229; 1230; GFX10-LABEL: global_inst_salu_offset_2x_neg_11bit_max: 1231; GFX10: ; %bb.0: 1232; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1233; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1234; GFX10-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 1235; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 1236; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 1237; GFX10-NEXT: s_waitcnt vmcnt(0) 1238; GFX10-NEXT: global_store_byte v[0:1], v0, off 1239; GFX10-NEXT: s_endpgm 1240; 1241; GFX11-LABEL: global_inst_salu_offset_2x_neg_11bit_max: 1242; GFX11: ; %bb.0: 1243; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1244; GFX11-NEXT: v_mov_b32_e32 v0, 0 1245; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1246; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc 1247; GFX11-NEXT: s_waitcnt vmcnt(0) 1248; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1249; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1250; GFX11-NEXT: s_endpgm 1251 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096 1252 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1253 store i8 %load, i8 addrspace(1)* undef 1254 ret void 1255} 1256 1257define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) { 1258; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max: 1259; GFX9: ; %bb.0: 1260; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1261; GFX9-NEXT: v_mov_b32_e32 v0, 0 1262; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1263; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000 1264; GFX9-NEXT: s_addc_u32 s1, s1, -1 1265; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1266; GFX9-NEXT: s_waitcnt vmcnt(0) 1267; GFX9-NEXT: global_store_byte v[0:1], v0, off 1268; GFX9-NEXT: s_endpgm 1269; 1270; GFX10-LABEL: global_inst_salu_offset_2x_neg_12bit_max: 1271; GFX10: ; %bb.0: 1272; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1273; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1274; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 1275; GFX10-NEXT: 
v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 1276; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 1277; GFX10-NEXT: s_waitcnt vmcnt(0) 1278; GFX10-NEXT: global_store_byte v[0:1], v0, off 1279; GFX10-NEXT: s_endpgm 1280; 1281; GFX11-LABEL: global_inst_salu_offset_2x_neg_12bit_max: 1282; GFX11: ; %bb.0: 1283; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1284; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1285; GFX11-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 1286; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1287; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 1288; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc 1289; GFX11-NEXT: s_waitcnt vmcnt(0) 1290; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1291; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1292; GFX11-NEXT: s_endpgm 1293 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192 1294 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1295 store i8 %load, i8 addrspace(1)* undef 1296 ret void 1297} 1298 1299define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) { 1300; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max: 1301; GFX9: ; %bb.0: 1302; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1303; GFX9-NEXT: v_mov_b32_e32 v0, 0 1304; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1305; GFX9-NEXT: s_add_u32 s0, s0, 0xffffc000 1306; GFX9-NEXT: s_addc_u32 s1, s1, -1 1307; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1308; GFX9-NEXT: s_waitcnt vmcnt(0) 1309; GFX9-NEXT: global_store_byte v[0:1], v0, off 1310; GFX9-NEXT: s_endpgm 1311; 1312; GFX10-LABEL: global_inst_salu_offset_2x_neg_13bit_max: 1313; GFX10: ; %bb.0: 1314; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1315; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1316; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 1317; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 1318; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 1319; GFX10-NEXT: s_waitcnt vmcnt(0) 1320; GFX10-NEXT: global_store_byte v[0:1], v0, off 1321; GFX10-NEXT: s_endpgm 
1322; 1323; GFX11-LABEL: global_inst_salu_offset_2x_neg_13bit_max: 1324; GFX11: ; %bb.0: 1325; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1326; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1327; GFX11-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 1328; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1329; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 1330; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc 1331; GFX11-NEXT: s_waitcnt vmcnt(0) 1332; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1333; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1334; GFX11-NEXT: s_endpgm 1335 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384 1336 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1337 store i8 %load, i8 addrspace(1)* undef 1338 ret void 1339} 1340 1341; Fill 11-bit low-bits (1ull << 33) | 2047 1342define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) { 1343; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0: 1344; GFX9: ; %bb.0: 1345; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1346; GFX9-NEXT: v_mov_b32_e32 v0, 0 1347; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1348; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff 1349; GFX9-NEXT: s_addc_u32 s1, s1, 2 1350; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1351; GFX9-NEXT: s_waitcnt vmcnt(0) 1352; GFX9-NEXT: global_store_byte v[0:1], v0, off 1353; GFX9-NEXT: s_endpgm 1354; 1355; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split0: 1356; GFX10: ; %bb.0: 1357; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1358; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1359; GFX10-NEXT: v_add_co_u32 v0, s0, 0, s0 1360; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 1361; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc 1362; GFX10-NEXT: s_waitcnt vmcnt(0) 1363; GFX10-NEXT: global_store_byte v[0:1], v0, off 1364; GFX10-NEXT: s_endpgm 1365; 1366; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_split0: 1367; GFX11: ; %bb.0: 1368; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1369; GFX11-NEXT: 
s_waitcnt lgkmcnt(0) 1370; GFX11-NEXT: v_add_co_u32 v0, s0, 0, s0 1371; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1372; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1373; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2047 glc dlc 1374; GFX11-NEXT: s_waitcnt vmcnt(0) 1375; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1376; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1377; GFX11-NEXT: s_endpgm 1378 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639 1379 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1380 store i8 %load, i8 addrspace(1)* undef 1381 ret void 1382} 1383 1384; Fill 11-bit low-bits (1ull << 33) | 2048 1385define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) { 1386; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1: 1387; GFX9: ; %bb.0: 1388; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1389; GFX9-NEXT: v_mov_b32_e32 v0, 0 1390; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1391; GFX9-NEXT: s_add_u32 s0, s0, 0x800 1392; GFX9-NEXT: s_addc_u32 s1, s1, 2 1393; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1394; GFX9-NEXT: s_waitcnt vmcnt(0) 1395; GFX9-NEXT: global_store_byte v[0:1], v0, off 1396; GFX9-NEXT: s_endpgm 1397; 1398; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split1: 1399; GFX10: ; %bb.0: 1400; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1401; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1402; GFX10-NEXT: v_add_co_u32 v0, s0, 0x800, s0 1403; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 1404; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 1405; GFX10-NEXT: s_waitcnt vmcnt(0) 1406; GFX10-NEXT: global_store_byte v[0:1], v0, off 1407; GFX10-NEXT: s_endpgm 1408; 1409; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_split1: 1410; GFX11: ; %bb.0: 1411; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1412; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1413; GFX11-NEXT: v_add_co_u32 v0, s0, 0, s0 1414; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1415; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 
2, s1, s0 1416; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2048 glc dlc 1417; GFX11-NEXT: s_waitcnt vmcnt(0) 1418; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1419; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1420; GFX11-NEXT: s_endpgm 1421 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640 1422 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1423 store i8 %load, i8 addrspace(1)* undef 1424 ret void 1425} 1426 1427; Fill 12-bit low-bits (1ull << 33) | 4095 1428define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) { 1429; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0: 1430; GFX9: ; %bb.0: 1431; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1432; GFX9-NEXT: v_mov_b32_e32 v0, 0 1433; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1434; GFX9-NEXT: s_add_u32 s0, s0, 0xfff 1435; GFX9-NEXT: s_addc_u32 s1, s1, 2 1436; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1437; GFX9-NEXT: s_waitcnt vmcnt(0) 1438; GFX9-NEXT: global_store_byte v[0:1], v0, off 1439; GFX9-NEXT: s_endpgm 1440; 1441; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split0: 1442; GFX10: ; %bb.0: 1443; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1444; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1445; GFX10-NEXT: v_add_co_u32 v0, s0, 0x800, s0 1446; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 1447; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc 1448; GFX10-NEXT: s_waitcnt vmcnt(0) 1449; GFX10-NEXT: global_store_byte v[0:1], v0, off 1450; GFX10-NEXT: s_endpgm 1451; 1452; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_split0: 1453; GFX11: ; %bb.0: 1454; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1455; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1456; GFX11-NEXT: v_add_co_u32 v0, s0, 0, s0 1457; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1458; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1459; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095 glc dlc 1460; GFX11-NEXT: s_waitcnt vmcnt(0) 1461; GFX11-NEXT: global_store_b8 
v[0:1], v0, off 1462; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1463; GFX11-NEXT: s_endpgm 1464 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687 1465 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1466 store i8 %load, i8 addrspace(1)* undef 1467 ret void 1468} 1469 1470; Fill 12-bit low-bits (1ull << 33) | 4096 1471define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) { 1472; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1: 1473; GFX9: ; %bb.0: 1474; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1475; GFX9-NEXT: v_mov_b32_e32 v0, 0 1476; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1477; GFX9-NEXT: s_add_u32 s0, s0, 0x1000 1478; GFX9-NEXT: s_addc_u32 s1, s1, 2 1479; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1480; GFX9-NEXT: s_waitcnt vmcnt(0) 1481; GFX9-NEXT: global_store_byte v[0:1], v0, off 1482; GFX9-NEXT: s_endpgm 1483; 1484; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split1: 1485; GFX10: ; %bb.0: 1486; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1487; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1488; GFX10-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 1489; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 1490; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 1491; GFX10-NEXT: s_waitcnt vmcnt(0) 1492; GFX10-NEXT: global_store_byte v[0:1], v0, off 1493; GFX10-NEXT: s_endpgm 1494; 1495; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_split1: 1496; GFX11: ; %bb.0: 1497; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1498; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1499; GFX11-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 1500; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1501; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1502; GFX11-NEXT: global_load_u8 v0, v[0:1], off glc dlc 1503; GFX11-NEXT: s_waitcnt vmcnt(0) 1504; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1505; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1506; GFX11-NEXT: s_endpgm 1507 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688 1508 
%load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; Fill 13-bit low-bits (1ull << 33) | 8191
;
; Kernel that does a volatile byte load from %p + 8589942783, i.e.
; (1 << 33) | 0x1fff, and stores the loaded byte to an undef global pointer.
; How the checks below split that offset per target,
;   gfx900  - whole offset in the scalar add (0x1fff low dword, 2 high dword),
;             no immediate offset on the load.
;   gfx1010 - 0x1800 in the VALU add plus offset 2047 on the load.
;   gfx1100 - 0x1000 in the VALU add plus offset 4095 on the load.
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX9-NEXT:    s_addc_u32 s1, s1, 2
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0x1800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4095 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; Fill 13-bit low-bits (1ull << 33) | 8192
;
; Kernel that does a volatile byte load from %p + 8589942784, i.e.
; (1 << 33) | 0x2000, and stores the loaded byte to an undef global pointer.
; The checks below expect no immediate offset on the load for any target -
; gfx900 adds 0x2000 / 2 with s_add_u32 / s_addc_u32, while gfx1010 and
; gfx1100 add the same constants with v_add_co_u32 / v_add_co_ci_u32_e64.
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX9-NEXT:    s_addc_u32 s1, s1, 2
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0x2000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_add_co_u32 v0, s0, 0x2000, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
;
; Kernel that does a volatile byte load from %p + -9223372036854773761, i.e.
; bit 63 set plus 0x7ff, and stores the loaded byte to an undef global pointer.
; The checks below expect the same shape on gfx900, gfx1010 and gfx1100 - the
; whole offset goes through s_add_u32 (0x7ff) / s_addc_u32 (0x80000000) and
; the load uses the scalar base with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x7ff
; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x7ff
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_add_u32 s0, s0, 0x7ff
; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
;
; Kernel that does a volatile byte load from %p + -9223372036854773760, i.e.
; bit 63 set plus 0x800, and stores the loaded byte to an undef global pointer.
; The checks below expect the same shape on gfx900, gfx1010 and gfx1100 - the
; whole offset goes through s_add_u32 (0x800) / s_addc_u32 (0x80000000) and
; the load uses the scalar base with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x800
; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x800
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_add_u32 s0, s0, 0x800
; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
;
; Kernel that does a volatile byte load from %p + -9223372036854771713, i.e.
; bit 63 set plus 0xfff, and stores the loaded byte to an undef global pointer.
; The checks below expect the same shape on gfx900, gfx1010 and gfx1100 - the
; whole offset goes through s_add_u32 (0xfff) / s_addc_u32 (0x80000000) and
; the load uses the scalar base with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0xfff
; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_add_u32 s0, s0, 0xfff
; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
;
; Kernel that does a volatile byte load from %p + -9223372036854771712, i.e.
; bit 63 set plus 0x1000, and stores the loaded byte to an undef global
; pointer. The checks below expect the same shape on gfx900, gfx1010 and
; gfx1100 - the whole offset goes through s_add_u32 (0x1000) / s_addc_u32
; (0x80000000) and the load uses the scalar base with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x1000
; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x1000
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_add_u32 s0, s0, 0x1000
; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
;
; Kernel that does a volatile byte load from %p + -9223372036854767617, i.e.
; bit 63 set plus 0x1fff, and stores the loaded byte to an undef global
; pointer. The checks below expect the same shape on gfx900, gfx1010 and
; gfx1100 - the whole offset goes through s_add_u32 (0x1fff) / s_addc_u32
; (0x80000000) and the load uses the scalar base with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
;
; Kernel that does a volatile byte load from %p + -9223372036854767616, i.e.
; bit 63 set plus 0x2000, and stores the loaded byte to an undef global
; pointer. The checks below expect the same shape on gfx900, gfx1010 and
; gfx1100 - the whole offset goes through s_add_u32 (0x2000) / s_addc_u32
; (0x80000000) and the load uses the scalar base with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}