; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Test splitting flat instruction offsets into the low and high bits
; when the offset doesn't fit in the offset field.
;
; Reading guide (derived from the expectations recorded below):
;   * With -mcpu=gfx900 the boundary cases show immediates from -4096 to 4095
;     being folded into the load's offset field.
;   * With -mcpu=gfx1010 the boundary cases show immediates from -2048 to 2047
;     being folded into the load's offset field.
;   * Anything outside that range is added to the base address first, either
;     in full or as a "high part" that leaves a foldable remainder.
;
; The check lines are generated; after a codegen change, regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

;------------------------------------------------------------------------------
; VALU address: the pointer is a function argument held in v[0:1].
;------------------------------------------------------------------------------

; +1 is folded into the offset field on both targets.
define i8 @global_inst_valu_offset_1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; +2047 (2^11 - 1) is folded into the offset field on both targets.
define i8 @global_inst_valu_offset_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; +4095 (2^12 - 1) is folded on gfx900; gfx1010 adds 0x800 to the base and
; folds the remaining 2047.
define i8 @global_inst_valu_offset_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; +8191 (2^13 - 1) is split on both targets: gfx900 adds 0x1000 and folds
; 4095; gfx1010 adds 0x1800 and folds 2047.
define i8 @global_inst_valu_offset_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; -2048 (-2^11) is folded into the offset field on both targets.
define i8 @global_inst_valu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; -4096 (-2^12) is folded on gfx900; gfx1010 adds the whole constant to the
; base and issues the load with no offset.
define i8 @global_inst_valu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; -8192 (-2^13) is added to the base in full on both targets.
define i8 @global_inst_valu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; +4095: same immediate as global_inst_valu_offset_12bit_max.
define i8 @global_inst_valu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; +8191: same immediate as global_inst_valu_offset_13bit_max.
define i8 @global_inst_valu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; +16383 (2^14 - 1): gfx900 adds 0x3000 and folds 4095; gfx1010 adds 0x3800
; and folds 2047.
define i8 @global_inst_valu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; -4096: same immediate as global_inst_valu_offset_neg_12bit_max.
define i8 @global_inst_valu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; -8192: same immediate as global_inst_valu_offset_neg_13bit_max.
define i8 @global_inst_valu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; -16384 (-2^14) is added to the base in full on both targets.
define i8 @global_inst_valu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

;------------------------------------------------------------------------------
; 64-bit offsets with bit 33 set: the carry add on the high dword uses the
; constant 2, and only the low bits are candidates for the offset field.
;------------------------------------------------------------------------------

; Fill 11-bit low-bits (1ull << 33) | 2047
define i8 @global_inst_valu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 11-bit low-bits (1ull << 33) | 2048
define i8 @global_inst_valu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2048
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits (1ull << 33) | 4095
define i8 @global_inst_valu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits (1ull << 33) | 4096
define i8 @global_inst_valu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits (1ull << 33) | 8191
define i8 @global_inst_valu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits (1ull << 33) | 8192
define i8 @global_inst_valu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

;------------------------------------------------------------------------------
; 64-bit offsets with bit 63 set: the high dword constant is 0x80000000
; (gfx900 materializes it with v_bfrev_b32 of 1; gfx1010 uses it as a literal).
; In these expectations the low dword add is rounded up and the load's offset
; field carries a zero or negative remainder.
;------------------------------------------------------------------------------

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2049
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

;------------------------------------------------------------------------------
; SALU address: amdgpu_kernel entry points. The pointer is read with
; s_load_dwordx2 into s[0:1]; where the constant can be split, the load uses
; s[0:1] as its base with a VGPR (v0) holding the part of the offset that does
; not fit in the offset field. The loads are volatile and the result is stored
; to an undef pointer.
;------------------------------------------------------------------------------

; +1 is folded into the offset field on both targets; v0 is zero.
define amdgpu_kernel void @global_inst_salu_offset_1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:1 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:1 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; +2047 is folded into the offset field on both targets; v0 is zero.
define amdgpu_kernel void @global_inst_salu_offset_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; +4095 is folded on gfx900; gfx1010 puts 0x800 in v0 and folds 2047.
define amdgpu_kernel void @global_inst_salu_offset_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x800
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; +8191: gfx900 puts 0x1000 in v0 and folds 4095; gfx1010 puts 0x1800 in v0
; and folds 2047.
define amdgpu_kernel void @global_inst_salu_offset_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1800
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; -2048 is folded into the offset field on both targets; v0 is zero.
define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-2048 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-2048 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; -4096 is folded on gfx900; gfx1010 computes the full 64-bit address into
; v[0:1] with VALU adds and loads with no offset.
define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-4096 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; -8192: gfx900 adjusts s[0:1] with scalar adds; gfx1010 computes the address
; into v[0:1] with VALU adds. Neither target folds any part into the offset.
define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0xffffe000
; GFX9-NEXT:    s_addc_u32 s1, s1, -1
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; +4095: same immediate as global_inst_salu_offset_12bit_max.
define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x800
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; +8191: same immediate as global_inst_salu_offset_13bit_max.
define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1800
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; +16383: gfx900 puts 0x3000 in v0 and folds 4095; gfx1010 puts 0x3800 in v0
; and folds 2047.
define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3000
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3800
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

; -4096: same immediate as global_inst_salu_offset_neg_12bit_max.
define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-4096 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}

define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
;
GFX9-NEXT: s_waitcnt lgkmcnt(0) 903; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000 904; GFX9-NEXT: s_addc_u32 s1, s1, -1 905; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 906; GFX9-NEXT: s_waitcnt vmcnt(0) 907; GFX9-NEXT: global_store_byte v[0:1], v0, off 908; GFX9-NEXT: s_endpgm 909; 910; GFX10-LABEL: global_inst_salu_offset_2x_neg_12bit_max: 911; GFX10: ; %bb.0: 912; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 913; GFX10-NEXT: s_waitcnt lgkmcnt(0) 914; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 915; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 916; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 917; GFX10-NEXT: s_waitcnt vmcnt(0) 918; GFX10-NEXT: global_store_byte v[0:1], v0, off 919; GFX10-NEXT: s_endpgm 920 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192 921 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 922 store i8 %load, i8 addrspace(1)* undef 923 ret void 924} 925 926define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) { 927; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max: 928; GFX9: ; %bb.0: 929; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 930; GFX9-NEXT: v_mov_b32_e32 v0, 0 931; GFX9-NEXT: s_waitcnt lgkmcnt(0) 932; GFX9-NEXT: s_add_u32 s0, s0, 0xffffc000 933; GFX9-NEXT: s_addc_u32 s1, s1, -1 934; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 935; GFX9-NEXT: s_waitcnt vmcnt(0) 936; GFX9-NEXT: global_store_byte v[0:1], v0, off 937; GFX9-NEXT: s_endpgm 938; 939; GFX10-LABEL: global_inst_salu_offset_2x_neg_13bit_max: 940; GFX10: ; %bb.0: 941; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 942; GFX10-NEXT: s_waitcnt lgkmcnt(0) 943; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 944; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 945; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 946; GFX10-NEXT: s_waitcnt vmcnt(0) 947; GFX10-NEXT: global_store_byte v[0:1], v0, off 948; GFX10-NEXT: s_endpgm 949 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384 950 %load 
= load volatile i8, i8 addrspace(1)* %gep, align 1 951 store i8 %load, i8 addrspace(1)* undef 952 ret void 953} 954 955; Fill 11-bit low-bits (1ull << 33) | 2047 956define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) { 957; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0: 958; GFX9: ; %bb.0: 959; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 960; GFX9-NEXT: v_mov_b32_e32 v0, 0 961; GFX9-NEXT: s_waitcnt lgkmcnt(0) 962; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff 963; GFX9-NEXT: s_addc_u32 s1, s1, 2 964; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 965; GFX9-NEXT: s_waitcnt vmcnt(0) 966; GFX9-NEXT: global_store_byte v[0:1], v0, off 967; GFX9-NEXT: s_endpgm 968; 969; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split0: 970; GFX10: ; %bb.0: 971; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 972; GFX10-NEXT: s_waitcnt lgkmcnt(0) 973; GFX10-NEXT: v_add_co_u32 v0, s0, 0, s0 974; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 975; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc 976; GFX10-NEXT: s_waitcnt vmcnt(0) 977; GFX10-NEXT: global_store_byte v[0:1], v0, off 978; GFX10-NEXT: s_endpgm 979 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639 980 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 981 store i8 %load, i8 addrspace(1)* undef 982 ret void 983} 984 985; Fill 11-bit low-bits (1ull << 33) | 2048 986define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) { 987; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1: 988; GFX9: ; %bb.0: 989; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 990; GFX9-NEXT: v_mov_b32_e32 v0, 0 991; GFX9-NEXT: s_waitcnt lgkmcnt(0) 992; GFX9-NEXT: s_add_u32 s0, s0, 0x800 993; GFX9-NEXT: s_addc_u32 s1, s1, 2 994; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 995; GFX9-NEXT: s_waitcnt vmcnt(0) 996; GFX9-NEXT: global_store_byte v[0:1], v0, off 997; GFX9-NEXT: s_endpgm 998; 999; GFX10-LABEL: 
global_inst_salu_offset_64bit_11bit_split1: 1000; GFX10: ; %bb.0: 1001; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1002; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1003; GFX10-NEXT: v_add_co_u32 v0, s0, 0x800, s0 1004; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1005; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 1006; GFX10-NEXT: s_waitcnt vmcnt(0) 1007; GFX10-NEXT: global_store_byte v[0:1], v0, off 1008; GFX10-NEXT: s_endpgm 1009 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640 1010 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1011 store i8 %load, i8 addrspace(1)* undef 1012 ret void 1013} 1014 1015; Fill 12-bit low-bits (1ull << 33) | 4095 1016define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) { 1017; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0: 1018; GFX9: ; %bb.0: 1019; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1020; GFX9-NEXT: v_mov_b32_e32 v0, 0 1021; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1022; GFX9-NEXT: s_add_u32 s0, s0, 0xfff 1023; GFX9-NEXT: s_addc_u32 s1, s1, 2 1024; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1025; GFX9-NEXT: s_waitcnt vmcnt(0) 1026; GFX9-NEXT: global_store_byte v[0:1], v0, off 1027; GFX9-NEXT: s_endpgm 1028; 1029; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split0: 1030; GFX10: ; %bb.0: 1031; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1032; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1033; GFX10-NEXT: v_add_co_u32 v0, s0, 0x800, s0 1034; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1035; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc 1036; GFX10-NEXT: s_waitcnt vmcnt(0) 1037; GFX10-NEXT: global_store_byte v[0:1], v0, off 1038; GFX10-NEXT: s_endpgm 1039 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687 1040 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1041 store i8 %load, i8 addrspace(1)* undef 1042 ret void 1043} 1044 1045; Fill 12-bit low-bits (1ull << 33) | 4096 1046define amdgpu_kernel void 
@global_inst_salu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) { 1047; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1: 1048; GFX9: ; %bb.0: 1049; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1050; GFX9-NEXT: v_mov_b32_e32 v0, 0 1051; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1052; GFX9-NEXT: s_add_u32 s0, s0, 0x1000 1053; GFX9-NEXT: s_addc_u32 s1, s1, 2 1054; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1055; GFX9-NEXT: s_waitcnt vmcnt(0) 1056; GFX9-NEXT: global_store_byte v[0:1], v0, off 1057; GFX9-NEXT: s_endpgm 1058; 1059; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split1: 1060; GFX10: ; %bb.0: 1061; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1062; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1063; GFX10-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 1064; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1065; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 1066; GFX10-NEXT: s_waitcnt vmcnt(0) 1067; GFX10-NEXT: global_store_byte v[0:1], v0, off 1068; GFX10-NEXT: s_endpgm 1069 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688 1070 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1071 store i8 %load, i8 addrspace(1)* undef 1072 ret void 1073} 1074 1075; Fill 13-bit low-bits (1ull << 33) | 8191 1076define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) { 1077; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0: 1078; GFX9: ; %bb.0: 1079; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1080; GFX9-NEXT: v_mov_b32_e32 v0, 0 1081; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1082; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff 1083; GFX9-NEXT: s_addc_u32 s1, s1, 2 1084; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1085; GFX9-NEXT: s_waitcnt vmcnt(0) 1086; GFX9-NEXT: global_store_byte v[0:1], v0, off 1087; GFX9-NEXT: s_endpgm 1088; 1089; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split0: 1090; GFX10: ; %bb.0: 1091; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1092; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1093; GFX10-NEXT: 
v_add_co_u32 v0, s0, 0x1800, s0 1094; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1095; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc 1096; GFX10-NEXT: s_waitcnt vmcnt(0) 1097; GFX10-NEXT: global_store_byte v[0:1], v0, off 1098; GFX10-NEXT: s_endpgm 1099 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783 1100 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1101 store i8 %load, i8 addrspace(1)* undef 1102 ret void 1103} 1104 1105; Fill 13-bit low-bits (1ull << 33) | 8192 1106define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) { 1107; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1: 1108; GFX9: ; %bb.0: 1109; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1110; GFX9-NEXT: v_mov_b32_e32 v0, 0 1111; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1112; GFX9-NEXT: s_add_u32 s0, s0, 0x2000 1113; GFX9-NEXT: s_addc_u32 s1, s1, 2 1114; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1115; GFX9-NEXT: s_waitcnt vmcnt(0) 1116; GFX9-NEXT: global_store_byte v[0:1], v0, off 1117; GFX9-NEXT: s_endpgm 1118; 1119; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split1: 1120; GFX10: ; %bb.0: 1121; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1122; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1123; GFX10-NEXT: v_add_co_u32 v0, s0, 0x2000, s0 1124; GFX10-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1125; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 1126; GFX10-NEXT: s_waitcnt vmcnt(0) 1127; GFX10-NEXT: global_store_byte v[0:1], v0, off 1128; GFX10-NEXT: s_endpgm 1129 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784 1130 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1131 store i8 %load, i8 addrspace(1)* undef 1132 ret void 1133} 1134 1135; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047 1136define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) { 1137; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: 1138; GFX9: ; 
%bb.0: 1139; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1140; GFX9-NEXT: v_mov_b32_e32 v0, 0 1141; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1142; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff 1143; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000 1144; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1145; GFX9-NEXT: s_waitcnt vmcnt(0) 1146; GFX9-NEXT: global_store_byte v[0:1], v0, off 1147; GFX9-NEXT: s_endpgm 1148; 1149; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: 1150; GFX10: ; %bb.0: 1151; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1152; GFX10-NEXT: v_mov_b32_e32 v0, 0 1153; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1154; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff 1155; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 1156; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc 1157; GFX10-NEXT: s_waitcnt vmcnt(0) 1158; GFX10-NEXT: global_store_byte v[0:1], v0, off 1159; GFX10-NEXT: s_endpgm 1160 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761 1161 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1162 store i8 %load, i8 addrspace(1)* undef 1163 ret void 1164} 1165 1166; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048 1167define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) { 1168; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: 1169; GFX9: ; %bb.0: 1170; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1171; GFX9-NEXT: v_mov_b32_e32 v0, 0 1172; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1173; GFX9-NEXT: s_add_u32 s0, s0, 0x800 1174; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000 1175; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1176; GFX9-NEXT: s_waitcnt vmcnt(0) 1177; GFX9-NEXT: global_store_byte v[0:1], v0, off 1178; GFX9-NEXT: s_endpgm 1179; 1180; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: 1181; GFX10: ; %bb.0: 1182; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1183; GFX10-NEXT: v_mov_b32_e32 v0, 0 1184; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1185; GFX10-NEXT: s_add_u32 
s0, s0, 0x800 1186; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 1187; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc 1188; GFX10-NEXT: s_waitcnt vmcnt(0) 1189; GFX10-NEXT: global_store_byte v[0:1], v0, off 1190; GFX10-NEXT: s_endpgm 1191 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760 1192 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1193 store i8 %load, i8 addrspace(1)* undef 1194 ret void 1195} 1196 1197; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095 1198define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) { 1199; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: 1200; GFX9: ; %bb.0: 1201; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1202; GFX9-NEXT: v_mov_b32_e32 v0, 0 1203; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1204; GFX9-NEXT: s_add_u32 s0, s0, 0xfff 1205; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000 1206; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1207; GFX9-NEXT: s_waitcnt vmcnt(0) 1208; GFX9-NEXT: global_store_byte v[0:1], v0, off 1209; GFX9-NEXT: s_endpgm 1210; 1211; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: 1212; GFX10: ; %bb.0: 1213; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1214; GFX10-NEXT: v_mov_b32_e32 v0, 0 1215; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1216; GFX10-NEXT: s_add_u32 s0, s0, 0xfff 1217; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 1218; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc 1219; GFX10-NEXT: s_waitcnt vmcnt(0) 1220; GFX10-NEXT: global_store_byte v[0:1], v0, off 1221; GFX10-NEXT: s_endpgm 1222 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713 1223 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1224 store i8 %load, i8 addrspace(1)* undef 1225 ret void 1226} 1227 1228; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096 1229define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) { 1230; GFX9-LABEL: 
global_inst_salu_offset_64bit_12bit_neg_high_split1: 1231; GFX9: ; %bb.0: 1232; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1233; GFX9-NEXT: v_mov_b32_e32 v0, 0 1234; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1235; GFX9-NEXT: s_add_u32 s0, s0, 0x1000 1236; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000 1237; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1238; GFX9-NEXT: s_waitcnt vmcnt(0) 1239; GFX9-NEXT: global_store_byte v[0:1], v0, off 1240; GFX9-NEXT: s_endpgm 1241; 1242; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: 1243; GFX10: ; %bb.0: 1244; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1245; GFX10-NEXT: v_mov_b32_e32 v0, 0 1246; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1247; GFX10-NEXT: s_add_u32 s0, s0, 0x1000 1248; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 1249; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc 1250; GFX10-NEXT: s_waitcnt vmcnt(0) 1251; GFX10-NEXT: global_store_byte v[0:1], v0, off 1252; GFX10-NEXT: s_endpgm 1253 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712 1254 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1255 store i8 %load, i8 addrspace(1)* undef 1256 ret void 1257} 1258 1259; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191 1260define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) { 1261; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0: 1262; GFX9: ; %bb.0: 1263; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1264; GFX9-NEXT: v_mov_b32_e32 v0, 0 1265; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1266; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff 1267; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000 1268; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1269; GFX9-NEXT: s_waitcnt vmcnt(0) 1270; GFX9-NEXT: global_store_byte v[0:1], v0, off 1271; GFX9-NEXT: s_endpgm 1272; 1273; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0: 1274; GFX10: ; %bb.0: 1275; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1276; GFX10-NEXT: v_mov_b32_e32 
v0, 0 1277; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1278; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff 1279; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 1280; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc 1281; GFX10-NEXT: s_waitcnt vmcnt(0) 1282; GFX10-NEXT: global_store_byte v[0:1], v0, off 1283; GFX10-NEXT: s_endpgm 1284 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617 1285 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1286 store i8 %load, i8 addrspace(1)* undef 1287 ret void 1288} 1289 1290; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192 1291define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) { 1292; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1: 1293; GFX9: ; %bb.0: 1294; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1295; GFX9-NEXT: v_mov_b32_e32 v0, 0 1296; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1297; GFX9-NEXT: s_add_u32 s0, s0, 0x2000 1298; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000 1299; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 1300; GFX9-NEXT: s_waitcnt vmcnt(0) 1301; GFX9-NEXT: global_store_byte v[0:1], v0, off 1302; GFX9-NEXT: s_endpgm 1303; 1304; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1: 1305; GFX10: ; %bb.0: 1306; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1307; GFX10-NEXT: v_mov_b32_e32 v0, 0 1308; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1309; GFX10-NEXT: s_add_u32 s0, s0, 0x2000 1310; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 1311; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc 1312; GFX10-NEXT: s_waitcnt vmcnt(0) 1313; GFX10-NEXT: global_store_byte v[0:1], v0, off 1314; GFX10-NEXT: s_endpgm 1315 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616 1316 %load = load volatile i8, i8 addrspace(1)* %gep, align 1 1317 store i8 %load, i8 addrspace(1)* undef 1318 ret void 1319} 1320