; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Test splitting flat instruction offsets into the low and high bits
; when the offset doesn't fit in the offset field.
;
; How to read the assertions visible in this part of the file:
;  - The gfx900 checks fold non-negative offsets up to 4095 into the load's
;    offset field. Larger non-negative offsets become a 64-bit add on the
;    pointer plus whatever remains in the offset field. Negative offsets are
;    added to the pointer in full and the load carries no offset.
;  - The gfx1010 checks never use the offset field; the whole offset is added
;    to the pointer before the load.
;  - The flat_inst_valu_* functions receive the pointer in v[0:1]; the
;    flat_inst_salu_* kernels fetch it with s_load_dwordx2 first.
; The check lines are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Offset 1: gfx900 folds it into the load (offset:1); gfx1010 adds it to the
; pointer with a 64-bit add.
define i8 @flat_inst_valu_offset_1(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:1
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, v0, 1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 1
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Offset 2047 (0x7ff): gfx900 folds it into the load (offset:2047).
define i8 @flat_inst_valu_offset_11bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_11bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_11bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 2047
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Offset 4095 (0xfff): gfx900 folds it into the load (offset:4095).
define i8 @flat_inst_valu_offset_12bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_12bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_12bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 4095
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Offset 8191 (0x1fff): gfx900 splits it into an add of 0x1000 on the pointer
; plus offset:4095 on the load.
define i8 @flat_inst_valu_offset_13bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_13bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_13bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 8191
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Offset -2048: both targets add 0xfffff800 (high half -1) to the pointer and
; load with no offset.
define i8 @flat_inst_valu_offset_neg_11bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_neg_11bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_neg_11bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff800, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 -2048
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Offset -4096: both targets add 0xfffff000 (high half -1) to the pointer and
; load with no offset.
define i8 @flat_inst_valu_offset_neg_12bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_neg_12bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_neg_12bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 -4096
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Offset -8192: both targets add 0xffffe000 (high half -1) to the pointer and
; load with no offset.
define i8 @flat_inst_valu_offset_neg_13bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_neg_13bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_neg_13bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 -8192
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Offset 4095 (the same value as flat_inst_valu_offset_12bit_max): gfx900
; folds it into the load (offset:4095).
define i8 @flat_inst_valu_offset_2x_11bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_2x_11bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_2x_11bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 4095
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Offset 8191 (the same value as flat_inst_valu_offset_13bit_max): gfx900
; splits it into an add of 0x1000 plus offset:4095.
define i8 @flat_inst_valu_offset_2x_12bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_2x_12bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_2x_12bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 8191
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Offset 16383 (0x3fff): gfx900 splits it into an add of 0x3000 plus
; offset:4095.
define i8 @flat_inst_valu_offset_2x_13bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_2x_13bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_2x_13bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x3fff, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 16383
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Offset -4096 (the same value as flat_inst_valu_offset_neg_12bit_max): both
; targets add 0xfffff000 (high half -1) and load with no offset.
define i8 @flat_inst_valu_offset_2x_neg_11bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 -4096
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Offset -8192 (the same value as flat_inst_valu_offset_neg_13bit_max): both
; targets add 0xffffe000 (high half -1) and load with no offset.
define i8 @flat_inst_valu_offset_2x_neg_12bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 -8192
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Offset -16384: both targets add 0xffffc000 (high half -1) to the pointer and
; load with no offset.
define i8 @flat_inst_valu_offset_2x_neg_13bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffc000, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 -16384
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Fill 11-bit low-bits (1ull << 33) | 2047
; gfx900 adds 2 to the high half of the pointer and keeps 2047 in the offset
; field.
define i8 @flat_inst_valu_offset_64bit_11bit_split0(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 8589936639
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Fill 11-bit low-bits (1ull << 33) | 2048
; gfx900 adds 2 to the high half of the pointer and keeps 2048 in the offset
; field.
define i8 @flat_inst_valu_offset_64bit_11bit_split1(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 8589936640
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits (1ull << 33) | 4095
; gfx900 adds 2 to the high half of the pointer and keeps 4095 in the offset
; field.
define i8 @flat_inst_valu_offset_64bit_12bit_split0(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 8589938687
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits (1ull << 33) | 4096
; gfx900 adds 0x1000 to the low half and 2 to the high half; the load has no
; offset.
define i8 @flat_inst_valu_offset_64bit_12bit_split1(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 8589938688
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits (1ull << 33) | 8191
; gfx900 adds 0x1000 to the low half and 2 to the high half, and keeps 4095 in
; the offset field.
define i8 @flat_inst_valu_offset_64bit_13bit_split0(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 8589942783
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits (1ull << 33) | 8192
; gfx900 adds 0x2000 to the low half and 2 to the high half; the load has no
; offset.
define i8 @flat_inst_valu_offset_64bit_13bit_split1(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 8589942784
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; Neither target uses the offset field: the low half gets 0x7ff and the high
; half gets 0x80000000 (gfx900 places it in v2 with v_bfrev_b32 first).
define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 -9223372036854773761
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; Neither target uses the offset field: the low half gets 0x800 and the high
; half gets 0x80000000.
define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 -9223372036854773760
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; Neither target uses the offset field: the low half gets 0xfff and the high
; half gets 0x80000000.
define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 -9223372036854771713
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; Neither target uses the offset field: the low half gets 0x1000 and the high
; half gets 0x80000000.
define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 -9223372036854771712
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; Neither target uses the offset field: the low half gets 0x1fff and the high
; half gets 0x80000000.
define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 -9223372036854767617
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; Neither target uses the offset field: the low half gets 0x2000 and the high
; half gets 0x80000000.
define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(i8* %p) {
; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8* %p, i64 -9223372036854767616
  %load = load i8, i8* %gep, align 4
  ret i8 %load
}

; Kernel variant, offset 1. gfx900 copies the loaded pointer into v[0:1] and
; folds the offset into the load (offset:1); gfx1010 adds the offset with
; s_add_u32/s_addc_u32 before the copy.
define amdgpu_kernel void @flat_inst_salu_offset_1(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:1
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 1
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 1
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Kernel variant, offset 2047 (0x7ff): gfx900 folds it into the load
; (offset:2047); gfx1010 uses a scalar 64-bit add.
define amdgpu_kernel void @flat_inst_salu_offset_11bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_11bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_11bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 2047
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Kernel variant, offset 4095 (0xfff): gfx900 folds it into the load
; (offset:4095); gfx1010 uses a scalar 64-bit add.
define amdgpu_kernel void @flat_inst_salu_offset_12bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_12bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_12bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 4095
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Kernel variant, offset 8191 (0x1fff): gfx900 adds 0x1000 with vector adds
; after the copy and keeps offset:4095; gfx1010 uses a scalar 64-bit add.
define amdgpu_kernel void @flat_inst_salu_offset_13bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_13bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_13bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 8191
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Kernel variant, offset -2048: gfx900 adds 0xfffff800 (high half -1) with
; vector adds after the copy; gfx1010 adds it with scalar adds before the copy.
define amdgpu_kernel void @flat_inst_salu_offset_neg_11bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_neg_11bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_neg_11bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff800
; GFX10-NEXT: s_addc_u32 s1, s1, -1
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 -2048
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Kernel variant, offset -4096: gfx900 adds 0xfffff000 (high half -1) with
; vector adds after the copy; gfx1010 adds it with scalar adds before the copy.
define amdgpu_kernel void @flat_inst_salu_offset_neg_12bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_neg_12bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_neg_12bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff000
; GFX10-NEXT: s_addc_u32 s1, s1, -1
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 -4096
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Kernel variant, offset -8192: gfx900 adds 0xffffe000 (high half -1) with
; vector adds after the copy; gfx1010 adds it with scalar adds before the copy.
define amdgpu_kernel void @flat_inst_salu_offset_neg_13bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_neg_13bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_neg_13bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0xffffe000
; GFX10-NEXT: s_addc_u32 s1, s1, -1
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 -8192
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Kernel variant, offset 4095 (the same value as
; flat_inst_salu_offset_12bit_max): gfx900 folds it into the load
; (offset:4095); gfx1010 uses a scalar 64-bit add.
define amdgpu_kernel void @flat_inst_salu_offset_2x_11bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_2x_11bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_2x_11bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 4095
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Kernel variant, offset 8191 (the same value as
; flat_inst_salu_offset_13bit_max): gfx900 adds 0x1000 with vector adds and
; keeps offset:4095; gfx1010 uses a scalar 64-bit add.
define amdgpu_kernel void @flat_inst_salu_offset_2x_12bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_2x_12bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_2x_12bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 8191
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

define amdgpu_kernel void @flat_inst_salu_offset_2x_13bit_max(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_2x_13bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
;
GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 943; GFX9-NEXT: flat_store_byte v[0:1], v0 944; GFX9-NEXT: s_endpgm 945; 946; GFX10-LABEL: flat_inst_salu_offset_2x_13bit_max: 947; GFX10: ; %bb.0: 948; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 949; GFX10-NEXT: ; implicit-def: $vcc_hi 950; GFX10-NEXT: s_waitcnt lgkmcnt(0) 951; GFX10-NEXT: s_add_u32 s0, s0, 0x3fff 952; GFX10-NEXT: s_addc_u32 s1, s1, 0 953; GFX10-NEXT: v_mov_b32_e32 v0, s0 954; GFX10-NEXT: v_mov_b32_e32 v1, s1 955; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 956; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 957; GFX10-NEXT: flat_store_byte v[0:1], v0 958; GFX10-NEXT: s_endpgm 959 %gep = getelementptr i8, i8* %p, i64 16383 960 %load = load volatile i8, i8* %gep, align 1 961 store i8 %load, i8* undef 962 ret void 963} 964 965define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_11bit_max(i8* %p) { 966; GFX9-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: 967; GFX9: ; %bb.0: 968; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 969; GFX9-NEXT: s_waitcnt lgkmcnt(0) 970; GFX9-NEXT: v_mov_b32_e32 v0, s0 971; GFX9-NEXT: v_mov_b32_e32 v1, s1 972; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 973; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 974; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 975; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 976; GFX9-NEXT: flat_store_byte v[0:1], v0 977; GFX9-NEXT: s_endpgm 978; 979; GFX10-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: 980; GFX10: ; %bb.0: 981; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 982; GFX10-NEXT: ; implicit-def: $vcc_hi 983; GFX10-NEXT: s_waitcnt lgkmcnt(0) 984; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff000 985; GFX10-NEXT: s_addc_u32 s1, s1, -1 986; GFX10-NEXT: v_mov_b32_e32 v0, s0 987; GFX10-NEXT: v_mov_b32_e32 v1, s1 988; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 989; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 990; GFX10-NEXT: flat_store_byte v[0:1], v0 991; GFX10-NEXT: s_endpgm 992 %gep = getelementptr i8, i8* %p, i64 -4096 993 %load = load volatile i8, i8* %gep, 
align 1 994 store i8 %load, i8* undef 995 ret void 996} 997 998define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_12bit_max(i8* %p) { 999; GFX9-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: 1000; GFX9: ; %bb.0: 1001; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1002; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1003; GFX9-NEXT: v_mov_b32_e32 v0, s0 1004; GFX9-NEXT: v_mov_b32_e32 v1, s1 1005; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 1006; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 1007; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 1008; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1009; GFX9-NEXT: flat_store_byte v[0:1], v0 1010; GFX9-NEXT: s_endpgm 1011; 1012; GFX10-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: 1013; GFX10: ; %bb.0: 1014; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1015; GFX10-NEXT: ; implicit-def: $vcc_hi 1016; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1017; GFX10-NEXT: s_add_u32 s0, s0, 0xffffe000 1018; GFX10-NEXT: s_addc_u32 s1, s1, -1 1019; GFX10-NEXT: v_mov_b32_e32 v0, s0 1020; GFX10-NEXT: v_mov_b32_e32 v1, s1 1021; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 1022; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1023; GFX10-NEXT: flat_store_byte v[0:1], v0 1024; GFX10-NEXT: s_endpgm 1025 %gep = getelementptr i8, i8* %p, i64 -8192 1026 %load = load volatile i8, i8* %gep, align 1 1027 store i8 %load, i8* undef 1028 ret void 1029} 1030 1031define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_13bit_max(i8* %p) { 1032; GFX9-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: 1033; GFX9: ; %bb.0: 1034; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1035; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1036; GFX9-NEXT: v_mov_b32_e32 v0, s0 1037; GFX9-NEXT: v_mov_b32_e32 v1, s1 1038; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0 1039; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 1040; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 1041; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1042; GFX9-NEXT: flat_store_byte v[0:1], v0 1043; GFX9-NEXT: s_endpgm 1044; 1045; GFX10-LABEL: 
flat_inst_salu_offset_2x_neg_13bit_max: 1046; GFX10: ; %bb.0: 1047; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1048; GFX10-NEXT: ; implicit-def: $vcc_hi 1049; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1050; GFX10-NEXT: s_add_u32 s0, s0, 0xffffc000 1051; GFX10-NEXT: s_addc_u32 s1, s1, -1 1052; GFX10-NEXT: v_mov_b32_e32 v0, s0 1053; GFX10-NEXT: v_mov_b32_e32 v1, s1 1054; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 1055; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1056; GFX10-NEXT: flat_store_byte v[0:1], v0 1057; GFX10-NEXT: s_endpgm 1058 %gep = getelementptr i8, i8* %p, i64 -16384 1059 %load = load volatile i8, i8* %gep, align 1 1060 store i8 %load, i8* undef 1061 ret void 1062} 1063 1064; Fill 11-bit low-bits (1ull << 33) | 2047 1065define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split0(i8* %p) { 1066; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_split0: 1067; GFX9: ; %bb.0: 1068; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1069; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1070; GFX9-NEXT: v_mov_b32_e32 v1, s1 1071; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 1072; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1073; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 1074; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1075; GFX9-NEXT: flat_store_byte v[0:1], v0 1076; GFX9-NEXT: s_endpgm 1077; 1078; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split0: 1079; GFX10: ; %bb.0: 1080; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1081; GFX10-NEXT: ; implicit-def: $vcc_hi 1082; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1083; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff 1084; GFX10-NEXT: s_addc_u32 s1, s1, 2 1085; GFX10-NEXT: v_mov_b32_e32 v0, s0 1086; GFX10-NEXT: v_mov_b32_e32 v1, s1 1087; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 1088; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1089; GFX10-NEXT: flat_store_byte v[0:1], v0 1090; GFX10-NEXT: s_endpgm 1091 %gep = getelementptr i8, i8* %p, i64 8589936639 1092 %load = load volatile i8, i8* %gep, align 1 1093 store i8 %load, i8* undef 1094 ret void 1095} 
; Fill 11-bit low-bits (1ull << 33) | 2048
; The GEP constant 8589936640 is 2^33 + 2048; 2048 (0x800) is one past the
; 11-bit maximum of 2047. gfx900 is expected to add only the high part to the
; address (0 to the low dword, 2 to the high dword) and keep 2048 in the
; load's offset field. gfx1010 is expected to add 0x800 and 2 on the SALU and
; load with no immediate offset.
define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split1(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x800
; GFX10-NEXT: s_addc_u32 s1, s1, 2
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 8589936640
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Fill 12-bit low-bits (1ull << 33) | 4095
; The GEP constant 8589938687 is 2^33 + 4095. gfx900 is expected to add only
; the high part to the address (0 to the low dword, 2 to the high dword) and
; keep 4095 in the load's offset field. gfx1010 is expected to add 0xfff and
; 2 on the SALU and load with no immediate offset.
define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split0(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
; GFX10-NEXT: s_addc_u32 s1, s1, 2
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 8589938687
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Fill 12-bit low-bits (1ull << 33) | 4096
; The GEP constant 8589938688 is 2^33 + 4096; 4096 (0x1000) is one past the
; 12-bit maximum of 4095. Neither target's expected output uses an immediate
; offset here: both add 0x1000 to the low dword and 2 to the high dword
; (gfx900 on the VALU, gfx1010 on the SALU) before the load.
define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split1(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x1000
; GFX10-NEXT: s_addc_u32 s1, s1, 2
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 8589938688
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Fill 13-bit low-bits (1ull << 33) | 8191
; The GEP constant 8589942783 is 2^33 + 8191. gfx900 is expected to split the
; offset: 0x1000 is added to the low dword and 2 to the high dword, and the
; remaining 4095 goes in the load's offset field. gfx1010 is expected to add
; 0x1fff and 2 on the SALU and load with no immediate offset.
define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split0(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
; GFX10-NEXT: s_addc_u32 s1, s1, 2
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 8589942783
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Fill 13-bit low-bits (1ull << 33) | 8192
; The GEP constant 8589942784 is 2^33 + 8192; 8192 (0x2000) is one past the
; 13-bit maximum of 8191. Neither target's expected output uses an immediate
; offset here: both add 0x2000 to the low dword and 2 to the high dword
; (gfx900 on the VALU, gfx1010 on the SALU) before the load.
define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split1(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x2000
; GFX10-NEXT: s_addc_u32 s1, s1, 2
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 8589942784
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; The GEP constant -9223372036854773761 is -2^63 + 2047, i.e. only bit 63 and
; the low 11 bits are set. Neither target's expected output uses an immediate
; offset: the low dword gets 0x7ff added and the high dword gets the sign-bit
; constant added with carry. gfx1010 uses the literal 0x80000000; gfx900
; takes the constant from v1, set up by v_bfrev_b32 of 1 (presumably the
; same 0x80000000 value -- confirm against the ISA documentation).
define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v2, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff
; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 -9223372036854773761
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; The GEP constant -9223372036854773760 is -2^63 + 2048; 2048 (0x800) is one
; past the 11-bit maximum. Neither target's expected output uses an immediate
; offset: the low dword gets 0x800 added and the high dword gets the sign-bit
; constant added with carry (literal 0x80000000 on gfx1010; v1 from
; v_bfrev_b32 of 1 on gfx900, presumably the same value).
define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v2, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x800
; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 -9223372036854773760
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; The GEP constant -9223372036854771713 is -2^63 + 4095. Neither target's
; expected output uses an immediate offset: the low dword gets 0xfff added
; and the high dword gets the sign-bit constant added with carry (literal
; 0x80000000 on gfx1010; v1 from v_bfrev_b32 of 1 on gfx900, presumably the
; same value).
define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v2, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 -9223372036854771713
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; The GEP constant -9223372036854771712 is -2^63 + 4096; 4096 (0x1000) is one
; past the 12-bit maximum. Neither target's expected output uses an immediate
; offset: the low dword gets 0x1000 added and the high dword gets the
; sign-bit constant added with carry (literal 0x80000000 on gfx1010; v1 from
; v_bfrev_b32 of 1 on gfx900, presumably the same value).
define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split1(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v2, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x1000
; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 -9223372036854771712
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; The GEP constant -9223372036854767617 is -2^63 + 8191. Neither target's
; expected output uses an immediate offset: the low dword gets 0x1fff added
; and the high dword gets the sign-bit constant added with carry (literal
; 0x80000000 on gfx1010; v1 from v_bfrev_b32 of 1 on gfx900, presumably the
; same value).
define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v2, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 -9223372036854767617
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}

; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; The GEP constant -9223372036854767616 is -2^63 + 8192; 8192 (0x2000) is one
; past the 13-bit maximum. Neither target's expected output uses an immediate
; offset: the low dword gets 0x2000 added and the high dword gets the
; sign-bit constant added with carry (literal 0x80000000 on gfx1010; v1 from
; v_bfrev_b32 of 1 on gfx900, presumably the same value).
define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(i8* %p) {
; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v2, s1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_byte v[0:1], v0
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x2000
; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_byte v[0:1], v0
; GFX10-NEXT: s_endpgm
  %gep = getelementptr i8, i8* %p, i64 -9223372036854767616
  %load = load volatile i8, i8* %gep, align 1
  store i8 %load, i8* undef
  ret void
}