1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s 3; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s 4; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s 5 6; Test splitting flat instruction offsets into the low and high bits 7; when the offset doesn't fit in the offset field. 8 9define i8 @flat_inst_valu_offset_1(i8* %p) { 10; GFX9-LABEL: flat_inst_valu_offset_1: 11; GFX9: ; %bb.0: 12; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:1 14; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 15; GFX9-NEXT: s_setpc_b64 s[30:31] 16; 17; GFX10-LABEL: flat_inst_valu_offset_1: 18; GFX10: ; %bb.0: 19; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 21; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1 22; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 23; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 24; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 25; GFX10-NEXT: s_setpc_b64 s[30:31] 26; 27; GFX11-LABEL: flat_inst_valu_offset_1: 28; GFX11: ; %bb.0: 29; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 30; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 31; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:1 32; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 33; GFX11-NEXT: s_setpc_b64 s[30:31] 34 %gep = getelementptr i8, i8* %p, i64 1 35 %load = load i8, i8* %gep, align 4 36 ret i8 %load 37} 38 39define i8 @flat_inst_valu_offset_11bit_max(i8* %p) { 40; GFX9-LABEL: flat_inst_valu_offset_11bit_max: 41; GFX9: ; %bb.0: 42; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 43; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 44; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 45; GFX9-NEXT: s_setpc_b64 s[30:31] 46; 47; GFX10-LABEL: flat_inst_valu_offset_11bit_max: 48; GFX10: ; %bb.0: 49; 
GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 50; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 51; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 52; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 53; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 54; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 55; GFX10-NEXT: s_setpc_b64 s[30:31] 56; 57; GFX11-LABEL: flat_inst_valu_offset_11bit_max: 58; GFX11: ; %bb.0: 59; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 60; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 61; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047 62; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 63; GFX11-NEXT: s_setpc_b64 s[30:31] 64 %gep = getelementptr i8, i8* %p, i64 2047 65 %load = load i8, i8* %gep, align 4 66 ret i8 %load 67} 68 69define i8 @flat_inst_valu_offset_12bit_max(i8* %p) { 70; GFX9-LABEL: flat_inst_valu_offset_12bit_max: 71; GFX9: ; %bb.0: 72; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 73; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 74; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 75; GFX9-NEXT: s_setpc_b64 s[30:31] 76; 77; GFX10-LABEL: flat_inst_valu_offset_12bit_max: 78; GFX10: ; %bb.0: 79; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 80; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 81; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 82; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 83; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 84; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 85; GFX10-NEXT: s_setpc_b64 s[30:31] 86; 87; GFX11-LABEL: flat_inst_valu_offset_12bit_max: 88; GFX11: ; %bb.0: 89; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 90; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 91; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 92; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 93; GFX11-NEXT: s_setpc_b64 s[30:31] 94 %gep = getelementptr i8, i8* %p, i64 4095 95 %load = load i8, i8* %gep, align 4 96 ret i8 %load 97} 98 99define i8 @flat_inst_valu_offset_13bit_max(i8* %p) { 100; GFX9-LABEL: flat_inst_valu_offset_13bit_max: 101; GFX9: 
; %bb.0: 102; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 103; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 104; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 105; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 106; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 107; GFX9-NEXT: s_setpc_b64 s[30:31] 108; 109; GFX10-LABEL: flat_inst_valu_offset_13bit_max: 110; GFX10: ; %bb.0: 111; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 112; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 113; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 114; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 115; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 116; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 117; GFX10-NEXT: s_setpc_b64 s[30:31] 118; 119; GFX11-LABEL: flat_inst_valu_offset_13bit_max: 120; GFX11: ; %bb.0: 121; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 122; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 123; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 124; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 125; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 126; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 127; GFX11-NEXT: s_setpc_b64 s[30:31] 128 %gep = getelementptr i8, i8* %p, i64 8191 129 %load = load i8, i8* %gep, align 4 130 ret i8 %load 131} 132 133define i8 @flat_inst_valu_offset_neg_11bit_max(i8* %p) { 134; GFX9-LABEL: flat_inst_valu_offset_neg_11bit_max: 135; GFX9: ; %bb.0: 136; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 137; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 138; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 139; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 140; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 141; GFX9-NEXT: s_setpc_b64 s[30:31] 142; 143; GFX10-LABEL: flat_inst_valu_offset_neg_11bit_max: 144; GFX10: ; %bb.0: 145; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 146; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 147; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 148; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, 
vcc_lo 149; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 150; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 151; GFX10-NEXT: s_setpc_b64 s[30:31] 152; 153; GFX11-LABEL: flat_inst_valu_offset_neg_11bit_max: 154; GFX11: ; %bb.0: 155; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 156; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 157; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 158; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 159; GFX11-NEXT: flat_load_u8 v0, v[0:1] 160; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 161; GFX11-NEXT: s_setpc_b64 s[30:31] 162 %gep = getelementptr i8, i8* %p, i64 -2048 163 %load = load i8, i8* %gep, align 4 164 ret i8 %load 165} 166 167define i8 @flat_inst_valu_offset_neg_12bit_max(i8* %p) { 168; GFX9-LABEL: flat_inst_valu_offset_neg_12bit_max: 169; GFX9: ; %bb.0: 170; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 171; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 172; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 173; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 174; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 175; GFX9-NEXT: s_setpc_b64 s[30:31] 176; 177; GFX10-LABEL: flat_inst_valu_offset_neg_12bit_max: 178; GFX10: ; %bb.0: 179; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 180; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 181; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 182; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 183; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 184; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 185; GFX10-NEXT: s_setpc_b64 s[30:31] 186; 187; GFX11-LABEL: flat_inst_valu_offset_neg_12bit_max: 188; GFX11: ; %bb.0: 189; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 190; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 191; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 192; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 193; GFX11-NEXT: flat_load_u8 v0, v[0:1] 194; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 195; GFX11-NEXT: s_setpc_b64 s[30:31] 196 %gep = getelementptr i8, i8* %p, 
i64 -4096 197 %load = load i8, i8* %gep, align 4 198 ret i8 %load 199} 200 201define i8 @flat_inst_valu_offset_neg_13bit_max(i8* %p) { 202; GFX9-LABEL: flat_inst_valu_offset_neg_13bit_max: 203; GFX9: ; %bb.0: 204; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 205; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 206; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 207; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 208; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 209; GFX9-NEXT: s_setpc_b64 s[30:31] 210; 211; GFX10-LABEL: flat_inst_valu_offset_neg_13bit_max: 212; GFX10: ; %bb.0: 213; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 214; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 215; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 216; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 217; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 218; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 219; GFX10-NEXT: s_setpc_b64 s[30:31] 220; 221; GFX11-LABEL: flat_inst_valu_offset_neg_13bit_max: 222; GFX11: ; %bb.0: 223; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 224; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 225; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 226; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 227; GFX11-NEXT: flat_load_u8 v0, v[0:1] 228; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 229; GFX11-NEXT: s_setpc_b64 s[30:31] 230 %gep = getelementptr i8, i8* %p, i64 -8192 231 %load = load i8, i8* %gep, align 4 232 ret i8 %load 233} 234 235define i8 @flat_inst_valu_offset_2x_11bit_max(i8* %p) { 236; GFX9-LABEL: flat_inst_valu_offset_2x_11bit_max: 237; GFX9: ; %bb.0: 238; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 239; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 240; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 241; GFX9-NEXT: s_setpc_b64 s[30:31] 242; 243; GFX10-LABEL: flat_inst_valu_offset_2x_11bit_max: 244; GFX10: ; %bb.0: 245; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 246; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 247; GFX10-NEXT: 
v_add_co_u32 v0, vcc_lo, 0xfff, v0 248; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 249; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 250; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 251; GFX10-NEXT: s_setpc_b64 s[30:31] 252; 253; GFX11-LABEL: flat_inst_valu_offset_2x_11bit_max: 254; GFX11: ; %bb.0: 255; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 256; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 257; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 258; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 259; GFX11-NEXT: s_setpc_b64 s[30:31] 260 %gep = getelementptr i8, i8* %p, i64 4095 261 %load = load i8, i8* %gep, align 4 262 ret i8 %load 263} 264 265define i8 @flat_inst_valu_offset_2x_12bit_max(i8* %p) { 266; GFX9-LABEL: flat_inst_valu_offset_2x_12bit_max: 267; GFX9: ; %bb.0: 268; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 269; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 270; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 271; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 272; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 273; GFX9-NEXT: s_setpc_b64 s[30:31] 274; 275; GFX10-LABEL: flat_inst_valu_offset_2x_12bit_max: 276; GFX10: ; %bb.0: 277; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 278; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 279; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 280; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 281; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 282; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 283; GFX10-NEXT: s_setpc_b64 s[30:31] 284; 285; GFX11-LABEL: flat_inst_valu_offset_2x_12bit_max: 286; GFX11: ; %bb.0: 287; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 288; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 289; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 290; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 291; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 292; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 293; GFX11-NEXT: s_setpc_b64 s[30:31] 294 %gep = getelementptr i8, i8* %p, i64 8191 295 
%load = load i8, i8* %gep, align 4 296 ret i8 %load 297} 298 299define i8 @flat_inst_valu_offset_2x_13bit_max(i8* %p) { 300; GFX9-LABEL: flat_inst_valu_offset_2x_13bit_max: 301; GFX9: ; %bb.0: 302; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 303; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0 304; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 305; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 306; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 307; GFX9-NEXT: s_setpc_b64 s[30:31] 308; 309; GFX10-LABEL: flat_inst_valu_offset_2x_13bit_max: 310; GFX10: ; %bb.0: 311; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 312; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 313; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0 314; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 315; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 316; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 317; GFX10-NEXT: s_setpc_b64 s[30:31] 318; 319; GFX11-LABEL: flat_inst_valu_offset_2x_13bit_max: 320; GFX11: ; %bb.0: 321; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 322; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 323; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0 324; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 325; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 326; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 327; GFX11-NEXT: s_setpc_b64 s[30:31] 328 %gep = getelementptr i8, i8* %p, i64 16383 329 %load = load i8, i8* %gep, align 4 330 ret i8 %load 331} 332 333define i8 @flat_inst_valu_offset_2x_neg_11bit_max(i8* %p) { 334; GFX9-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: 335; GFX9: ; %bb.0: 336; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 337; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 338; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 339; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 340; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 341; GFX9-NEXT: s_setpc_b64 s[30:31] 342; 343; GFX10-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: 344; GFX10: ; %bb.0: 345; GFX10-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 346; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 347; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 348; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 349; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 350; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 351; GFX10-NEXT: s_setpc_b64 s[30:31] 352; 353; GFX11-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: 354; GFX11: ; %bb.0: 355; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 356; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 357; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 358; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 359; GFX11-NEXT: flat_load_u8 v0, v[0:1] 360; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 361; GFX11-NEXT: s_setpc_b64 s[30:31] 362 %gep = getelementptr i8, i8* %p, i64 -4096 363 %load = load i8, i8* %gep, align 4 364 ret i8 %load 365} 366 367define i8 @flat_inst_valu_offset_2x_neg_12bit_max(i8* %p) { 368; GFX9-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: 369; GFX9: ; %bb.0: 370; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 371; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 372; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 373; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 374; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 375; GFX9-NEXT: s_setpc_b64 s[30:31] 376; 377; GFX10-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: 378; GFX10: ; %bb.0: 379; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 380; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 381; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 382; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 383; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 384; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 385; GFX10-NEXT: s_setpc_b64 s[30:31] 386; 387; GFX11-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: 388; GFX11: ; %bb.0: 389; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 390; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 391; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 392; 
GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 393; GFX11-NEXT: flat_load_u8 v0, v[0:1] 394; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 395; GFX11-NEXT: s_setpc_b64 s[30:31] 396 %gep = getelementptr i8, i8* %p, i64 -8192 397 %load = load i8, i8* %gep, align 4 398 ret i8 %load 399} 400 401define i8 @flat_inst_valu_offset_2x_neg_13bit_max(i8* %p) { 402; GFX9-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: 403; GFX9: ; %bb.0: 404; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 405; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0 406; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 407; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 408; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 409; GFX9-NEXT: s_setpc_b64 s[30:31] 410; 411; GFX10-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: 412; GFX10: ; %bb.0: 413; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 414; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 415; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 416; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 417; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 418; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 419; GFX10-NEXT: s_setpc_b64 s[30:31] 420; 421; GFX11-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: 422; GFX11: ; %bb.0: 423; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 424; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 425; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 426; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 427; GFX11-NEXT: flat_load_u8 v0, v[0:1] 428; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 429; GFX11-NEXT: s_setpc_b64 s[30:31] 430 %gep = getelementptr i8, i8* %p, i64 -16384 431 %load = load i8, i8* %gep, align 4 432 ret i8 %load 433} 434 435; Fill 11-bit low-bits (1ull << 33) | 2047 436define i8 @flat_inst_valu_offset_64bit_11bit_split0(i8* %p) { 437; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split0: 438; GFX9: ; %bb.0: 439; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 440; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, 
v0 441; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 442; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 443; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 444; GFX9-NEXT: s_setpc_b64 s[30:31] 445; 446; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split0: 447; GFX10: ; %bb.0: 448; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 449; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 450; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 451; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 452; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 453; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 454; GFX10-NEXT: s_setpc_b64 s[30:31] 455; 456; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_split0: 457; GFX11: ; %bb.0: 458; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 459; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 460; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 461; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 462; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047 463; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 464; GFX11-NEXT: s_setpc_b64 s[30:31] 465 %gep = getelementptr i8, i8* %p, i64 8589936639 466 %load = load i8, i8* %gep, align 4 467 ret i8 %load 468} 469 470; Fill 11-bit low-bits (1ull << 33) | 2048 471define i8 @flat_inst_valu_offset_64bit_11bit_split1(i8* %p) { 472; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split1: 473; GFX9: ; %bb.0: 474; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 475; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 476; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 477; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 478; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 479; GFX9-NEXT: s_setpc_b64 s[30:31] 480; 481; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split1: 482; GFX10: ; %bb.0: 483; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 484; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 485; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 486; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 487; GFX10-NEXT: flat_load_ubyte 
v0, v[0:1] 488; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 489; GFX10-NEXT: s_setpc_b64 s[30:31] 490; 491; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_split1: 492; GFX11: ; %bb.0: 493; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 494; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 495; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 496; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 497; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2048 498; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 499; GFX11-NEXT: s_setpc_b64 s[30:31] 500 %gep = getelementptr i8, i8* %p, i64 8589936640 501 %load = load i8, i8* %gep, align 4 502 ret i8 %load 503} 504 505; Fill 12-bit low-bits (1ull << 33) | 4095 506define i8 @flat_inst_valu_offset_64bit_12bit_split0(i8* %p) { 507; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split0: 508; GFX9: ; %bb.0: 509; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 510; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 511; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 512; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 513; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 514; GFX9-NEXT: s_setpc_b64 s[30:31] 515; 516; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split0: 517; GFX10: ; %bb.0: 518; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 519; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 520; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 521; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 522; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 523; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 524; GFX10-NEXT: s_setpc_b64 s[30:31] 525; 526; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_split0: 527; GFX11: ; %bb.0: 528; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 529; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 530; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 531; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 532; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 533; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 534; GFX11-NEXT: s_setpc_b64 s[30:31] 
535 %gep = getelementptr i8, i8* %p, i64 8589938687 536 %load = load i8, i8* %gep, align 4 537 ret i8 %load 538} 539 540; Fill 12-bit low-bits (1ull << 33) | 4096 541define i8 @flat_inst_valu_offset_64bit_12bit_split1(i8* %p) { 542; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split1: 543; GFX9: ; %bb.0: 544; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 545; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 546; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 547; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 548; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 549; GFX9-NEXT: s_setpc_b64 s[30:31] 550; 551; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split1: 552; GFX10: ; %bb.0: 553; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 554; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 555; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 556; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 557; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 558; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 559; GFX10-NEXT: s_setpc_b64 s[30:31] 560; 561; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_split1: 562; GFX11: ; %bb.0: 563; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 564; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 565; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 566; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 567; GFX11-NEXT: flat_load_u8 v0, v[0:1] 568; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 569; GFX11-NEXT: s_setpc_b64 s[30:31] 570 %gep = getelementptr i8, i8* %p, i64 8589938688 571 %load = load i8, i8* %gep, align 4 572 ret i8 %load 573} 574 575; Fill 13-bit low-bits (1ull << 33) | 8191 576define i8 @flat_inst_valu_offset_64bit_13bit_split0(i8* %p) { 577; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split0: 578; GFX9: ; %bb.0: 579; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 580; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 581; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 582; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 583; GFX9-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 584; GFX9-NEXT: s_setpc_b64 s[30:31] 585; 586; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split0: 587; GFX10: ; %bb.0: 588; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 589; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 590; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 591; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 592; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 593; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 594; GFX10-NEXT: s_setpc_b64 s[30:31] 595; 596; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_split0: 597; GFX11: ; %bb.0: 598; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 599; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 600; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 601; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 602; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 603; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 604; GFX11-NEXT: s_setpc_b64 s[30:31] 605 %gep = getelementptr i8, i8* %p, i64 8589942783 606 %load = load i8, i8* %gep, align 4 607 ret i8 %load 608} 609 610; Fill 13-bit low-bits (1ull << 33) | 8192 611define i8 @flat_inst_valu_offset_64bit_13bit_split1(i8* %p) { 612; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split1: 613; GFX9: ; %bb.0: 614; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 615; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 616; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 617; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 618; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 619; GFX9-NEXT: s_setpc_b64 s[30:31] 620; 621; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split1: 622; GFX10: ; %bb.0: 623; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 624; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 625; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 626; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 627; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 628; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 629; GFX10-NEXT: s_setpc_b64 s[30:31] 630; 631; GFX11-LABEL: 
flat_inst_valu_offset_64bit_13bit_split1: 632; GFX11: ; %bb.0: 633; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 634; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 635; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 636; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 637; GFX11-NEXT: flat_load_u8 v0, v[0:1] 638; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 639; GFX11-NEXT: s_setpc_b64 s[30:31] 640 %gep = getelementptr i8, i8* %p, i64 8589942784 641 %load = load i8, i8* %gep, align 4 642 ret i8 %load 643} 644 645; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047 646define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(i8* %p) { 647; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: 648; GFX9: ; %bb.0: 649; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 650; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0 651; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 652; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 653; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 654; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 655; GFX9-NEXT: s_setpc_b64 s[30:31] 656; 657; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: 658; GFX10: ; %bb.0: 659; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 660; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 661; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 662; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 663; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 664; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 665; GFX10-NEXT: s_setpc_b64 s[30:31] 666; 667; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: 668; GFX11: ; %bb.0: 669; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 670; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 671; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 672; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 673; GFX11-NEXT: flat_load_u8 v0, v[0:1] 674; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 675; GFX11-NEXT: s_setpc_b64 s[30:31] 676 %gep = getelementptr i8, 
i8* %p, i64 -9223372036854773761 677 %load = load i8, i8* %gep, align 4 678 ret i8 %load 679} 680 681; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048 682define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(i8* %p) { 683; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: 684; GFX9: ; %bb.0: 685; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 686; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0 687; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 688; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 689; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 690; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 691; GFX9-NEXT: s_setpc_b64 s[30:31] 692; 693; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: 694; GFX10: ; %bb.0: 695; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 696; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 697; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 698; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 699; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 700; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 701; GFX10-NEXT: s_setpc_b64 s[30:31] 702; 703; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: 704; GFX11: ; %bb.0: 705; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 706; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 707; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 708; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 709; GFX11-NEXT: flat_load_u8 v0, v[0:1] 710; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 711; GFX11-NEXT: s_setpc_b64 s[30:31] 712 %gep = getelementptr i8, i8* %p, i64 -9223372036854773760 713 %load = load i8, i8* %gep, align 4 714 ret i8 %load 715} 716 717; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095 718define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(i8* %p) { 719; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: 720; GFX9: ; %bb.0: 721; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 722; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 
0xfff, v0 723; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 724; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 725; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 726; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 727; GFX9-NEXT: s_setpc_b64 s[30:31] 728; 729; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: 730; GFX10: ; %bb.0: 731; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 732; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 733; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 734; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 735; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 736; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 737; GFX10-NEXT: s_setpc_b64 s[30:31] 738; 739; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: 740; GFX11: ; %bb.0: 741; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 742; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 743; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 744; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 745; GFX11-NEXT: flat_load_u8 v0, v[0:1] 746; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 747; GFX11-NEXT: s_setpc_b64 s[30:31] 748 %gep = getelementptr i8, i8* %p, i64 -9223372036854771713 749 %load = load i8, i8* %gep, align 4 750 ret i8 %load 751} 752 753; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096 754define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(i8* %p) { 755; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: 756; GFX9: ; %bb.0: 757; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 758; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 759; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 760; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 761; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 762; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 763; GFX9-NEXT: s_setpc_b64 s[30:31] 764; 765; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: 766; GFX10: ; %bb.0: 767; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 768; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 
769; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 770; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 771; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 772; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 773; GFX10-NEXT: s_setpc_b64 s[30:31] 774; 775; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: 776; GFX11: ; %bb.0: 777; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 778; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 779; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 780; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 781; GFX11-NEXT: flat_load_u8 v0, v[0:1] 782; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 783; GFX11-NEXT: s_setpc_b64 s[30:31] 784 %gep = getelementptr i8, i8* %p, i64 -9223372036854771712 785 %load = load i8, i8* %gep, align 4 786 ret i8 %load 787} 788 789; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191 790define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(i8* %p) { 791; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: 792; GFX9: ; %bb.0: 793; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 794; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0 795; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 796; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 797; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 798; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 799; GFX9-NEXT: s_setpc_b64 s[30:31] 800; 801; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: 802; GFX10: ; %bb.0: 803; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 804; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 805; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 806; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 807; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 808; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 809; GFX10-NEXT: s_setpc_b64 s[30:31] 810; 811; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: 812; GFX11: ; %bb.0: 813; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 814; 
GFX11-NEXT: s_waitcnt_vscnt null, 0x0 815; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 816; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 817; GFX11-NEXT: flat_load_u8 v0, v[0:1] 818; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 819; GFX11-NEXT: s_setpc_b64 s[30:31] 820 %gep = getelementptr i8, i8* %p, i64 -9223372036854767617 821 %load = load i8, i8* %gep, align 4 822 ret i8 %load 823} 824 825; Low bits one past the 13-bit maximum, negative high bits: (1ull << 63) | 8192 826define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(i8* %p) { 827; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: 828; GFX9: ; %bb.0: 829; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 830; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 831; GFX9-NEXT: v_bfrev_b32_e32 v2, 1 832; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 833; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 834; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 835; GFX9-NEXT: s_setpc_b64 s[30:31] 836; 837; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: 838; GFX10: ; %bb.0: 839; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 840; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 841; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 842; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 843; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 844; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 845; GFX10-NEXT: s_setpc_b64 s[30:31] 846; 847; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: 848; GFX11: ; %bb.0: 849; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 850; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 851; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 852; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 853; GFX11-NEXT: flat_load_u8 v0, v[0:1] 854; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 855; GFX11-NEXT: s_setpc_b64 s[30:31] 856 %gep = getelementptr i8, i8* %p, i64 -9223372036854767616 857 %load = load i8, i8* %gep, align 4 858 ret i8 %load 859} 860 
861define amdgpu_kernel void @flat_inst_salu_offset_1(i8* %p) { 862; GFX9-LABEL: flat_inst_salu_offset_1: 863; GFX9: ; %bb.0: 864; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 865; GFX9-NEXT: s_waitcnt lgkmcnt(0) 866; GFX9-NEXT: v_mov_b32_e32 v0, s0 867; GFX9-NEXT: v_mov_b32_e32 v1, s1 868; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:1 glc 869; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 870; GFX9-NEXT: flat_store_byte v[0:1], v0 871; GFX9-NEXT: s_endpgm 872; 873; GFX10-LABEL: flat_inst_salu_offset_1: 874; GFX10: ; %bb.0: 875; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 876; GFX10-NEXT: s_waitcnt lgkmcnt(0) 877; GFX10-NEXT: s_add_u32 s0, s0, 1 878; GFX10-NEXT: s_addc_u32 s1, s1, 0 879; GFX10-NEXT: v_mov_b32_e32 v0, s0 880; GFX10-NEXT: v_mov_b32_e32 v1, s1 881; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 882; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 883; GFX10-NEXT: flat_store_byte v[0:1], v0 884; GFX10-NEXT: s_endpgm 885; 886; GFX11-LABEL: flat_inst_salu_offset_1: 887; GFX11: ; %bb.0: 888; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 889; GFX11-NEXT: s_waitcnt lgkmcnt(0) 890; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 891; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:1 glc dlc 892; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 893; GFX11-NEXT: flat_store_b8 v[0:1], v0 894; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 895; GFX11-NEXT: s_endpgm 896 %gep = getelementptr i8, i8* %p, i64 1 897 %load = load volatile i8, i8* %gep, align 1 898 store i8 %load, i8* undef 899 ret void 900} 901 902define amdgpu_kernel void @flat_inst_salu_offset_11bit_max(i8* %p) { 903; GFX9-LABEL: flat_inst_salu_offset_11bit_max: 904; GFX9: ; %bb.0: 905; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 906; GFX9-NEXT: s_waitcnt lgkmcnt(0) 907; GFX9-NEXT: v_mov_b32_e32 v0, s0 908; GFX9-NEXT: v_mov_b32_e32 v1, s1 909; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 glc 910; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 911; GFX9-NEXT: flat_store_byte v[0:1], v0 912; GFX9-NEXT: 
s_endpgm 913; 914; GFX10-LABEL: flat_inst_salu_offset_11bit_max: 915; GFX10: ; %bb.0: 916; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 917; GFX10-NEXT: s_waitcnt lgkmcnt(0) 918; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff 919; GFX10-NEXT: s_addc_u32 s1, s1, 0 920; GFX10-NEXT: v_mov_b32_e32 v0, s0 921; GFX10-NEXT: v_mov_b32_e32 v1, s1 922; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 923; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 924; GFX10-NEXT: flat_store_byte v[0:1], v0 925; GFX10-NEXT: s_endpgm 926; 927; GFX11-LABEL: flat_inst_salu_offset_11bit_max: 928; GFX11: ; %bb.0: 929; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 930; GFX11-NEXT: s_waitcnt lgkmcnt(0) 931; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 932; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047 glc dlc 933; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 934; GFX11-NEXT: flat_store_b8 v[0:1], v0 935; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 936; GFX11-NEXT: s_endpgm 937 %gep = getelementptr i8, i8* %p, i64 2047 938 %load = load volatile i8, i8* %gep, align 1 939 store i8 %load, i8* undef 940 ret void 941} 942 943define amdgpu_kernel void @flat_inst_salu_offset_12bit_max(i8* %p) { 944; GFX9-LABEL: flat_inst_salu_offset_12bit_max: 945; GFX9: ; %bb.0: 946; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 947; GFX9-NEXT: s_waitcnt lgkmcnt(0) 948; GFX9-NEXT: v_mov_b32_e32 v0, s0 949; GFX9-NEXT: v_mov_b32_e32 v1, s1 950; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc 951; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 952; GFX9-NEXT: flat_store_byte v[0:1], v0 953; GFX9-NEXT: s_endpgm 954; 955; GFX10-LABEL: flat_inst_salu_offset_12bit_max: 956; GFX10: ; %bb.0: 957; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 958; GFX10-NEXT: s_waitcnt lgkmcnt(0) 959; GFX10-NEXT: s_add_u32 s0, s0, 0xfff 960; GFX10-NEXT: s_addc_u32 s1, s1, 0 961; GFX10-NEXT: v_mov_b32_e32 v0, s0 962; GFX10-NEXT: v_mov_b32_e32 v1, s1 963; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 964; GFX10-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) 965; GFX10-NEXT: flat_store_byte v[0:1], v0 966; GFX10-NEXT: s_endpgm 967; 968; GFX11-LABEL: flat_inst_salu_offset_12bit_max: 969; GFX11: ; %bb.0: 970; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 971; GFX11-NEXT: s_waitcnt lgkmcnt(0) 972; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 973; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc 974; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 975; GFX11-NEXT: flat_store_b8 v[0:1], v0 976; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 977; GFX11-NEXT: s_endpgm 978 %gep = getelementptr i8, i8* %p, i64 4095 979 %load = load volatile i8, i8* %gep, align 1 980 store i8 %load, i8* undef 981 ret void 982} 983 984define amdgpu_kernel void @flat_inst_salu_offset_13bit_max(i8* %p) { 985; GFX9-LABEL: flat_inst_salu_offset_13bit_max: 986; GFX9: ; %bb.0: 987; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 988; GFX9-NEXT: s_waitcnt lgkmcnt(0) 989; GFX9-NEXT: v_mov_b32_e32 v0, s0 990; GFX9-NEXT: v_mov_b32_e32 v1, s1 991; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 992; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 993; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc 994; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 995; GFX9-NEXT: flat_store_byte v[0:1], v0 996; GFX9-NEXT: s_endpgm 997; 998; GFX10-LABEL: flat_inst_salu_offset_13bit_max: 999; GFX10: ; %bb.0: 1000; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1001; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1002; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff 1003; GFX10-NEXT: s_addc_u32 s1, s1, 0 1004; GFX10-NEXT: v_mov_b32_e32 v0, s0 1005; GFX10-NEXT: v_mov_b32_e32 v1, s1 1006; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1007; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1008; GFX10-NEXT: flat_store_byte v[0:1], v0 1009; GFX10-NEXT: s_endpgm 1010; 1011; GFX11-LABEL: flat_inst_salu_offset_13bit_max: 1012; GFX11: ; %bb.0: 1013; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1014; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1015; GFX11-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 1016; 
GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1017; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 1018; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc 1019; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1020; GFX11-NEXT: flat_store_b8 v[0:1], v0 1021; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1022; GFX11-NEXT: s_endpgm 1023 %gep = getelementptr i8, i8* %p, i64 8191 1024 %load = load volatile i8, i8* %gep, align 1 1025 store i8 %load, i8* undef 1026 ret void 1027} 1028 1029define amdgpu_kernel void @flat_inst_salu_offset_neg_11bit_max(i8* %p) { 1030; GFX9-LABEL: flat_inst_salu_offset_neg_11bit_max: 1031; GFX9: ; %bb.0: 1032; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1033; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1034; GFX9-NEXT: v_mov_b32_e32 v0, s0 1035; GFX9-NEXT: v_mov_b32_e32 v1, s1 1036; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 1037; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 1038; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1039; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1040; GFX9-NEXT: flat_store_byte v[0:1], v0 1041; GFX9-NEXT: s_endpgm 1042; 1043; GFX10-LABEL: flat_inst_salu_offset_neg_11bit_max: 1044; GFX10: ; %bb.0: 1045; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1046; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1047; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff800 1048; GFX10-NEXT: s_addc_u32 s1, s1, -1 1049; GFX10-NEXT: v_mov_b32_e32 v0, s0 1050; GFX10-NEXT: v_mov_b32_e32 v1, s1 1051; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1052; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1053; GFX10-NEXT: flat_store_byte v[0:1], v0 1054; GFX10-NEXT: s_endpgm 1055; 1056; GFX11-LABEL: flat_inst_salu_offset_neg_11bit_max: 1057; GFX11: ; %bb.0: 1058; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1059; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1060; GFX11-NEXT: v_add_co_u32 v0, s0, 0xfffff800, s0 1061; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1062; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 1063; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1064; 
GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1065; GFX11-NEXT: flat_store_b8 v[0:1], v0 1066; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1067; GFX11-NEXT: s_endpgm 1068 %gep = getelementptr i8, i8* %p, i64 -2048 1069 %load = load volatile i8, i8* %gep, align 1 1070 store i8 %load, i8* undef 1071 ret void 1072} 1073 1074define amdgpu_kernel void @flat_inst_salu_offset_neg_12bit_max(i8* %p) { 1075; GFX9-LABEL: flat_inst_salu_offset_neg_12bit_max: 1076; GFX9: ; %bb.0: 1077; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1078; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1079; GFX9-NEXT: v_mov_b32_e32 v0, s0 1080; GFX9-NEXT: v_mov_b32_e32 v1, s1 1081; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 1082; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 1083; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1084; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1085; GFX9-NEXT: flat_store_byte v[0:1], v0 1086; GFX9-NEXT: s_endpgm 1087; 1088; GFX10-LABEL: flat_inst_salu_offset_neg_12bit_max: 1089; GFX10: ; %bb.0: 1090; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1091; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1092; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff000 1093; GFX10-NEXT: s_addc_u32 s1, s1, -1 1094; GFX10-NEXT: v_mov_b32_e32 v0, s0 1095; GFX10-NEXT: v_mov_b32_e32 v1, s1 1096; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1097; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1098; GFX10-NEXT: flat_store_byte v[0:1], v0 1099; GFX10-NEXT: s_endpgm 1100; 1101; GFX11-LABEL: flat_inst_salu_offset_neg_12bit_max: 1102; GFX11: ; %bb.0: 1103; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1104; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1105; GFX11-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 1106; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1107; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 1108; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1109; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1110; GFX11-NEXT: flat_store_b8 v[0:1], v0 1111; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1112; GFX11-NEXT: s_endpgm 1113 
%gep = getelementptr i8, i8* %p, i64 -4096 1114 %load = load volatile i8, i8* %gep, align 1 1115 store i8 %load, i8* undef 1116 ret void 1117} 1118 1119define amdgpu_kernel void @flat_inst_salu_offset_neg_13bit_max(i8* %p) { 1120; GFX9-LABEL: flat_inst_salu_offset_neg_13bit_max: 1121; GFX9: ; %bb.0: 1122; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1123; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1124; GFX9-NEXT: v_mov_b32_e32 v0, s0 1125; GFX9-NEXT: v_mov_b32_e32 v1, s1 1126; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 1127; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 1128; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1129; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1130; GFX9-NEXT: flat_store_byte v[0:1], v0 1131; GFX9-NEXT: s_endpgm 1132; 1133; GFX10-LABEL: flat_inst_salu_offset_neg_13bit_max: 1134; GFX10: ; %bb.0: 1135; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1136; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1137; GFX10-NEXT: s_add_u32 s0, s0, 0xffffe000 1138; GFX10-NEXT: s_addc_u32 s1, s1, -1 1139; GFX10-NEXT: v_mov_b32_e32 v0, s0 1140; GFX10-NEXT: v_mov_b32_e32 v1, s1 1141; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1142; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1143; GFX10-NEXT: flat_store_byte v[0:1], v0 1144; GFX10-NEXT: s_endpgm 1145; 1146; GFX11-LABEL: flat_inst_salu_offset_neg_13bit_max: 1147; GFX11: ; %bb.0: 1148; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1149; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1150; GFX11-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 1151; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1152; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 1153; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1154; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1155; GFX11-NEXT: flat_store_b8 v[0:1], v0 1156; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1157; GFX11-NEXT: s_endpgm 1158 %gep = getelementptr i8, i8* %p, i64 -8192 1159 %load = load volatile i8, i8* %gep, align 1 1160 store i8 %load, i8* undef 1161 ret void 1162} 1163 1164define amdgpu_kernel 
void @flat_inst_salu_offset_2x_11bit_max(i8* %p) { 1165; GFX9-LABEL: flat_inst_salu_offset_2x_11bit_max: 1166; GFX9: ; %bb.0: 1167; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1168; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1169; GFX9-NEXT: v_mov_b32_e32 v0, s0 1170; GFX9-NEXT: v_mov_b32_e32 v1, s1 1171; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc 1172; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1173; GFX9-NEXT: flat_store_byte v[0:1], v0 1174; GFX9-NEXT: s_endpgm 1175; 1176; GFX10-LABEL: flat_inst_salu_offset_2x_11bit_max: 1177; GFX10: ; %bb.0: 1178; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1179; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1180; GFX10-NEXT: s_add_u32 s0, s0, 0xfff 1181; GFX10-NEXT: s_addc_u32 s1, s1, 0 1182; GFX10-NEXT: v_mov_b32_e32 v0, s0 1183; GFX10-NEXT: v_mov_b32_e32 v1, s1 1184; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1185; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1186; GFX10-NEXT: flat_store_byte v[0:1], v0 1187; GFX10-NEXT: s_endpgm 1188; 1189; GFX11-LABEL: flat_inst_salu_offset_2x_11bit_max: 1190; GFX11: ; %bb.0: 1191; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1192; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1193; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1194; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc 1195; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1196; GFX11-NEXT: flat_store_b8 v[0:1], v0 1197; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1198; GFX11-NEXT: s_endpgm 1199 %gep = getelementptr i8, i8* %p, i64 4095 1200 %load = load volatile i8, i8* %gep, align 1 1201 store i8 %load, i8* undef 1202 ret void 1203} 1204 1205define amdgpu_kernel void @flat_inst_salu_offset_2x_12bit_max(i8* %p) { 1206; GFX9-LABEL: flat_inst_salu_offset_2x_12bit_max: 1207; GFX9: ; %bb.0: 1208; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1209; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1210; GFX9-NEXT: v_mov_b32_e32 v0, s0 1211; GFX9-NEXT: v_mov_b32_e32 v1, s1 1212; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1213; GFX9-NEXT: 
v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 1214; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc 1215; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1216; GFX9-NEXT: flat_store_byte v[0:1], v0 1217; GFX9-NEXT: s_endpgm 1218; 1219; GFX10-LABEL: flat_inst_salu_offset_2x_12bit_max: 1220; GFX10: ; %bb.0: 1221; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1222; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1223; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff 1224; GFX10-NEXT: s_addc_u32 s1, s1, 0 1225; GFX10-NEXT: v_mov_b32_e32 v0, s0 1226; GFX10-NEXT: v_mov_b32_e32 v1, s1 1227; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1228; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1229; GFX10-NEXT: flat_store_byte v[0:1], v0 1230; GFX10-NEXT: s_endpgm 1231; 1232; GFX11-LABEL: flat_inst_salu_offset_2x_12bit_max: 1233; GFX11: ; %bb.0: 1234; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1235; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1236; GFX11-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 1237; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1238; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 1239; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc 1240; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1241; GFX11-NEXT: flat_store_b8 v[0:1], v0 1242; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1243; GFX11-NEXT: s_endpgm 1244 %gep = getelementptr i8, i8* %p, i64 8191 1245 %load = load volatile i8, i8* %gep, align 1 1246 store i8 %load, i8* undef 1247 ret void 1248} 1249 1250define amdgpu_kernel void @flat_inst_salu_offset_2x_13bit_max(i8* %p) { 1251; GFX9-LABEL: flat_inst_salu_offset_2x_13bit_max: 1252; GFX9: ; %bb.0: 1253; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1254; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1255; GFX9-NEXT: v_mov_b32_e32 v0, s0 1256; GFX9-NEXT: v_mov_b32_e32 v1, s1 1257; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0 1258; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 1259; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc 1260; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1261; GFX9-NEXT: 
flat_store_byte v[0:1], v0 1262; GFX9-NEXT: s_endpgm 1263; 1264; GFX10-LABEL: flat_inst_salu_offset_2x_13bit_max: 1265; GFX10: ; %bb.0: 1266; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1267; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1268; GFX10-NEXT: s_add_u32 s0, s0, 0x3fff 1269; GFX10-NEXT: s_addc_u32 s1, s1, 0 1270; GFX10-NEXT: v_mov_b32_e32 v0, s0 1271; GFX10-NEXT: v_mov_b32_e32 v1, s1 1272; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1273; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1274; GFX10-NEXT: flat_store_byte v[0:1], v0 1275; GFX10-NEXT: s_endpgm 1276; 1277; GFX11-LABEL: flat_inst_salu_offset_2x_13bit_max: 1278; GFX11: ; %bb.0: 1279; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1280; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1281; GFX11-NEXT: v_add_co_u32 v0, s0, 0x3000, s0 1282; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1283; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 1284; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc 1285; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1286; GFX11-NEXT: flat_store_b8 v[0:1], v0 1287; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1288; GFX11-NEXT: s_endpgm 1289 %gep = getelementptr i8, i8* %p, i64 16383 1290 %load = load volatile i8, i8* %gep, align 1 1291 store i8 %load, i8* undef 1292 ret void 1293} 1294 1295define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_11bit_max(i8* %p) { 1296; GFX9-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: 1297; GFX9: ; %bb.0: 1298; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1299; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1300; GFX9-NEXT: v_mov_b32_e32 v0, s0 1301; GFX9-NEXT: v_mov_b32_e32 v1, s1 1302; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 1303; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 1304; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1305; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1306; GFX9-NEXT: flat_store_byte v[0:1], v0 1307; GFX9-NEXT: s_endpgm 1308; 1309; GFX10-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: 1310; GFX10: ; %bb.0: 1311; GFX10-NEXT: 
s_load_dwordx2 s[0:1], s[0:1], 0x24 1312; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1313; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff000 1314; GFX10-NEXT: s_addc_u32 s1, s1, -1 1315; GFX10-NEXT: v_mov_b32_e32 v0, s0 1316; GFX10-NEXT: v_mov_b32_e32 v1, s1 1317; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1318; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1319; GFX10-NEXT: flat_store_byte v[0:1], v0 1320; GFX10-NEXT: s_endpgm 1321; 1322; GFX11-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: 1323; GFX11: ; %bb.0: 1324; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1325; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1326; GFX11-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 1327; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1328; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 1329; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1330; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1331; GFX11-NEXT: flat_store_b8 v[0:1], v0 1332; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1333; GFX11-NEXT: s_endpgm 1334 %gep = getelementptr i8, i8* %p, i64 -4096 1335 %load = load volatile i8, i8* %gep, align 1 1336 store i8 %load, i8* undef 1337 ret void 1338} 1339 1340define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_12bit_max(i8* %p) { 1341; GFX9-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: 1342; GFX9: ; %bb.0: 1343; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1344; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1345; GFX9-NEXT: v_mov_b32_e32 v0, s0 1346; GFX9-NEXT: v_mov_b32_e32 v1, s1 1347; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 1348; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 1349; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1350; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1351; GFX9-NEXT: flat_store_byte v[0:1], v0 1352; GFX9-NEXT: s_endpgm 1353; 1354; GFX10-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: 1355; GFX10: ; %bb.0: 1356; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1357; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1358; GFX10-NEXT: s_add_u32 s0, s0, 0xffffe000 1359; GFX10-NEXT: s_addc_u32 s1, s1, -1 
1360; GFX10-NEXT: v_mov_b32_e32 v0, s0 1361; GFX10-NEXT: v_mov_b32_e32 v1, s1 1362; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1363; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1364; GFX10-NEXT: flat_store_byte v[0:1], v0 1365; GFX10-NEXT: s_endpgm 1366; 1367; GFX11-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: 1368; GFX11: ; %bb.0: 1369; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1370; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1371; GFX11-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 1372; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1373; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 1374; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1375; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1376; GFX11-NEXT: flat_store_b8 v[0:1], v0 1377; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1378; GFX11-NEXT: s_endpgm 1379 %gep = getelementptr i8, i8* %p, i64 -8192 1380 %load = load volatile i8, i8* %gep, align 1 1381 store i8 %load, i8* undef 1382 ret void 1383} 1384 1385define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_13bit_max(i8* %p) { 1386; GFX9-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: 1387; GFX9: ; %bb.0: 1388; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1389; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1390; GFX9-NEXT: v_mov_b32_e32 v0, s0 1391; GFX9-NEXT: v_mov_b32_e32 v1, s1 1392; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0 1393; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 1394; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1395; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1396; GFX9-NEXT: flat_store_byte v[0:1], v0 1397; GFX9-NEXT: s_endpgm 1398; 1399; GFX10-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: 1400; GFX10: ; %bb.0: 1401; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1402; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1403; GFX10-NEXT: s_add_u32 s0, s0, 0xffffc000 1404; GFX10-NEXT: s_addc_u32 s1, s1, -1 1405; GFX10-NEXT: v_mov_b32_e32 v0, s0 1406; GFX10-NEXT: v_mov_b32_e32 v1, s1 1407; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1408; GFX10-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) 1409; GFX10-NEXT: flat_store_byte v[0:1], v0 1410; GFX10-NEXT: s_endpgm 1411; 1412; GFX11-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: 1413; GFX11: ; %bb.0: 1414; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1415; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1416; GFX11-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 1417; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1418; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 1419; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1420; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1421; GFX11-NEXT: flat_store_b8 v[0:1], v0 1422; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1423; GFX11-NEXT: s_endpgm 1424 %gep = getelementptr i8, i8* %p, i64 -16384 1425 %load = load volatile i8, i8* %gep, align 1 1426 store i8 %load, i8* undef 1427 ret void 1428} 1429 1430; Fill 11-bit low-bits (1ull << 33) | 2047 1431define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split0(i8* %p) { 1432; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_split0: 1433; GFX9: ; %bb.0: 1434; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1435; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1436; GFX9-NEXT: v_mov_b32_e32 v1, s1 1437; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 1438; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1439; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 glc 1440; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1441; GFX9-NEXT: flat_store_byte v[0:1], v0 1442; GFX9-NEXT: s_endpgm 1443; 1444; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split0: 1445; GFX10: ; %bb.0: 1446; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1447; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1448; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff 1449; GFX10-NEXT: s_addc_u32 s1, s1, 2 1450; GFX10-NEXT: v_mov_b32_e32 v0, s0 1451; GFX10-NEXT: v_mov_b32_e32 v1, s1 1452; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1453; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1454; GFX10-NEXT: flat_store_byte v[0:1], v0 1455; GFX10-NEXT: s_endpgm 1456; 1457; GFX11-LABEL: 
flat_inst_salu_offset_64bit_11bit_split0: 1458; GFX11: ; %bb.0: 1459; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1460; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1461; GFX11-NEXT: v_add_co_u32 v0, s0, 0, s0 1462; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1463; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1464; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047 glc dlc 1465; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1466; GFX11-NEXT: flat_store_b8 v[0:1], v0 1467; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1468; GFX11-NEXT: s_endpgm 1469 %gep = getelementptr i8, i8* %p, i64 8589936639 1470 %load = load volatile i8, i8* %gep, align 1 1471 store i8 %load, i8* undef 1472 ret void 1473} 1474 1475; Low bits one past the 11-bit maximum: (1ull << 33) | 2048 1476define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split1(i8* %p) { 1477; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_split1: 1478; GFX9: ; %bb.0: 1479; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1480; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1481; GFX9-NEXT: v_mov_b32_e32 v1, s1 1482; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 1483; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1484; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 glc 1485; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1486; GFX9-NEXT: flat_store_byte v[0:1], v0 1487; GFX9-NEXT: s_endpgm 1488; 1489; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split1: 1490; GFX10: ; %bb.0: 1491; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1492; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1493; GFX10-NEXT: s_add_u32 s0, s0, 0x800 1494; GFX10-NEXT: s_addc_u32 s1, s1, 2 1495; GFX10-NEXT: v_mov_b32_e32 v0, s0 1496; GFX10-NEXT: v_mov_b32_e32 v1, s1 1497; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1498; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1499; GFX10-NEXT: flat_store_byte v[0:1], v0 1500; GFX10-NEXT: s_endpgm 1501; 1502; GFX11-LABEL: flat_inst_salu_offset_64bit_11bit_split1: 1503; GFX11: ; %bb.0: 1504; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1505; GFX11-NEXT: s_waitcnt 
lgkmcnt(0) 1506; GFX11-NEXT: v_add_co_u32 v0, s0, 0, s0 1507; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1508; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1509; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2048 glc dlc 1510; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1511; GFX11-NEXT: flat_store_b8 v[0:1], v0 1512; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1513; GFX11-NEXT: s_endpgm 1514 %gep = getelementptr i8, i8* %p, i64 8589936640 1515 %load = load volatile i8, i8* %gep, align 1 1516 store i8 %load, i8* undef 1517 ret void 1518} 1519 1520; Fill 12-bit low-bits (1ull << 33) | 4095 1521define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split0(i8* %p) { 1522; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_split0: 1523; GFX9: ; %bb.0: 1524; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1525; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1526; GFX9-NEXT: v_mov_b32_e32 v1, s1 1527; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 1528; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1529; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc 1530; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1531; GFX9-NEXT: flat_store_byte v[0:1], v0 1532; GFX9-NEXT: s_endpgm 1533; 1534; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split0: 1535; GFX10: ; %bb.0: 1536; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1537; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1538; GFX10-NEXT: s_add_u32 s0, s0, 0xfff 1539; GFX10-NEXT: s_addc_u32 s1, s1, 2 1540; GFX10-NEXT: v_mov_b32_e32 v0, s0 1541; GFX10-NEXT: v_mov_b32_e32 v1, s1 1542; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1543; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1544; GFX10-NEXT: flat_store_byte v[0:1], v0 1545; GFX10-NEXT: s_endpgm 1546; 1547; GFX11-LABEL: flat_inst_salu_offset_64bit_12bit_split0: 1548; GFX11: ; %bb.0: 1549; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1550; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1551; GFX11-NEXT: v_add_co_u32 v0, s0, 0, s0 1552; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1553; GFX11-NEXT: 
v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1554; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc 1555; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1556; GFX11-NEXT: flat_store_b8 v[0:1], v0 1557; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1558; GFX11-NEXT: s_endpgm 1559 %gep = getelementptr i8, i8* %p, i64 8589938687 1560 %load = load volatile i8, i8* %gep, align 1 1561 store i8 %load, i8* undef 1562 ret void 1563} 1564 1565; Low bits one past the 12-bit maximum: (1ull << 33) | 4096 1566define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split1(i8* %p) { 1567; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_split1: 1568; GFX9: ; %bb.0: 1569; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1570; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1571; GFX9-NEXT: v_mov_b32_e32 v0, s0 1572; GFX9-NEXT: v_mov_b32_e32 v1, s1 1573; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1574; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1575; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1576; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1577; GFX9-NEXT: flat_store_byte v[0:1], v0 1578; GFX9-NEXT: s_endpgm 1579; 1580; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split1: 1581; GFX10: ; %bb.0: 1582; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1583; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1584; GFX10-NEXT: s_add_u32 s0, s0, 0x1000 1585; GFX10-NEXT: s_addc_u32 s1, s1, 2 1586; GFX10-NEXT: v_mov_b32_e32 v0, s0 1587; GFX10-NEXT: v_mov_b32_e32 v1, s1 1588; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1589; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1590; GFX10-NEXT: flat_store_byte v[0:1], v0 1591; GFX10-NEXT: s_endpgm 1592; 1593; GFX11-LABEL: flat_inst_salu_offset_64bit_12bit_split1: 1594; GFX11: ; %bb.0: 1595; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1596; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1597; GFX11-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 1598; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1599; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1600; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1601; 
GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1602; GFX11-NEXT: flat_store_b8 v[0:1], v0 1603; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1604; GFX11-NEXT: s_endpgm 1605 %gep = getelementptr i8, i8* %p, i64 8589938688 1606 %load = load volatile i8, i8* %gep, align 1 1607 store i8 %load, i8* undef 1608 ret void 1609} 1610 1611; Fill 13-bit low-bits (1ull << 33) | 8191 1612define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split0(i8* %p) { 1613; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_split0: 1614; GFX9: ; %bb.0: 1615; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1616; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1617; GFX9-NEXT: v_mov_b32_e32 v0, s0 1618; GFX9-NEXT: v_mov_b32_e32 v1, s1 1619; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1620; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1621; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc 1622; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1623; GFX9-NEXT: flat_store_byte v[0:1], v0 1624; GFX9-NEXT: s_endpgm 1625; 1626; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split0: 1627; GFX10: ; %bb.0: 1628; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1629; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1630; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff 1631; GFX10-NEXT: s_addc_u32 s1, s1, 2 1632; GFX10-NEXT: v_mov_b32_e32 v0, s0 1633; GFX10-NEXT: v_mov_b32_e32 v1, s1 1634; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1635; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1636; GFX10-NEXT: flat_store_byte v[0:1], v0 1637; GFX10-NEXT: s_endpgm 1638; 1639; GFX11-LABEL: flat_inst_salu_offset_64bit_13bit_split0: 1640; GFX11: ; %bb.0: 1641; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1642; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1643; GFX11-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 1644; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1645; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1646; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc 1647; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1648; GFX11-NEXT: flat_store_b8 v[0:1], v0 1649; 
GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1650; GFX11-NEXT: s_endpgm 1651 %gep = getelementptr i8, i8* %p, i64 8589942783 1652 %load = load volatile i8, i8* %gep, align 1 1653 store i8 %load, i8* undef 1654 ret void 1655} 1656 1657; Offset (1ull << 33) | 8192 = 8589942784: bit 33 plus one past the 13-bit maximum (8191). Expected on gfx900, gfx1010 and gfx1100: 0x2000 is added to the low half and 2 to the high half of the base, and the load carries no immediate offset. 1658define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split1(i8* %p) { 1659; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_split1: 1660; GFX9: ; %bb.0: 1661; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1662; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1663; GFX9-NEXT: v_mov_b32_e32 v0, s0 1664; GFX9-NEXT: v_mov_b32_e32 v1, s1 1665; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 1666; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1667; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1668; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1669; GFX9-NEXT: flat_store_byte v[0:1], v0 1670; GFX9-NEXT: s_endpgm 1671; 1672; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split1: 1673; GFX10: ; %bb.0: 1674; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1675; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1676; GFX10-NEXT: s_add_u32 s0, s0, 0x2000 1677; GFX10-NEXT: s_addc_u32 s1, s1, 2 1678; GFX10-NEXT: v_mov_b32_e32 v0, s0 1679; GFX10-NEXT: v_mov_b32_e32 v1, s1 1680; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1681; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1682; GFX10-NEXT: flat_store_byte v[0:1], v0 1683; GFX10-NEXT: s_endpgm 1684; 1685; GFX11-LABEL: flat_inst_salu_offset_64bit_13bit_split1: 1686; GFX11: ; %bb.0: 1687; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1688; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1689; GFX11-NEXT: v_add_co_u32 v0, s0, 0x2000, s0 1690; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1691; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 1692; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1693; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1694; GFX11-NEXT: flat_store_b8 v[0:1], v0 1695; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1696; GFX11-NEXT: s_endpgm 1697 %gep = getelementptr i8, i8* %p, 
i64 8589942784 1698 %load = load volatile i8, i8* %gep, align 1 1699 store i8 %load, i8* undef 1700 ret void 1701} 1702 1703; Offset (1ull << 63) | 2047 = -9223372036854773761 as a signed i64: the sign bit plus all 11 low bits set. Expected on gfx900, gfx1010 and gfx1100: 0x7ff is added to the low half of the base, the high half is adjusted by a separate add-with-carry (literal 0x80000000 on gfx1010 and gfx1100, the v_bfrev_b32 result on gfx900), and the load carries no immediate offset. 1704define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(i8* %p) { 1705; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0: 1706; GFX9: ; %bb.0: 1707; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1708; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 1709; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1710; GFX9-NEXT: v_mov_b32_e32 v0, s0 1711; GFX9-NEXT: v_mov_b32_e32 v2, s1 1712; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0 1713; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1714; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1715; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1716; GFX9-NEXT: flat_store_byte v[0:1], v0 1717; GFX9-NEXT: s_endpgm 1718; 1719; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0: 1720; GFX10: ; %bb.0: 1721; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1722; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1723; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff 1724; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 1725; GFX10-NEXT: v_mov_b32_e32 v0, s0 1726; GFX10-NEXT: v_mov_b32_e32 v1, s1 1727; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1728; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1729; GFX10-NEXT: flat_store_byte v[0:1], v0 1730; GFX10-NEXT: s_endpgm 1731; 1732; GFX11-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0: 1733; GFX11: ; %bb.0: 1734; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1735; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1736; GFX11-NEXT: v_mov_b32_e32 v1, s1 1737; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, s0 1738; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1739; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1740; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1741; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1742; GFX11-NEXT: flat_store_b8 v[0:1], v0 1743; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1744; GFX11-NEXT: 
s_endpgm 1745 %gep = getelementptr i8, i8* %p, i64 -9223372036854773761 1746 %load = load volatile i8, i8* %gep, align 1 1747 store i8 %load, i8* undef 1748 ret void 1749} 1750 1751; Offset (1ull << 63) | 2048 = -9223372036854773760 as a signed i64: the sign bit plus one past the 11-bit maximum (2047). Expected on gfx900, gfx1010 and gfx1100: 0x800 is added to the low half of the base, the high half is adjusted by a separate add-with-carry (literal 0x80000000 on gfx1010 and gfx1100, the v_bfrev_b32 result on gfx900), and the load carries no immediate offset. 1752define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(i8* %p) { 1753; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1: 1754; GFX9: ; %bb.0: 1755; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1756; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 1757; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1758; GFX9-NEXT: v_mov_b32_e32 v0, s0 1759; GFX9-NEXT: v_mov_b32_e32 v2, s1 1760; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0 1761; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1762; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1763; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1764; GFX9-NEXT: flat_store_byte v[0:1], v0 1765; GFX9-NEXT: s_endpgm 1766; 1767; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1: 1768; GFX10: ; %bb.0: 1769; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1770; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1771; GFX10-NEXT: s_add_u32 s0, s0, 0x800 1772; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 1773; GFX10-NEXT: v_mov_b32_e32 v0, s0 1774; GFX10-NEXT: v_mov_b32_e32 v1, s1 1775; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1776; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1777; GFX10-NEXT: flat_store_byte v[0:1], v0 1778; GFX10-NEXT: s_endpgm 1779; 1780; GFX11-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1: 1781; GFX11: ; %bb.0: 1782; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1783; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1784; GFX11-NEXT: v_mov_b32_e32 v1, s1 1785; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, s0 1786; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1787; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1788; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1789; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1790; GFX11-NEXT: flat_store_b8 v[0:1], v0 1791; GFX11-NEXT: 
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1792; GFX11-NEXT: s_endpgm 1793 %gep = getelementptr i8, i8* %p, i64 -9223372036854773760 1794 %load = load volatile i8, i8* %gep, align 1 1795 store i8 %load, i8* undef 1796 ret void 1797} 1798 1799; Offset (1ull << 63) | 4095 = -9223372036854771713 as a signed i64: the sign bit plus all 12 low bits set. Expected on gfx900, gfx1010 and gfx1100: 0xfff is added to the low half of the base, the high half is adjusted by a separate add-with-carry (literal 0x80000000 on gfx1010 and gfx1100, the v_bfrev_b32 result on gfx900), and the load carries no immediate offset. 1800define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(i8* %p) { 1801; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0: 1802; GFX9: ; %bb.0: 1803; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1804; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 1805; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1806; GFX9-NEXT: v_mov_b32_e32 v0, s0 1807; GFX9-NEXT: v_mov_b32_e32 v2, s1 1808; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0 1809; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1810; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1811; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1812; GFX9-NEXT: flat_store_byte v[0:1], v0 1813; GFX9-NEXT: s_endpgm 1814; 1815; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0: 1816; GFX10: ; %bb.0: 1817; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1818; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1819; GFX10-NEXT: s_add_u32 s0, s0, 0xfff 1820; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 1821; GFX10-NEXT: v_mov_b32_e32 v0, s0 1822; GFX10-NEXT: v_mov_b32_e32 v1, s1 1823; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1824; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1825; GFX10-NEXT: flat_store_byte v[0:1], v0 1826; GFX10-NEXT: s_endpgm 1827; 1828; GFX11-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0: 1829; GFX11: ; %bb.0: 1830; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1831; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1832; GFX11-NEXT: v_mov_b32_e32 v1, s1 1833; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, s0 1834; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1835; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1836; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1837; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1838; 
GFX11-NEXT: flat_store_b8 v[0:1], v0 1839; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1840; GFX11-NEXT: s_endpgm 1841 %gep = getelementptr i8, i8* %p, i64 -9223372036854771713 1842 %load = load volatile i8, i8* %gep, align 1 1843 store i8 %load, i8* undef 1844 ret void 1845} 1846 1847; Offset (1ull << 63) | 4096 = -9223372036854771712 as a signed i64: the sign bit plus one past the 12-bit maximum (4095). Expected on gfx900, gfx1010 and gfx1100: 0x1000 is added to the low half of the base, the high half is adjusted by a separate add-with-carry (literal 0x80000000 on gfx1010 and gfx1100, the v_bfrev_b32 result on gfx900), and the load carries no immediate offset. 1848define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split1(i8* %p) { 1849; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1: 1850; GFX9: ; %bb.0: 1851; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1852; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 1853; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1854; GFX9-NEXT: v_mov_b32_e32 v0, s0 1855; GFX9-NEXT: v_mov_b32_e32 v2, s1 1856; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1857; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1858; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1859; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1860; GFX9-NEXT: flat_store_byte v[0:1], v0 1861; GFX9-NEXT: s_endpgm 1862; 1863; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1: 1864; GFX10: ; %bb.0: 1865; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1866; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1867; GFX10-NEXT: s_add_u32 s0, s0, 0x1000 1868; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 1869; GFX10-NEXT: v_mov_b32_e32 v0, s0 1870; GFX10-NEXT: v_mov_b32_e32 v1, s1 1871; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1872; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1873; GFX10-NEXT: flat_store_byte v[0:1], v0 1874; GFX10-NEXT: s_endpgm 1875; 1876; GFX11-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1: 1877; GFX11: ; %bb.0: 1878; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1879; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1880; GFX11-NEXT: v_mov_b32_e32 v1, s1 1881; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, s0 1882; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1883; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1884; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc 
dlc 1885; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1886; GFX11-NEXT: flat_store_b8 v[0:1], v0 1887; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1888; GFX11-NEXT: s_endpgm 1889 %gep = getelementptr i8, i8* %p, i64 -9223372036854771712 1890 %load = load volatile i8, i8* %gep, align 1 1891 store i8 %load, i8* undef 1892 ret void 1893} 1894 1895; Offset (1ull << 63) | 8191 = -9223372036854767617 as a signed i64: the sign bit plus all 13 low bits set. Expected on gfx900, gfx1010 and gfx1100: 0x1fff is added to the low half of the base, the high half is adjusted by a separate add-with-carry (literal 0x80000000 on gfx1010 and gfx1100, the v_bfrev_b32 result on gfx900), and the load carries no immediate offset. 1896define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(i8* %p) { 1897; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0: 1898; GFX9: ; %bb.0: 1899; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1900; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 1901; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1902; GFX9-NEXT: v_mov_b32_e32 v0, s0 1903; GFX9-NEXT: v_mov_b32_e32 v2, s1 1904; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0 1905; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1906; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1907; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1908; GFX9-NEXT: flat_store_byte v[0:1], v0 1909; GFX9-NEXT: s_endpgm 1910; 1911; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0: 1912; GFX10: ; %bb.0: 1913; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1914; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1915; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff 1916; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 1917; GFX10-NEXT: v_mov_b32_e32 v0, s0 1918; GFX10-NEXT: v_mov_b32_e32 v1, s1 1919; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1920; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1921; GFX10-NEXT: flat_store_byte v[0:1], v0 1922; GFX10-NEXT: s_endpgm 1923; 1924; GFX11-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0: 1925; GFX11: ; %bb.0: 1926; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1927; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1928; GFX11-NEXT: v_mov_b32_e32 v1, s1 1929; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, s0 1930; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1931; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, 
v1, vcc_lo 1932; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1933; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1934; GFX11-NEXT: flat_store_b8 v[0:1], v0 1935; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1936; GFX11-NEXT: s_endpgm 1937 %gep = getelementptr i8, i8* %p, i64 -9223372036854767617 1938 %load = load volatile i8, i8* %gep, align 1 1939 store i8 %load, i8* undef 1940 ret void 1941} 1942 1943; Offset (1ull << 63) | 8192 = -9223372036854767616 as a signed i64: the sign bit plus one past the 13-bit maximum (8191). Expected on gfx900, gfx1010 and gfx1100: 0x2000 is added to the low half of the base, the high half is adjusted by a separate add-with-carry (literal 0x80000000 on gfx1010 and gfx1100, the v_bfrev_b32 result on gfx900), and the load carries no immediate offset. 1944define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(i8* %p) { 1945; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1: 1946; GFX9: ; %bb.0: 1947; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1948; GFX9-NEXT: v_bfrev_b32_e32 v1, 1 1949; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1950; GFX9-NEXT: v_mov_b32_e32 v0, s0 1951; GFX9-NEXT: v_mov_b32_e32 v2, s1 1952; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 1953; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1954; GFX9-NEXT: flat_load_ubyte v0, v[0:1] glc 1955; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1956; GFX9-NEXT: flat_store_byte v[0:1], v0 1957; GFX9-NEXT: s_endpgm 1958; 1959; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1: 1960; GFX10: ; %bb.0: 1961; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1962; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1963; GFX10-NEXT: s_add_u32 s0, s0, 0x2000 1964; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 1965; GFX10-NEXT: v_mov_b32_e32 v0, s0 1966; GFX10-NEXT: v_mov_b32_e32 v1, s1 1967; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1968; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1969; GFX10-NEXT: flat_store_byte v[0:1], v0 1970; GFX10-NEXT: s_endpgm 1971; 1972; GFX11-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1: 1973; GFX11: ; %bb.0: 1974; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 1975; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1976; GFX11-NEXT: v_mov_b32_e32 v1, s1 1977; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, s0 1978; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1979; 
GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1980; GFX11-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1981; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1982; GFX11-NEXT: flat_store_b8 v[0:1], v0 1983; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1984; GFX11-NEXT: s_endpgm 1985 %gep = getelementptr i8, i8* %p, i64 -9223372036854767616 1986 %load = load volatile i8, i8* %gep, align 1 1987 store i8 %load, i8* undef 1988 ret void 1989} 1990