; Tests codegen for i16 (scalar and vector) loads from the constant address
; space (addrspace 2), including zext/sext to i32 and i64, on AMDGPU
; (SI/CI-HSA/VI) and R600 (redwood) targets.

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-SI,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-VI,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}constant_load_i16:
; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}}
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %ld = load i16, i16 addrspace(2)* %in
  store i16 %ld, i16 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v2i16:
; GCN: s_load_dword s

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) {
entry:
  %ld = load <2 x i16>, <2 x i16> addrspace(2)* %in
  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v3i16:
; GCN: s_load_dwordx2 s

; EG-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1
define void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v4i16:
; GCN: s_load_dwordx2

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) {
entry:
  %ld = load <4 x i16>, <4 x i16> addrspace(2)* %in
  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v8i16:
; GCN: s_load_dwordx4

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) {
entry:
  %ld = load <8 x i16>, <8 x i16> addrspace(2)* %in
  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v16i16:
; GCN: s_load_dwordx8

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) {
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(2)* %in
  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_i16_to_i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-NOHSA: buffer_store_dword

; GCN-HSA: flat_load_ushort
; GCN-HSA: flat_store_dword

; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}, 0, #1
define void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
  %a = load i16, i16 addrspace(2)* %in
  %ext = zext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_i16_to_i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-NOHSA: buffer_store_dword

; GCN-HSA: flat_load_sshort
; GCN-HSA: flat_store_dword

; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
define void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
  %a = load i16, i16 addrspace(2)* %in
  %ext = sext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}, 0, #1
define void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
  %ext = zext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-HSA: flat_load_sshort

; EG: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 16
define void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
  %ext = sext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i32:
; GCN: s_load_dword s
; GCN-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0xffff{{$}}
; GCN-DAG: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16

; v2i16 is naturally 4 byte aligned
; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], literal
; EG: 16
; EG: 16
define void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
  %ext = zext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i32:
; GCN: s_load_dword s
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; v2i16 is naturally 4 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XY, {{T[0-9].[XYZW]}},
; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].X, [[DST]], 0.0, literal
; TODO: We should use ASHR instead of LSHR + BFE
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{PV\.[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
define void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
  %ext = sext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v3i16_to_v3i32:
; GCN: s_load_dwordx2

; v3i16 is naturally 8 byte aligned
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9].[XYZW]}},
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9].[XYZW]}},
; EG: CF_END
; EG-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_16 [[DST_HI:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1
; TODO: This should use DST, but for some there are redundant MOVs
; EG-DAG: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 16
; EG-DAG: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 65535
; EG-DAG: 65535
define void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
  %ext = zext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v3i16_to_v3i32:
; GCN: s_load_dwordx2

; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9].[XYZW]}},
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9].[XYZW]}},
; v3i16 is naturally 8 byte aligned
; EG-DAG: VTX_READ_32 [[DST_HI:T[0-9]\.[XYZW]]], [[PTR:T[0-9]\.[XYZW]]], 0, #1
; EG-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1
; EG-DAG: ASHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
define void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(2)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(2)* %in
  %ext = sext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i32:
; GCN: s_load_dwordx2
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; v4i16 is naturally 8 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}}
; EG: VTX_READ_64 [[LD:T[0-9]]].XY, {{T[0-9].[XYZW]}}, 0, #1
; TODO: This should use LD, but for some there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}[[ST]].Y, {{.*\.[XYZW]}}, literal
; EG-DAG: BFE_UINT {{[* ]*}}[[ST]].W, {{.*\.[XYZW]}}, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: AND_INT {{[* ]*}}[[ST]].X, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST]].Z, {{T[0-9]\.[XYZW]}}, literal
; EG-DAG: 65535
; EG-DAG: 65535
define void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
  %ext = zext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i32:
; GCN: s_load_dwordx2
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; v4i16 is naturally 8 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}},
; EG: VTX_READ_64 [[DST:T[0-9]]].XY, {{T[0-9].[XYZW]}}, 0, #1
; TODO: This should use LD, but for some there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].X, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Z, {{.*}}, 0.0, literal
; TODO: We should use ASHR instead of LSHR + BFE
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST]].W, {{.*}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
  %ext = sext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i32:
; GCN: s_load_dwordx4
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; v8i16 is naturally 16 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}},
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}},
; EG: VTX_READ_128 [[DST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}}, 0, #1
; TODO: These should use LSHR instead of BFE_UINT
; TODO: This should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}[[ST_LO]].Y, {{.*}}, literal
; EG-DAG: BFE_UINT {{[* ]*}}[[ST_LO]].W, {{.*}}, literal
; EG-DAG: BFE_UINT {{[* ]*}}[[ST_HI]].Y, {{.*}}, literal
; EG-DAG: BFE_UINT {{[* ]*}}[[ST_HI]].W, {{.*}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_LO]].X, {{.*}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_LO]].Z, {{.*}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{.*}}, literal
; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].Z, {{.*}}, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 65535
; EG-DAG: 65535
; EG-DAG: 65535
; EG-DAG: 65535
define void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
  %ext = zext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i32:
; GCN: s_load_dwordx4
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; v8i16 is naturally 16 byte aligned
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}},
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XYZW, {{T[0-9]+.[XYZW]}},
; EG: VTX_READ_128 [[DST:T[0-9]]].XYZW, {{T[0-9].[XYZW]}}, 0, #1
; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT
; TODO: This should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Y, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].W, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].Y, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].W, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Z, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].Z, {{.*}}, 0.0, literal
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
; EG-DAG: 16
define void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
  %ext = sext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i32:
; GCN: s_load_dwordx8
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; v16i16 is naturally 32 byte aligned
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], {{T[0-9]+.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], {{T[0-9]+.[XYZW]}}, 16, #1
define void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
  %ext = zext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i32:
; GCN: s_load_dwordx8
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; v16i16 is naturally 32 byte aligned
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], {{T[0-9]+\.[XYZW]}}, 16, #1
define void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
  %ext = sext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i32:
; GCN-DAG: s_load_dwordx16
; GCN-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
; GCN-DAG: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
; GCN-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[K]]

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
define void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
  %ext = zext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i32:
; GCN: s_load_dwordx16
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i16

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
define void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
  %ext = sext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i32:
; GCN: s_load_dwordx16
; GCN: s_load_dwordx16

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 64, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
define void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
  %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
  %ext = zext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 64, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
define void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
  %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
  %ext = sext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_i16_to_i64:
; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
  %a = load i16, i16 addrspace(2)* %in
  %ext = zext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_i16_to_i64:
; FIXME: Need to optimize this sequence to avoid extra bfe:
;  t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
;    t31: i64 = any_extend t28
;  t33: i64 = sign_extend_inreg t31, ValueType:ch:i16

; GCN-NOHSA-SI-DAG: buffer_load_sshort v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]],
; GCN-NOHSA-VI-DAG: buffer_load_ushort v[[ULO:[0-9]+]],
; GCN-NOHSA-VI-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16
; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EG: 31
define void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(2)* %in) #0 {
  %a = load i16, i16 addrspace(2)* %in
  %ext = sext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v1i16_to_v1i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
  %ext = zext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v1i16_to_v1i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EG: 31
define void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(2)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(2)* %in
  %ext = sext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v2i16_to_v2i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
  %ext = zext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v2i16_to_v2i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
  %ext = sext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v4i16_to_v4i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
  %ext = zext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v4i16_to_v4i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(2)* %in
  %ext = sext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v8i16_to_v8i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
  %ext = zext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v8i16_to_v8i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(2)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(2)* %in
  %ext = sext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v16i16_to_v16i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
  %ext = zext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v16i16_to_v16i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(2)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(2)* %in
  %ext = sext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
  %ext = zext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v32i16_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(2)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(2)* %in
  %ext = sext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; These trigger undefined register machine verifier errors

; ; XFUNC-LABEL: {{^}}constant_zextload_v64i16_to_v64i64:
; define void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
;   %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
;   %ext = zext <64 x i16> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; ; XFUNC-LABEL: {{^}}constant_sextload_v64i16_to_v64i64:
; define void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(2)* %in) #0 {
;   %load = load <64 x i16>, <64 x i16> addrspace(2)* %in
;   %ext = sext <64 x i16> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

attributes #0 = { nounwind }