; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-SI,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-HSA,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GCN-NOHSA,GCN-NOHSA-VI,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,EGCM,FUNC %s
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefixes=CM,EGCM,FUNC %s

; Checks instruction selection for i16 loads from the global address space
; (addrspace(1)): plain scalar/vector loads, then zero- and sign-extending
; loads to i32 and i64. Each function loads from %in and stores to %out; the
; check lines pin the load (and, where listed, extend/store) instructions
; emitted for every RUN configuration above.

; FIXME: r600 is broken because the bigger testcases spill and it's not implemented

; FUNC-LABEL: {{^}}global_load_i16:
; GCN-NOHSA: buffer_load_ushort v{{[0-9]+}}
; GCN-HSA: flat_load_ushort

; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %ld = load i16, i16 addrspace(1)* %in
  store i16 %ld, i16 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v2i16:
; GCN-NOHSA: buffer_load_dword v
; GCN-HSA: flat_load_dword v

; EGCM: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %ld = load <2 x i16>, <2 x i16> addrspace(1)* %in
  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v3i16:
; GCN-NOHSA: buffer_load_dwordx2 v
; GCN-HSA: flat_load_dwordx2 v

; EGCM-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EGCM-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1
define void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v4i16:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; EGCM: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %ld = load <4 x i16>, <4 x i16> addrspace(1)* %in
  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v8i16:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EGCM: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) {
entry:
  %ld = load <8 x i16>, <8 x i16> addrspace(1)* %in
  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v16i16:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) {
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(1)* %in
  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_i16_to_i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-NOHSA: buffer_store_dword

; GCN-HSA: flat_load_ushort
; GCN-HSA: flat_store_dword

; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %a = load i16, i16 addrspace(1)* %in
  %ext = zext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_i16_to_i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-NOHSA: buffer_store_dword

; GCN-HSA: flat_load_sshort
; GCN-HSA: flat_store_dword

; EGCM: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], T{{[0-9]+}}.X, 0, #1
; EGCM: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
; EGCM: 16
define void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %a = load i16, i16 addrspace(1)* %in
  %ext = sext i16 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %ext = zext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i32:
; GCN-NOHSA: buffer_load_sshort
; GCN-HSA: flat_load_sshort

; EGCM: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], T{{[0-9]+}}.X, 0, #1
; EGCM: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
; EGCM: 16
define void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %ext = sext <1 x i16> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i32:
; GCN-NOHSA: buffer_load_dword
; GCN-HSA: flat_load_dword

; EGCM: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EGCM: BFE_UINT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], literal
; EGCM: 16
define void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %ext = zext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i32:
; GCN-NOHSA: buffer_load_dword

; GCN-HSA: flat_load_dword

; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XY, {{T[0-9]\.[XYZW]}},
; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST:T[0-9]]], {{T[0-9]\.[XYZW]}}
; EGCM: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; TODO: This should use ASHR instead of LSHR + BFE
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST]].X, [[DST]], 0.0, literal
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{PV\.[XYZW]}}, 0.0, literal
; EGCM-DAG: 16
; EGCM-DAG: 16
define void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %ext = sext <2 x i16> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v3i16_to_v3i32:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}
; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}},
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}},
; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1
; EGCM-DAG: VTX_READ_16 [[DST_HI:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1
; TODO: This should use DST, but for some reason there are redundant MOVs
; EGCM: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal
; EGCM: 16
; EGCM: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal
; EGCM: AND_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], literal
define void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
  %ext = zext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v3i16_to_v3i32:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}
; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}},
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}},
; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1
; EGCM-DAG: VTX_READ_16 [[DST_HI:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1
; TODO: This should use DST, but for some reason there are redundant MOVs
; EGCM-DAG: ASHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], 0.0, literal
; EGCM-DAG: 16
; EGCM-DAG: 16
define void @global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
  %ext = sext <3 x i16> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v4i16_to_v4i32:
; GCN-NOHSA: buffer_load_dwordx2

; GCN-HSA: flat_load_dwordx2

; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST:T[0-9]]], {{T[0-9]\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}},
; EGCM: VTX_READ_64 [[DST:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, 0, #1
; TODO: This should use DST, but for some reason there are redundant MOVs
; EGCM-DAG: BFE_UINT {{[* ]*}}[[ST]].Y, {{.*}}, literal
; EGCM-DAG: 16
; EGCM-DAG: BFE_UINT {{[* ]*}}[[ST]].W, {{.*}}, literal
; EGCM-DAG: AND_INT {{[* ]*}}[[ST]].X, {{.*}}, literal
; EGCM-DAG: AND_INT {{[* ]*}}[[ST]].Z, {{.*}}, literal
; EGCM-DAG: 16
define void @global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %ext = zext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i32:
; GCN-NOHSA: buffer_load_dwordx2

; GCN-HSA: flat_load_dwordx2

; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST:T[0-9]]], {{T[0-9]\.[XYZW]}}
; EG: MEM_RAT_CACHELESS STORE_RAW [[ST:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}},
; EGCM: VTX_READ_64 [[DST:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, 0, #1
; TODO: We should use ASHR instead of LSHR + BFE
; TODO: This should use DST, but for some reason there are redundant MOVs
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST]].X, {{.*}}, 0.0, literal
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST]].Y, {{.*}}, 0.0, literal
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST]].Z, {{.*}}, 0.0, literal
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST]].W, {{.*}}, 0.0, literal
; EGCM-DAG: 16
; EGCM-DAG: 16
; EGCM-DAG: 16
; EGCM-DAG: 16
define void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %ext = sext <4 x i16> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; CM-DAG: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}}
; CM-DAG: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]], {{T[0-9]\.[XYZW]}}
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}},
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}},
; EGCM: CF_END
; EGCM: VTX_READ_128 [[DST:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}}, 0, #1
; TODO: These should use LSHR instead of BFE_UINT
; EGCM-DAG: BFE_UINT {{[* ]*}}[[ST_LO]].Y, {{.*}}, literal
; EGCM-DAG: BFE_UINT {{[* ]*}}[[ST_LO]].W, {{.*}}, literal
; EGCM-DAG: BFE_UINT {{[* ]*}}[[ST_HI]].Y, {{.*}}, literal
; EGCM-DAG: BFE_UINT {{[* ]*}}[[ST_HI]].W, {{.*}}, literal
; EGCM-DAG: AND_INT {{[* ]*}}[[ST_LO]].X, {{.*}}, literal
; EGCM-DAG: AND_INT {{[* ]*}}[[ST_LO]].Z, {{.*}}, literal
; EGCM-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{.*}}, literal
; EGCM-DAG: AND_INT {{[* ]*}}[[ST_HI]].Z, {{.*}}, literal
; EGCM-DAG: 65535
; EGCM-DAG: 65535
; EGCM-DAG: 65535
; EGCM-DAG: 65535
; EGCM-DAG: 16
; EGCM-DAG: 16
; EGCM-DAG: 16
; EGCM-DAG: 16
define void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %ext = zext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; CM-DAG: MEM_RAT_CACHELESS STORE_DWORD [[ST_LO:T[0-9]]], {{T[0-9]\.[XYZW]}}
; CM-DAG: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]], {{T[0-9]\.[XYZW]}}
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}},
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}},
; EGCM: CF_END
; EGCM: VTX_READ_128 [[DST:T[0-9]]].XYZW, {{T[0-9]\.[XYZW]}}, 0, #1
; TODO: These should use ASHR instead of LSHR + BFE_INT
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Y, {{.*}}, 0.0, literal
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_LO]].W, {{.*}}, 0.0, literal
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_HI]].Y, {{.*}}, 0.0, literal
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_HI]].W, {{.*}}, 0.0, literal
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{.*}}, 0.0, literal
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Z, {{.*}}, 0.0, literal
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{.*}}, 0.0, literal
; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_HI]].Z, {{.*}}, 0.0, literal
; EGCM-DAG: 16
; EGCM-DAG: 16
; EGCM-DAG: 16
; EGCM-DAG: 16
; EGCM-DAG: 16
; EGCM-DAG: 16
; EGCM-DAG: 16
; EGCM-DAG: 16
define void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %ext = sext <8 x i16> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
define void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = zext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i32:

; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
define void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = sext <16 x i16> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
define void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %ext = zext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
define void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %ext = sext <32 x i16> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i32:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4

; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 64, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
define void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
  %ext = zext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i32:

; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 32, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 48, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 64, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 80, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 96, #1
; EGCM-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 112, #1
define void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
  %ext = sext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_i16_to_i64:
; GCN-NOHSA-DAG: buffer_load_ushort v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_ushort v[[LO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EGCM: MOV {{.*}}, 0.0
define void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %a = load i16, i16 addrspace(1)* %in
  %ext = zext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_i16_to_i64:
; FIXME: Need to optimize this sequence to avoid extra bfe:
;  t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
;  t31: i64 = any_extend t28
;  t33: i64 = sign_extend_inreg t31, ValueType:ch:i16

; GCN-NOHSA-SI-DAG: buffer_load_sshort v[[LO:[0-9]+]],
; GCN-HSA-DAG: flat_load_sshort v[[LO:[0-9]+]],
; GCN-NOHSA-VI-DAG: buffer_load_ushort v[[ULO:[0-9]+]],
; GCN-NOHSA-VI-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16
; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EGCM: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EGCM: 31
define void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %a = load i16, i16 addrspace(1)* %in
  %ext = sext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v1i16_to_v1i64:

; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EGCM: MOV {{.*}}, 0.0
define void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %ext = zext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v1i16_to_v1i64:

; EGCM: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EGCM: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: These could be expanded earlier using ASHR 15
; EGCM: 31
define void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 {
  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
  %ext = sext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v2i16_to_v2i64:
define void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %ext = zext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v2i16_to_v2i64:

; EGCM: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %ext = sext <2 x i16> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v4i16_to_v4i64:

; EGCM: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %ext = zext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v4i16_to_v4i64:

; EGCM: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %ext = sext <4 x i16> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v8i16_to_v8i64:

; EGCM: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %ext = zext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v8i16_to_v8i64:

; EGCM: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
  %ext = sext <8 x i16> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v16i16_to_v16i64:

; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = zext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v16i16_to_v16i64:

; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 {
  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
  %ext = sext <16 x i16> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v32i16_to_v32i64:

; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %ext = zext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v32i16_to_v32i64:

; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 32, #1
; EGCM-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 48, #1
define void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 {
  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
  %ext = sext <32 x i16> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; NOTE(review): the two v64i16 -> v64i64 tests below are commented out,
; presumably because of the spilling problem described in the FIXME near the
; top of this file -- confirm before re-enabling.

; ; XFUNC-LABEL: {{^}}global_zextload_v64i16_to_v64i64:
; define void @global_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
;   %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
;   %ext = zext <64 x i16> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; ; XFUNC-LABEL: {{^}}global_sextload_v64i16_to_v64i64:
; define void @global_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 {
;   %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
;   %ext = sext <64 x i16> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

attributes #0 = { nounwind }