1; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,SI,FUNC %s 2; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-HSA,SI,FUNC %s 3; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,VI,FUNC %s 4; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s 5; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s 6 7 8; FUNC-LABEL: {{^}}global_load_i8: 9; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}} 10; GCN-HSA: flat_load_ubyte 11 12; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 13; TODO: NOT AND 14define amdgpu_kernel void @global_load_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { 15entry: 16 %ld = load i8, i8 addrspace(1)* %in 17 store i8 %ld, i8 addrspace(1)* %out 18 ret void 19} 20 21; FUNC-LABEL: {{^}}global_load_v2i8: 22; GCN-NOHSA: buffer_load_ushort v 23; GCN-HSA: flat_load_ushort v 24 25; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 26define amdgpu_kernel void @global_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 { 27entry: 28 %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in 29 store <2 x i8> %ld, <2 x i8> addrspace(1)* %out 30 ret void 31} 32 33; FUNC-LABEL: {{^}}global_load_v3i8: 34; GCN-NOHSA: buffer_load_dword v 35; GCN-HSA: flat_load_dword v 36 37; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 38define amdgpu_kernel void @global_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* 
%in) #0 { 39entry: 40 %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in 41 store <3 x i8> %ld, <3 x i8> addrspace(1)* %out 42 ret void 43} 44 45; FUNC-LABEL: {{^}}global_load_v4i8: 46; GCN-NOHSA: buffer_load_dword v 47; GCN-HSA: flat_load_dword v 48 49; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 50define amdgpu_kernel void @global_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 { 51entry: 52 %ld = load <4 x i8>, <4 x i8> addrspace(1)* %in 53 store <4 x i8> %ld, <4 x i8> addrspace(1)* %out 54 ret void 55} 56 57; FUNC-LABEL: {{^}}global_load_v8i8: 58; GCN-NOHSA: buffer_load_dwordx2 59; GCN-HSA: flat_load_dwordx2 60 61; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1 62define amdgpu_kernel void @global_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 { 63entry: 64 %ld = load <8 x i8>, <8 x i8> addrspace(1)* %in 65 store <8 x i8> %ld, <8 x i8> addrspace(1)* %out 66 ret void 67} 68 69; FUNC-LABEL: {{^}}global_load_v16i8: 70; GCN-NOHSA: buffer_load_dwordx4 71 72; GCN-HSA: flat_load_dwordx4 73 74; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 75define amdgpu_kernel void @global_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 { 76entry: 77 %ld = load <16 x i8>, <16 x i8> addrspace(1)* %in 78 store <16 x i8> %ld, <16 x i8> addrspace(1)* %out 79 ret void 80} 81 82; FUNC-LABEL: {{^}}global_zextload_i8_to_i32: 83; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}, 84; GCN-HSA: flat_load_ubyte 85 86; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 87define amdgpu_kernel void @global_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { 88 %a = load i8, i8 addrspace(1)* %in 89 %ext = zext i8 %a to i32 90 store i32 %ext, i32 addrspace(1)* %out 91 ret void 92} 93 94; FUNC-LABEL: {{^}}global_sextload_i8_to_i32: 95; GCN-NOHSA: buffer_load_sbyte 96; GCN-HSA: flat_load_sbyte 97 98; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1 99; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 
0.0, literal 100; EG: 8 101define amdgpu_kernel void @global_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { 102 %ld = load i8, i8 addrspace(1)* %in 103 %ext = sext i8 %ld to i32 104 store i32 %ext, i32 addrspace(1)* %out 105 ret void 106} 107 108; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i32: 109 110; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 111define amdgpu_kernel void @global_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 { 112 %load = load <1 x i8>, <1 x i8> addrspace(1)* %in 113 %ext = zext <1 x i8> %load to <1 x i32> 114 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out 115 ret void 116} 117 118; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i32: 119 120; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1 121; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal 122; EG: 8 123define amdgpu_kernel void @global_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 { 124 %load = load <1 x i8>, <1 x i8> addrspace(1)* %in 125 %ext = sext <1 x i8> %load to <1 x i32> 126 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out 127 ret void 128} 129 130; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i32: 131; GCN-NOHSA: buffer_load_ushort 132; GCN-HSA: flat_load_ushort 133 134; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1 135; TODO: These should use DST, but for some there are redundant MOVs 136; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal 137; EG-DAG: 8 138define amdgpu_kernel void @global_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 { 139 %load = load <2 x i8>, <2 x i8> addrspace(1)* %in 140 %ext = zext <2 x i8> %load to <2 x i32> 141 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out 142 ret void 143} 144 145; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i32: 146; GCN-NOHSA: buffer_load_ushort 147; GCN-HSA: flat_load_ushort 148 149; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1 150; TODO: 
These should use DST, but for some there are redundant MOVs 151; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 152; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 153; EG-DAG: 8 154; EG-DAG: 8 155define amdgpu_kernel void @global_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 { 156 %load = load <2 x i8>, <2 x i8> addrspace(1)* %in 157 %ext = sext <2 x i8> %load to <2 x i32> 158 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out 159 ret void 160} 161 162; FUNC-LABEL: {{^}}global_zextload_v3i8_to_v3i32: 163; GCN-NOHSA: buffer_load_dword v 164; GCN-HSA: flat_load_dword v 165 166; SI-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8 167; VI-DAG: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}} 168; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8 169; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff, 170 171; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 172; TODO: These should use DST, but for some there are redundant MOVs 173; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 174; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 175; EG-DAG: 8 176; EG-DAG: 8 177define amdgpu_kernel void @global_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 { 178entry: 179 %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in 180 %ext = zext <3 x i8> %ld to <3 x i32> 181 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out 182 ret void 183} 184 185; FUNC-LABEL: {{^}}global_sextload_v3i8_to_v3i32: 186; GCN-NOHSA: buffer_load_dword v 187; GCN-HSA: flat_load_dword v 188 189;FIXME: Need to optimize this sequence to avoid extra shift on VI. 
190 191; t23: i16 = truncate t18 192; t49: i16 = srl t23, Constant:i32<8> 193; t57: i32 = any_extend t49 194; t58: i32 = sign_extend_inreg t57, ValueType:ch:i8 195 196; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8 197; VI-DAG: v_lshrrev_b16_e32 [[SHIFT:v[0-9]+]], 8, v{{[0-9]+}} 198; VI-DAG: v_bfe_i32 v{{[0-9]+}}, [[SHIFT]], 0, 8 199; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8 200; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8 201 202; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1 203; TODO: These should use DST, but for some there are redundant MOVs 204; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 205; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 206; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 207; EG-DAG: 8 208; EG-DAG: 8 209; EG-DAG: 8 210define amdgpu_kernel void @global_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 { 211entry: 212 %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in 213 %ext = sext <3 x i8> %ld to <3 x i32> 214 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out 215 ret void 216} 217 218; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i32: 219; GCN-NOHSA: buffer_load_dword 220; GCN-HSA: flat_load_dword 221 222; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 223; TODO: These should use DST, but for some there are redundant MOVs 224; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 225; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 226; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 227; EG-DAG: 8 228; EG-DAG: 8 229; EG-DAG: 8 230define amdgpu_kernel void @global_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 { 231 %load = load <4 x i8>, <4 x i8> addrspace(1)* %in 232 %ext = zext <4 x i8> %load to <4 x i32> 233 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 234 ret void 235} 236 237; FUNC-LABEL: 
{{^}}global_sextload_v4i8_to_v4i32: 238; GCN-NOHSA: buffer_load_dword 239; GCN-HSA: flat_load_dword 240 241; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1 242; TODO: These should use DST, but for some there are redundant MOVs 243; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 244; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 245; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 246; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 247; EG-DAG: 8 248; EG-DAG: 8 249; EG-DAG: 8 250; EG-DAG: 8 251define amdgpu_kernel void @global_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 { 252 %load = load <4 x i8>, <4 x i8> addrspace(1)* %in 253 %ext = sext <4 x i8> %load to <4 x i32> 254 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 255 ret void 256} 257 258; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i32: 259 260; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1 261; TODO: These should use DST, but for some there are redundant MOVs 262; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 263; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 264; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 265; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 266; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 267; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 268; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 269; EG-DAG: 8 270; EG-DAG: 8 271; EG-DAG: 8 272; EG-DAG: 8 273; EG-DAG: 8 274; EG-DAG: 8 275; EG-DAG: 8 276define amdgpu_kernel void @global_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 { 277 %load = load <8 x i8>, <8 x i8> addrspace(1)* %in 278 %ext = zext <8 x i8> %load to <8 x i32> 279 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out 280 ret void 281} 282 283; FUNC-LABEL: 
{{^}}global_sextload_v8i8_to_v8i32: 284 285; EG: VTX_READ_64 [[DST:T[0-9]+\.XY]], T{{[0-9]+}}.X, 0, #1 286; TODO: These should use DST, but for some there are redundant MOVs 287; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 288; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 289; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 290; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 291; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 292; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 293; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 294; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal 295; EG-DAG: 8 296; EG-DAG: 8 297; EG-DAG: 8 298; EG-DAG: 8 299; EG-DAG: 8 300; EG-DAG: 8 301; EG-DAG: 8 302; EG-DAG: 8 303define amdgpu_kernel void @global_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 { 304 %load = load <8 x i8>, <8 x i8> addrspace(1)* %in 305 %ext = sext <8 x i8> %load to <8 x i32> 306 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out 307 ret void 308} 309 310; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i32: 311 312; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 313; TODO: These should use DST, but for some there are redundant MOVs 314; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 315; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 316; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 317; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 318; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 319; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 320; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 321; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 322; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 323; 
EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 324; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 325; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 326; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 327; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 328; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal 329; EG-DAG: 8 330; EG-DAG: 8 331; EG-DAG: 8 332; EG-DAG: 8 333; EG-DAG: 8 334; EG-DAG: 8 335; EG-DAG: 8 336; EG-DAG: 8 337; EG-DAG: 8 338; EG-DAG: 8 339; EG-DAG: 8 340; EG-DAG: 8 341; EG-DAG: 8 342; EG-DAG: 8 343; EG-DAG: 8 344define amdgpu_kernel void @global_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 { 345 %load = load <16 x i8>, <16 x i8> addrspace(1)* %in 346 %ext = zext <16 x i8> %load to <16 x i32> 347 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 348 ret void 349} 350 351; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i32: 352 353; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1 354; TODO: These should use DST, but for some there are redundant MOVs 355; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 356; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 357; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 358; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 359; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 360; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 361; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 362; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 363; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 364; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 365; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 366; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, 
literal 367; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 368; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 369; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 370; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal 371; EG-DAG: 8 372; EG-DAG: 8 373; EG-DAG: 8 374; EG-DAG: 8 375; EG-DAG: 8 376; EG-DAG: 8 377; EG-DAG: 8 378; EG-DAG: 8 379; EG-DAG: 8 380; EG-DAG: 8 381; EG-DAG: 8 382; EG-DAG: 8 383; EG-DAG: 8 384; EG-DAG: 8 385; EG-DAG: 8 386; EG-DAG: 8 387define amdgpu_kernel void @global_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 { 388 %load = load <16 x i8>, <16 x i8> addrspace(1)* %in 389 %ext = sext <16 x i8> %load to <16 x i32> 390 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 391 ret void 392} 393 394; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i32: 395 396; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1 397; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1 398; TODO: These should use DST, but for some there are redundant MOVs 399; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 400; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 401; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 402; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 403; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 404; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 405; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 406; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 407; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 408; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 409; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 410; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 411; EG-DAG: BFE_UINT 
{{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 412; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 413; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 414; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 415; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 416; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 417; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 418; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 419; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 420; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 421; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 422; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 423; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 424; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 425; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 426; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 427; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 428; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal 429; EG-DAG: 8 430; EG-DAG: 8 431; EG-DAG: 8 432; EG-DAG: 8 433; EG-DAG: 8 434; EG-DAG: 8 435; EG-DAG: 8 436; EG-DAG: 8 437; EG-DAG: 8 438; EG-DAG: 8 439; EG-DAG: 8 440; EG-DAG: 8 441; EG-DAG: 8 442; EG-DAG: 8 443; EG-DAG: 8 444; EG-DAG: 8 445; EG-DAG: 8 446; EG-DAG: 8 447; EG-DAG: 8 448; EG-DAG: 8 449; EG-DAG: 8 450; EG-DAG: 8 451; EG-DAG: 8 452; EG-DAG: 8 453; EG-DAG: 8 454; EG-DAG: 8 455; EG-DAG: 8 456; EG-DAG: 8 457; EG-DAG: 8 458; EG-DAG: 8 459define amdgpu_kernel void @global_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 { 460 %load = load <32 x i8>, <32 x i8> addrspace(1)* %in 461 %ext = zext <32 x i8> %load to <32 x i32> 462 store <32 x i32> %ext, 
<32 x i32> addrspace(1)* %out 463 ret void 464} 465 466; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i32: 467 468; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1 469; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 16, #1 470; TODO: These should use DST, but for some there are redundant MOVs 471; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 472; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 473; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 474; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 475; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 476; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 477; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 478; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 479; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 480; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 481; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 482; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 483; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 484; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 485; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 486; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 487; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 488; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 489; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 490; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 491; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 492; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 493; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 494; EG-DAG: BFE_INT {{[* 
]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 495; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 496; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 497; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 498; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 499; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 500; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 501; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 502; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal 503; EG-DAG: 8 504; EG-DAG: 8 505; EG-DAG: 8 506; EG-DAG: 8 507; EG-DAG: 8 508; EG-DAG: 8 509; EG-DAG: 8 510; EG-DAG: 8 511; EG-DAG: 8 512; EG-DAG: 8 513; EG-DAG: 8 514; EG-DAG: 8 515; EG-DAG: 8 516; EG-DAG: 8 517; EG-DAG: 8 518; EG-DAG: 8 519; EG-DAG: 8 520; EG-DAG: 8 521; EG-DAG: 8 522; EG-DAG: 8 523; EG-DAG: 8 524; EG-DAG: 8 525; EG-DAG: 8 526; EG-DAG: 8 527; EG-DAG: 8 528; EG-DAG: 8 529; EG-DAG: 8 530; EG-DAG: 8 531; EG-DAG: 8 532; EG-DAG: 8 533; EG-DAG: 8 534; EG-DAG: 8 535define amdgpu_kernel void @global_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 { 536 %load = load <32 x i8>, <32 x i8> addrspace(1)* %in 537 %ext = sext <32 x i8> %load to <32 x i32> 538 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out 539 ret void 540} 541 542; FUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i32: 543 544; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1 545; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1 546; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1 547; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1 548define amdgpu_kernel void @global_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 { 549 %load = load <64 x i8>, <64 x i8> addrspace(1)* %in 550 %ext = zext <64 x i8> %load to <64 x i32> 551 store <64 x i32> %ext, <64 x i32> 
addrspace(1)* %out 552 ret void 553} 554 555; FUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i32: 556 557; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1 558; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1 559; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1 560; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1 561define amdgpu_kernel void @global_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 { 562 %load = load <64 x i8>, <64 x i8> addrspace(1)* %in 563 %ext = sext <64 x i8> %load to <64 x i32> 564 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out 565 ret void 566} 567 568; FUNC-LABEL: {{^}}global_zextload_i8_to_i64: 569; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} 570 571; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]], 572; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]] 573 574; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]], 575; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]] 576 577; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 578; EG: MOV {{.*}}, 0.0 579define amdgpu_kernel void @global_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { 580 %a = load i8, i8 addrspace(1)* %in 581 %ext = zext i8 %a to i64 582 store i64 %ext, i64 addrspace(1)* %out 583 ret void 584} 585 586; FUNC-LABEL: {{^}}global_sextload_i8_to_i64: 587; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]], 588; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]], 589; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] 590 591; GCN-NOHSA: buffer_store_dwordx2 v[[[LO]]:[[HI]]] 592; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]] 593 594; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 595; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal 596; TODO: Why not 7 ? 
597; EG: 31 598define amdgpu_kernel void @global_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { 599 %a = load i8, i8 addrspace(1)* %in 600 %ext = sext i8 %a to i64 601 store i64 %ext, i64 addrspace(1)* %out 602 ret void 603} 604 605; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i64: 606 607; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 608; EG: MOV {{.*}}, 0.0 609define amdgpu_kernel void @global_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 { 610 %load = load <1 x i8>, <1 x i8> addrspace(1)* %in 611 %ext = zext <1 x i8> %load to <1 x i64> 612 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out 613 ret void 614} 615 616; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i64: 617 618; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 619; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal 620; TODO: Why not 7 ? 621; EG: 31 622define amdgpu_kernel void @global_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 { 623 %load = load <1 x i8>, <1 x i8> addrspace(1)* %in 624 %ext = sext <1 x i8> %load to <1 x i64> 625 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out 626 ret void 627} 628 629; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i64: 630 631; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 632define amdgpu_kernel void @global_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 { 633 %load = load <2 x i8>, <2 x i8> addrspace(1)* %in 634 %ext = zext <2 x i8> %load to <2 x i64> 635 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out 636 ret void 637} 638 639; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i64: 640 641; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 642define amdgpu_kernel void @global_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 { 643 %load = load <2 x i8>, <2 x i8> addrspace(1)* %in 644 %ext = sext <2 x i8> %load to <2 x i64> 645 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out 646 
ret void
}

; Each kernel below loads an i8 scalar or vector through an addrspace(1)
; pointer, widens it with zext or sext, and stores the widened value to %out.
; The label line of every test is matched in all runs; the remaining check
; lines only apply to the runs that enable the corresponding prefix.
;
; The r600 BFE_INT destination register is matched as
; T<one or more digits>.<channel> with a literal dot, in line with the
; register patterns used by the VTX_READ checks.

; zext <4 x i8> -> <4 x i64>; only the r600 runs check an instruction
; (VTX_READ_32).
; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = zext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; sext <4 x i8> -> <4 x i64>; only the r600 runs check an instruction
; (VTX_READ_32).
; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = sext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; zext <8 x i8> -> <8 x i64>; only the r600 runs check an instruction
; (VTX_READ_64).
; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = zext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; sext <8 x i8> -> <8 x i64>; only the r600 runs check an instruction
; (VTX_READ_64).
; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = sext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; zext <16 x i8> -> <16 x i64>; only the r600 runs check an instruction
; (VTX_READ_128).
; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = zext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; sext <16 x i8> -> <16 x i64>; only the r600 runs check an instruction
; (VTX_READ_128).
; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = sext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; zext <32 x i8> -> <32 x i64>; the r600 runs check for two VTX_READ_128
; instructions, at offsets 0 and 16, in either order.
; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @global_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = zext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; sext <32 x i8> -> <32 x i64>; the r600 runs check for two VTX_READ_128
; instructions, at offsets 0 and 16, in either order.
; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = sext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; Disabled test (zext <64 x i8> -> <64 x i64>): the kernel is commented out
; and its label is spelled XFUNC-LABEL, which is not a prefix enabled by any
; of this file's run lines.
; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i64:
; define amdgpu_kernel void @global_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
;   %ext = zext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; Disabled test (sext <64 x i8> -> <64 x i64>): commented out in the same way
; as the zext variant.
; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i64:
; define amdgpu_kernel void @global_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
;   %ext = sext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; zext i8 -> i16. The GCN runs check that the register written by the
; unsigned byte load is the one passed to the 16-bit store; the r600 runs
; check for VTX_READ_8.
; FUNC-LABEL: {{^}}global_zextload_i8_to_i16:
; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
; GCN-NOHSA: buffer_store_short v[[VAL]]

; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %a = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %a to i16
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; sext i8 -> i16. The GCN runs check that the register written by the signed
; byte load is the one passed to the 16-bit store; the r600 runs check for
; VTX_READ_8 followed by a BFE_INT.
; FUNC-LABEL: {{^}}global_sextload_i8_to_i16:
; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],

; GCN-NOHSA: buffer_store_short v[[VAL]]
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %a = load i8, i8 addrspace(1)* %in
  %ext = sext i8 %a to i16
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; zext <1 x i8> -> <1 x i16>; only the r600 runs check an instruction
; (VTX_READ_8).
; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i16:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = zext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
  ret void
}

; sext <1 x i8> -> <1 x i16>; the r600 runs check for VTX_READ_8 and one
; BFE_INT.
; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i16:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = sext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
  ret void
}

; zext <2 x i8> -> <2 x i16>; only the r600 runs check an instruction
; (VTX_READ_16).
; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i16:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = zext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
  ret void
}

; sext <2 x i8> -> <2 x i16>; the r600 runs check for VTX_READ_16 and list
; one BFE_INT pattern per vector element.
; NOTE(review): the run lines pass -allow-deprecated-dag-overlap, so repeated
; identical DAG patterns may all match the same output line; the repetition
; alone does not appear to enforce the instruction count - confirm before
; relying on it.
; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i16:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = sext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
  ret void
}

; zext <4 x i8> -> <4 x i16>; only the r600 runs check an instruction
; (VTX_READ_32).
; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i16:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = zext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
  ret void
}

; sext <4 x i8> -> <4 x i16>; the r600 runs check for VTX_READ_32 and list
; four BFE_INT patterns (one per element).
; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i16:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = sext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
  ret void
}

; zext <8 x i8> -> <8 x i16>; only the r600 runs check an instruction
; (VTX_READ_64).
; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i16:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = zext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
  ret void
}

; sext <8 x i8> -> <8 x i16>; the r600 runs check for VTX_READ_64 and list
; eight BFE_INT patterns (one per element).
; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i16:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = sext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
  ret void
}

; zext <16 x i8> -> <16 x i16>; only the r600 runs check an instruction
; (VTX_READ_128).
; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i16:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = zext <16 x i8> %load to <16 x i16>
  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
  ret void
}

; sext <16 x i8> -> <16 x i16>; the r600 runs check for VTX_READ_128 and list
; sixteen BFE_INT patterns (one per element).
; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i16:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = sext <16 x i8> %load to <16 x i16>
  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
  ret void
}

; zext <32 x i8> -> <32 x i16>; the r600 runs check for two VTX_READ_128
; instructions, at offsets 0 and 16, in either order.
; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i16:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @global_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = zext <32 x i8> %load to <32 x i16>
  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
  ret void
}

; sext <32 x i8> -> <32 x i16>; the r600 runs check for two VTX_READ_128
; instructions (offsets 0 and 16) and list thirty-two BFE_INT patterns (one
; per element), all in one order-independent DAG group.
; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i16:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = sext <32 x i8> %load to <32 x i16>
  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
  ret void
}

; Disabled test (zext <64 x i8> -> <64 x i16>): the kernel is commented out
; and its label is spelled XFUNC-LABEL, which is not a prefix enabled by any
; of this file's run lines.
; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i16:
; define amdgpu_kernel void @global_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
;   %ext = zext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
;   ret void
; }

; Disabled test (sext <64 x i8> -> <64 x i16>): commented out in the same way
; as the zext variant.
; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i16:
; define amdgpu_kernel void @global_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
;   %ext = sext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
;   ret void
; }

; Attribute group referenced as #0 by every kernel in this file.
attributes #0 = { nounwind }