1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s 2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s 3; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s 4 5 6; FUNC-LABEL: {{^}}local_load_i8: 7; GCN-NOT: s_wqm_b64 8; GCN: s_mov_b32 m0 9; GCN: ds_read_u8 10 11; EG: LDS_UBYTE_READ_RET 12define void @local_load_i8(i8 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { 13entry: 14 %ld = load i8, i8 addrspace(3)* %in 15 store i8 %ld, i8 addrspace(3)* %out 16 ret void 17} 18 19; FUNC-LABEL: {{^}}local_load_v2i8: 20; GCN-NOT: s_wqm_b64 21; GCN: s_mov_b32 m0 22; GCN: ds_read_u16 23 24; EG: LDS_USHORT_READ_RET 25define void @local_load_v2i8(<2 x i8> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { 26entry: 27 %ld = load <2 x i8>, <2 x i8> addrspace(3)* %in 28 store <2 x i8> %ld, <2 x i8> addrspace(3)* %out 29 ret void 30} 31 32; FUNC-LABEL: {{^}}local_load_v3i8: 33; GCN: ds_read_b32 34 35; EG: DS_READ_RET 36define void @local_load_v3i8(<3 x i8> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 { 37entry: 38 %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in 39 store <3 x i8> %ld, <3 x i8> addrspace(3)* %out 40 ret void 41} 42 43; FUNC-LABEL: {{^}}local_load_v4i8: 44; GCN: ds_read_b32 45 46; EG: LDS_READ_RET 47define void @local_load_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { 48entry: 49 %ld = load <4 x i8>, <4 x i8> addrspace(3)* %in 50 store <4 x i8> %ld, <4 x i8> addrspace(3)* %out 51 ret void 52} 53 54; FUNC-LABEL: {{^}}local_load_v8i8: 55; GCN: ds_read_b64 56 57; EG: LDS_READ_RET 58; EG: LDS_READ_RET 59define void @local_load_v8i8(<8 x i8> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { 60entry: 61 %ld = load <8 x i8>, <8 x i8> addrspace(3)* %in 62 store <8 x i8> %ld, <8 x i8> addrspace(3)* %out 63 ret void 64} 65 66; FUNC-LABEL: {{^}}local_load_v16i8: 67; GCN: ds_read2_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}} 68; GCN: ds_write2_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:{{[0-9]+}}], v[{{[0-9]+}}:[[HI]]{{\]}} offset1:1{{$}} 69 70; EG: LDS_READ_RET 71; EG: LDS_READ_RET 72; EG: LDS_READ_RET 73; EG: LDS_READ_RET 74define void @local_load_v16i8(<16 x i8> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { 75entry: 76 %ld = load <16 x i8>, <16 x i8> addrspace(3)* %in 77 store <16 x i8> %ld, <16 x i8> addrspace(3)* %out 78 ret void 79} 80 81; FUNC-LABEL: {{^}}local_zextload_i8_to_i32: 82; GCN-NOT: s_wqm_b64 83; GCN: s_mov_b32 m0 84; GCN: ds_read_u8 85 86; EG: LDS_UBYTE_READ_RET 87define void @local_zextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { 88 %a = load i8, i8 addrspace(3)* %in 89 %ext = zext i8 %a to i32 90 store i32 %ext, i32 addrspace(3)* %out 91 ret void 92} 93 94; FUNC-LABEL: {{^}}local_sextload_i8_to_i32: 95; GCN-NOT: s_wqm_b64 96; GCN: s_mov_b32 m0 97; GCN: ds_read_i8 98 99; EG: LDS_UBYTE_READ_RET 100; EG: BFE_INT 101define void @local_sextload_i8_to_i32(i32 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { 102 %ld = load i8, i8 addrspace(3)* %in 103 %ext = sext i8 %ld to i32 104 store i32 %ext, i32 addrspace(3)* %out 105 ret void 106} 107 108; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i32: 109 110; EG: LDS_UBYTE_READ_RET 111define void @local_zextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { 112 %load = load <1 x i8>, <1 x i8> addrspace(3)* %in 113 %ext = zext <1 x i8> %load to <1 x i32> 114 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out 115 ret void 116} 117 118; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i32: 119 120; EG: LDS_UBYTE_READ_RET 121; EG: BFE_INT 122define void @local_sextload_v1i8_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { 123 %load = load <1 x i8>, <1 x i8> addrspace(3)* %in 124 %ext = sext <1 x i8> %load to <1 x i32> 125 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out 126 ret void 127} 128 129; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i32: 130; GCN: ds_read_u16 131 132; EG: LDS_USHORT_READ_RET 133define void @local_zextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { 134 %load = load <2 x i8>, <2 x i8> addrspace(3)* %in 135 %ext = zext <2 x i8> %load to <2 x i32> 136 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out 137 ret void 138} 139 140; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i32: 141; GCN-NOT: s_wqm_b64 142; GCN: s_mov_b32 m0 143; GCN: ds_read_u16 144; FIXME: Need to optimize this sequence to avoid extra shift on VI. 145; t23: i16 = srl t39, Constant:i32<8> 146; t31: i32 = any_extend t23 147; t33: i32 = sign_extend_inreg t31, ValueType:ch:i8 148 149; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8 150; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8 151 152; VI-DAG: v_lshrrev_b16_e32 [[SHIFT:v[0-9]+]], 8, v{{[0-9]+}} 153; VI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8 154; VI-DAG: v_bfe_i32 v{{[0-9]+}}, [[SHIFT]], 0, 8 155 156; EG: LDS_USHORT_READ_RET 157; EG-DAG: BFE_INT 158; EG-DAG: BFE_INT 159define void @local_sextload_v2i8_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { 160 %load = load <2 x i8>, <2 x i8> addrspace(3)* %in 161 %ext = sext <2 x i8> %load to <2 x i32> 162 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out 163 ret void 164} 165 166; FUNC-LABEL: {{^}}local_zextload_v3i8_to_v3i32: 167; GCN: ds_read_b32 168 169; SI-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8 170; VI-DAG: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, {{v[0-9]+}} 171; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8 172; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff, 173 174; EG: LDS_READ_RET 175define void @local_zextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 { 176entry: 177 %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in 178 %ext = zext <3 x i8> %ld to <3 x i32> 179 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out 180 ret void 181} 182 183; FUNC-LABEL: {{^}}local_sextload_v3i8_to_v3i32: 184; GCN-NOT: s_wqm_b64 185; GCN: s_mov_b32 m0 186; GCN: ds_read_b32 187 188; GCN-DAG: v_bfe_i32 189; GCN-DAG: v_bfe_i32 190; GCN-DAG: v_bfe_i32 191; GCN-DAG: v_bfe_i32 192 193; GCN-DAG: ds_write_b64 194; GCN-DAG: ds_write_b32 195 196; EG: LDS_READ_RET 197; EG-DAG: BFE_INT 198; EG-DAG: BFE_INT 199; EG-DAG: BFE_INT 200define void @local_sextload_v3i8_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i8> addrspace(3)* %in) #0 { 201entry: 202 %ld = load <3 x i8>, <3 x i8> addrspace(3)* %in 203 %ext = sext <3 x i8> %ld to <3 x i32> 204 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out 205 ret void 206} 207 208; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i32: 209; GCN-NOT: s_wqm_b64 210; GCN: s_mov_b32 m0 211; GCN: ds_read_b32 212 213; EG: LDS_READ_RET 214; EG-DAG: BFE_UINT 215; EG-DAG: BFE_UINT 216; EG-DAG: BFE_UINT 217define void @local_zextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { 218 %load = load <4 x i8>, <4 x i8> addrspace(3)* %in 219 %ext = zext <4 x i8> %load to <4 x i32> 220 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out 221 ret void 222} 223 224; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i32: 225; GCN-NOT: s_wqm_b64 226; GCN: s_mov_b32 m0 227; GCN: ds_read_b32 228 229; EG-DAG: LDS_READ_RET 230; EG-DAG: BFE_INT 231; EG-DAG: BFE_INT 232; EG-DAG: BFE_INT 233; EG-DAG: BFE_INT 234define void @local_sextload_v4i8_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { 235 %load = load <4 x i8>, <4 x i8> addrspace(3)* %in 236 %ext = sext <4 x i8> %load to <4 x i32> 237 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out 238 ret void 239} 240 241; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i32: 242 243; EG-DAG: LDS_READ_RET 244; EG-DAG: LDS_READ_RET 245; EG-DAG: BFE_UINT 246; EG-DAG: BFE_UINT 247; EG-DAG: BFE_UINT 248; EG-DAG: BFE_UINT 249; EG-DAG: BFE_UINT 250; EG-DAG: BFE_UINT 251define void @local_zextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { 252 %load = load <8 x i8>, <8 x i8> addrspace(3)* %in 253 %ext = zext <8 x i8> %load to <8 x i32> 254 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out 255 ret void 256} 257 258; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i32: 259 260; EG-DAG: LDS_READ_RET 261; EG-DAG: LDS_READ_RET 262; EG-DAG: BFE_INT 263; EG-DAG: BFE_INT 264; EG-DAG: BFE_INT 265; EG-DAG: BFE_INT 266; EG-DAG: BFE_INT 267; EG-DAG: BFE_INT 268; EG-DAG: BFE_INT 269; EG-DAG: BFE_INT 270define void @local_sextload_v8i8_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { 271 %load = load <8 x i8>, <8 x i8> addrspace(3)* %in 272 %ext = sext <8 x i8> %load to <8 x i32> 273 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out 274 ret void 275} 276 277; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i32: 278 279; EG-DAG: LDS_READ_RET 280; EG-DAG: LDS_READ_RET 281; EG-DAG: LDS_READ_RET 282; EG-DAG: LDS_READ_RET 283; EG-DAG: BFE_UINT 284; EG-DAG: BFE_UINT 285; EG-DAG: BFE_UINT 286; EG-DAG: BFE_UINT 287; EG-DAG: BFE_UINT 288; EG-DAG: BFE_UINT 289; EG-DAG: BFE_UINT 290; EG-DAG: BFE_UINT 291; EG-DAG: BFE_UINT 292; EG-DAG: BFE_UINT 293; EG-DAG: BFE_UINT 294; EG-DAG: BFE_UINT 295define void @local_zextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { 296 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in 297 %ext = zext <16 x i8> %load to <16 x i32> 298 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out 299 ret void 300} 301 302; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i32: 303 304; EG-DAG: LDS_READ_RET 305; EG-DAG: LDS_READ_RET 306; EG-DAG: LDS_READ_RET 307; EG-DAG: LDS_READ_RET 308; EG-DAG: BFE_INT 309; EG-DAG: BFE_INT 310; EG-DAG: BFE_INT 311; EG-DAG: BFE_INT 312; EG-DAG: BFE_INT 313; EG-DAG: BFE_INT 314; EG-DAG: BFE_INT 315; EG-DAG: BFE_INT 316; EG-DAG: BFE_INT 317; EG-DAG: BFE_INT 318; EG-DAG: BFE_INT 319; EG-DAG: BFE_INT 320; EG-DAG: BFE_INT 321; EG-DAG: BFE_INT 322; EG-DAG: BFE_INT 323; EG-DAG: BFE_INT 324define void @local_sextload_v16i8_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { 325 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in 326 %ext = sext <16 x i8> %load to <16 x i32> 327 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out 328 ret void 329} 330 331; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i32: 332 333; EG-DAG: LDS_READ_RET 334; EG-DAG: LDS_READ_RET 335; EG-DAG: LDS_READ_RET 336; EG-DAG: LDS_READ_RET 337; EG-DAG: LDS_READ_RET 338; EG-DAG: LDS_READ_RET 339; EG-DAG: LDS_READ_RET 340; EG-DAG: LDS_READ_RET 341define void @local_zextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { 342 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in 343 %ext = zext <32 x i8> %load to <32 x i32> 344 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out 345 ret void 346} 347 348; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i32: 349 350; EG-DAG: LDS_READ_RET 351; EG-DAG: LDS_READ_RET 352; EG-DAG: LDS_READ_RET 353; EG-DAG: LDS_READ_RET 354; EG-DAG: LDS_READ_RET 355; EG-DAG: LDS_READ_RET 356; EG-DAG: LDS_READ_RET 357; EG-DAG: LDS_READ_RET 358define void @local_sextload_v32i8_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { 359 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in 360 %ext = sext <32 x i8> %load to <32 x i32> 361 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out 362 ret void 363} 364 365; FUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i32: 366 367; EG-DAG: LDS_READ_RET 368; EG-DAG: LDS_READ_RET 369; EG-DAG: LDS_READ_RET 370; EG-DAG: LDS_READ_RET 371; EG-DAG: LDS_READ_RET 372; EG-DAG: LDS_READ_RET 373; EG-DAG: LDS_READ_RET 374; EG-DAG: LDS_READ_RET 375; EG-DAG: LDS_READ_RET 376; EG-DAG: LDS_READ_RET 377; EG-DAG: LDS_READ_RET 378; EG-DAG: LDS_READ_RET 379; EG-DAG: LDS_READ_RET 380; EG-DAG: LDS_READ_RET 381; EG-DAG: LDS_READ_RET 382; EG-DAG: LDS_READ_RET 383define void @local_zextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { 384 %load = load <64 x i8>, <64 x i8> addrspace(3)* %in 385 %ext = zext <64 x i8> %load to <64 x i32> 386 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out 387 ret void 388} 389 390; FUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i32: 391 392; EG-DAG: LDS_READ_RET 393; EG-DAG: LDS_READ_RET 394; EG-DAG: LDS_READ_RET 395; EG-DAG: LDS_READ_RET 396; EG-DAG: LDS_READ_RET 397; EG-DAG: LDS_READ_RET 398; EG-DAG: LDS_READ_RET 399; EG-DAG: LDS_READ_RET 400; EG-DAG: LDS_READ_RET 401; EG-DAG: LDS_READ_RET 402; EG-DAG: LDS_READ_RET 403; EG-DAG: LDS_READ_RET 404; EG-DAG: LDS_READ_RET 405; EG-DAG: LDS_READ_RET 406; EG-DAG: LDS_READ_RET 407; EG-DAG: LDS_READ_RET 408define void @local_sextload_v64i8_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { 409 %load = load <64 x i8>, <64 x i8> addrspace(3)* %in 410 %ext = sext <64 x i8> %load to <64 x i32> 411 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out 412 ret void 413} 414 415; FUNC-LABEL: {{^}}local_zextload_i8_to_i64: 416; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} 417; GCN-DAG: ds_read_u8 v[[LO:[0-9]+]], 418; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]] 419 420; EG: LDS_UBYTE_READ_RET 421; EG: MOV {{.*}}, literal 422; EG: 0.0 423define void @local_zextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { 424 %a = load i8, i8 addrspace(3)* %in 425 %ext = zext i8 %a to i64 426 store i64 %ext, i64 addrspace(3)* %out 427 ret void 428} 429 430; FUNC-LABEL: {{^}}local_sextload_i8_to_i64: 431; GCN: ds_read_i8 v[[LO:[0-9]+]], 432; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] 433 434; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}} 435 436; EG: LDS_UBYTE_READ_RET 437; EG: ASHR 438; TODO: why not 7? 439; EG: 31 440define void @local_sextload_i8_to_i64(i64 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { 441 %a = load i8, i8 addrspace(3)* %in 442 %ext = sext i8 %a to i64 443 store i64 %ext, i64 addrspace(3)* %out 444 ret void 445} 446 447; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i64: 448 449; EG: LDS_UBYTE_READ_RET 450; EG: MOV {{.*}}, literal 451; TODO: merge? 452; EG: 0.0 453define void @local_zextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { 454 %load = load <1 x i8>, <1 x i8> addrspace(3)* %in 455 %ext = zext <1 x i8> %load to <1 x i64> 456 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out 457 ret void 458} 459 460; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i64: 461 462; EG: LDS_UBYTE_READ_RET 463; EG: ASHR 464; TODO: why not 7? 465; EG: 31 466define void @local_sextload_v1i8_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { 467 %load = load <1 x i8>, <1 x i8> addrspace(3)* %in 468 %ext = sext <1 x i8> %load to <1 x i64> 469 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out 470 ret void 471} 472 473; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i64: 474 475; EG: LDS_USHORT_READ_RET 476define void @local_zextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { 477 %load = load <2 x i8>, <2 x i8> addrspace(3)* %in 478 %ext = zext <2 x i8> %load to <2 x i64> 479 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out 480 ret void 481} 482 483; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i64: 484 485; EG: LDS_USHORT_READ_RET 486; EG: BFE_INT 487; EG: BFE_INT 488define void @local_sextload_v2i8_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { 489 %load = load <2 x i8>, <2 x i8> addrspace(3)* %in 490 %ext = sext <2 x i8> %load to <2 x i64> 491 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out 492 ret void 493} 494 495; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i64: 496 497; EG: LDS_READ_RET 498define void @local_zextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { 499 %load = load <4 x i8>, <4 x i8> addrspace(3)* %in 500 %ext = zext <4 x i8> %load to <4 x i64> 501 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out 502 ret void 503} 504 505; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i64: 506 507; EG: LDS_READ_RET 508define void @local_sextload_v4i8_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { 509 %load = load <4 x i8>, <4 x i8> addrspace(3)* %in 510 %ext = sext <4 x i8> %load to <4 x i64> 511 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out 512 ret void 513} 514 515; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i64: 516 517; EG: LDS_READ_RET 518; EG: LDS_READ_RET 519define void @local_zextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { 520 %load = load <8 x i8>, <8 x i8> addrspace(3)* %in 521 %ext = zext <8 x i8> %load to <8 x i64> 522 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out 523 ret void 524} 525 526; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i64: 527 528; EG: LDS_READ_RET 529; EG: LDS_READ_RET 530; EG-DAG: ASHR 531; EG-DAG: ASHR 532; EG-DAG: BFE_INT 533; EG-DAG: BFE_INT 534; EG-DAG: BFE_INT 535; EG-DAG: BFE_INT 536; EG-DAG: BFE_INT 537; EG-DAG: BFE_INT 538; EG-DAG: BFE_INT 539define void @local_sextload_v8i8_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { 540 %load = load <8 x i8>, <8 x i8> addrspace(3)* %in 541 %ext = sext <8 x i8> %load to <8 x i64> 542 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out 543 ret void 544} 545 546; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i64: 547 548; EG: LDS_READ_RET 549; EG: LDS_READ_RET 550; EG: LDS_READ_RET 551; EG: LDS_READ_RET 552define void @local_zextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { 553 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in 554 %ext = zext <16 x i8> %load to <16 x i64> 555 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out 556 ret void 557} 558 559; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i64: 560 561; EG: LDS_READ_RET 562; EG: LDS_READ_RET 563; EG: LDS_READ_RET 564; EG: LDS_READ_RET 565define void @local_sextload_v16i8_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { 566 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in 567 %ext = sext <16 x i8> %load to <16 x i64> 568 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out 569 ret void 570} 571 572; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i64: 573 574; EG: LDS_READ_RET 575; EG: LDS_READ_RET 576; EG: LDS_READ_RET 577; EG: LDS_READ_RET 578; EG: LDS_READ_RET 579; EG: LDS_READ_RET 580; EG: LDS_READ_RET 581; EG: LDS_READ_RET 582define void @local_zextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { 583 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in 584 %ext = zext <32 x i8> %load to <32 x i64> 585 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out 586 ret void 587} 588 589; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i64: 590 591; EG: LDS_READ_RET 592; EG: LDS_READ_RET 593; EG: LDS_READ_RET 594; EG: LDS_READ_RET 595; EG: LDS_READ_RET 596; EG: LDS_READ_RET 597; EG: LDS_READ_RET 598; EG: LDS_READ_RET 599define void @local_sextload_v32i8_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { 600 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in 601 %ext = sext <32 x i8> %load to <32 x i64> 602 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out 603 ret void 604} 605 606; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i64: 607; define void @local_zextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { 608; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in 609; %ext = zext <64 x i8> %load to <64 x i64> 610; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out 611; ret void 612; } 613 614; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i64: 615; define void @local_sextload_v64i8_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { 616; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in 617; %ext = sext <64 x i8> %load to <64 x i64> 618; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out 619; ret void 620; } 621 622; FUNC-LABEL: {{^}}local_zextload_i8_to_i16: 623; GCN: ds_read_u8 v[[VAL:[0-9]+]], 624; GCN: ds_write_b16 v[[VAL:[0-9]+]] 625 626; EG: LDS_UBYTE_READ_RET 627; EG: LDS_SHORT_WRITE 628define void @local_zextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { 629 %a = load i8, i8 addrspace(3)* %in 630 %ext = zext i8 %a to i16 631 store i16 %ext, i16 addrspace(3)* %out 632 ret void 633} 634 635; FUNC-LABEL: {{^}}local_sextload_i8_to_i16: 636; GCN: ds_read_i8 v[[VAL:[0-9]+]], 637; GCN: ds_write_b16 v{{[0-9]+}}, v[[VAL]] 638 639; EG: LDS_UBYTE_READ_RET 640; EG: BFE_INT 641; EG: LDS_SHORT_WRITE 642define void @local_sextload_i8_to_i16(i16 addrspace(3)* %out, i8 addrspace(3)* %in) #0 { 643 %a = load i8, i8 addrspace(3)* %in 644 %ext = sext i8 %a to i16 645 store i16 %ext, i16 addrspace(3)* %out 646 ret void 647} 648 649; FUNC-LABEL: {{^}}local_zextload_v1i8_to_v1i16: 650 651; EG: LDS_UBYTE_READ_RET 652; EG: LDS_SHORT_WRITE 653define void @local_zextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { 654 %load = load <1 x i8>, <1 x i8> addrspace(3)* %in 655 %ext = zext <1 x i8> %load to <1 x i16> 656 store <1 x i16> %ext, <1 x i16> addrspace(3)* %out 657 ret void 658} 659 660; FUNC-LABEL: {{^}}local_sextload_v1i8_to_v1i16: 661 662; EG: LDS_UBYTE_READ_RET 663; EG: BFE_INT 664; EG: LDS_SHORT_WRITE 665define void @local_sextload_v1i8_to_v1i16(<1 x i16> addrspace(3)* %out, <1 x i8> addrspace(3)* %in) #0 { 666 %load = load <1 x i8>, <1 x i8> addrspace(3)* %in 667 %ext = sext <1 x i8> %load to <1 x i16> 668 store <1 x i16> %ext, <1 x i16> addrspace(3)* %out 669 ret void 670} 671 672; FUNC-LABEL: {{^}}local_zextload_v2i8_to_v2i16: 673 674; EG: LDS_USHORT_READ_RET 675; EG: LDS_WRITE 676define void @local_zextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { 677 %load = load <2 x i8>, <2 x i8> addrspace(3)* %in 678 %ext = zext <2 x i8> %load to <2 x i16> 679 store <2 x i16> %ext, <2 x i16> addrspace(3)* %out 680 ret void 681} 682 683; FUNC-LABEL: {{^}}local_sextload_v2i8_to_v2i16: 684 685; EG: LDS_USHORT_READ_RET 686; EG: BFE_INT 687; EG: BFE_INT 688; EG: LDS_WRITE 689define void @local_sextload_v2i8_to_v2i16(<2 x i16> addrspace(3)* %out, <2 x i8> addrspace(3)* %in) #0 { 690 %load = load <2 x i8>, <2 x i8> addrspace(3)* %in 691 %ext = sext <2 x i8> %load to <2 x i16> 692 store <2 x i16> %ext, <2 x i16> addrspace(3)* %out 693 ret void 694} 695 696; FUNC-LABEL: {{^}}local_zextload_v4i8_to_v4i16: 697 698; EG: LDS_READ_RET 699; EG: LDS_WRITE 700; EG: LDS_WRITE 701define void @local_zextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { 702 %load = load <4 x i8>, <4 x i8> addrspace(3)* %in 703 %ext = zext <4 x i8> %load to <4 x i16> 704 store <4 x i16> %ext, <4 x i16> addrspace(3)* %out 705 ret void 706} 707 708; FUNC-LABEL: {{^}}local_sextload_v4i8_to_v4i16: 709 710; EG: LDS_READ_RET 711; EG-DAG: BFE_INT 712; EG-DAG: BFE_INT 713; EG-DAG: BFE_INT 714; EG-DAG: ASHR 715; EG: LDS_WRITE 716; EG: LDS_WRITE 717define void @local_sextload_v4i8_to_v4i16(<4 x i16> addrspace(3)* %out, <4 x i8> addrspace(3)* %in) #0 { 718 %load = load <4 x i8>, <4 x i8> addrspace(3)* %in 719 %ext = sext <4 x i8> %load to <4 x i16> 720 store <4 x i16> %ext, <4 x i16> addrspace(3)* %out 721 ret void 722} 723 724; FUNC-LABEL: {{^}}local_zextload_v8i8_to_v8i16: 725 726; EG: LDS_READ_RET 727; EG: LDS_READ_RET 728; EG: LDS_WRITE 729; EG: LDS_WRITE 730; EG: LDS_WRITE 731; EG: LDS_WRITE 732define void @local_zextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { 733 %load = load <8 x i8>, <8 x i8> addrspace(3)* %in 734 %ext = zext <8 x i8> %load to <8 x i16> 735 store <8 x i16> %ext, <8 x i16> addrspace(3)* %out 736 ret void 737} 738 739; FUNC-LABEL: {{^}}local_sextload_v8i8_to_v8i16: 740 741; EG: LDS_READ_RET 742; EG: LDS_READ_RET 743; EG-DAG: BFE_INT 744; EG-DAG: BFE_INT 745; EG-DAG: BFE_INT 746; EG-DAG: BFE_INT 747; EG-DAG: BFE_INT 748; EG-DAG: BFE_INT 749; EG-DAG: ASHR 750; EG-DAG: ASHR 751; EG: LDS_WRITE 752; EG: LDS_WRITE 753; EG: LDS_WRITE 754; EG: LDS_WRITE 755define void @local_sextload_v8i8_to_v8i16(<8 x i16> addrspace(3)* %out, <8 x i8> addrspace(3)* %in) #0 { 756 %load = load <8 x i8>, <8 x i8> addrspace(3)* %in 757 %ext = sext <8 x i8> %load to <8 x i16> 758 store <8 x i16> %ext, <8 x i16> addrspace(3)* %out 759 ret void 760} 761 762; FUNC-LABEL: {{^}}local_zextload_v16i8_to_v16i16: 763 764; EG: LDS_READ_RET 765; EG: LDS_READ_RET 766; EG: LDS_READ_RET 767; EG: LDS_READ_RET 768; EG: LDS_WRITE 769; EG: LDS_WRITE 770; EG: LDS_WRITE 771; EG: LDS_WRITE 772; EG: LDS_WRITE 773; EG: LDS_WRITE 774; EG: LDS_WRITE 775; EG: LDS_WRITE 776define void @local_zextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { 777 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in 778 %ext = zext <16 x i8> %load to <16 x i16> 779 store <16 x i16> %ext, <16 x i16> addrspace(3)* %out 780 ret void 781} 782 783; FUNC-LABEL: {{^}}local_sextload_v16i8_to_v16i16: 784 785; EG: LDS_READ_RET 786; EG: LDS_READ_RET 787; EG: LDS_READ_RET 788; EG: LDS_READ_RET 789; EG-DAG: BFE_INT 790; EG-DAG: BFE_INT 791; EG-DAG: BFE_INT 792; EG-DAG: BFE_INT 793; EG-DAG: BFE_INT 794; EG-DAG: BFE_INT 795; EG-DAG: BFE_INT 796; EG-DAG: BFE_INT 797; EG-DAG: BFE_INT 798; EG-DAG: BFE_INT 799; EG-DAG: BFE_INT 800; EG-DAG: BFE_INT 801; EG-DAG: ASHR 802; EG-DAG: ASHR 803; EG-DAG: ASHR 804; EG-DAG: ASHR 805; EG: LDS_WRITE 806; EG: LDS_WRITE 807; EG: LDS_WRITE 808; EG: LDS_WRITE 809; EG: LDS_WRITE 810; EG: LDS_WRITE 811; EG: LDS_WRITE 812; EG: LDS_WRITE 813define void @local_sextload_v16i8_to_v16i16(<16 x i16> addrspace(3)* %out, <16 x i8> addrspace(3)* %in) #0 { 814 %load = load <16 x i8>, <16 x i8> addrspace(3)* %in 815 %ext = sext <16 x i8> %load to <16 x i16> 816 store <16 x i16> %ext, <16 x i16> addrspace(3)* %out 817 ret void 818} 819 820; FUNC-LABEL: {{^}}local_zextload_v32i8_to_v32i16: 821 822; EG: LDS_READ_RET 823; EG: LDS_READ_RET 824; EG: LDS_READ_RET 825; EG: LDS_READ_RET 826; EG: LDS_READ_RET 827; EG: LDS_READ_RET 828; EG: LDS_READ_RET 829; EG: LDS_READ_RET 830; EG: LDS_WRITE 831; EG: LDS_WRITE 832; EG: LDS_WRITE 833; EG: LDS_WRITE 834; EG: LDS_WRITE 835; EG: LDS_WRITE 836; EG: LDS_WRITE 837; EG: LDS_WRITE 838; EG: LDS_WRITE 839; EG: LDS_WRITE 840; EG: LDS_WRITE 841; EG: LDS_WRITE 842; EG: LDS_WRITE 843; EG: LDS_WRITE 844; EG: LDS_WRITE 845; EG: LDS_WRITE 846define void @local_zextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { 847 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in 848 %ext = zext <32 x i8> %load to <32 x i16> 849 store <32 x i16> %ext, <32 x i16> addrspace(3)* %out 850 ret void 851} 852 853; FUNC-LABEL: {{^}}local_sextload_v32i8_to_v32i16: 854 855; EG: LDS_READ_RET 856; EG: LDS_READ_RET 857; EG: LDS_READ_RET 858; EG: LDS_READ_RET 859; EG: LDS_READ_RET 860; EG: LDS_READ_RET 861; EG: LDS_READ_RET 862; EG: LDS_READ_RET 863; EG-DAG: BFE_INT 864; EG-DAG: BFE_INT 865; EG-DAG: BFE_INT 866; EG-DAG: BFE_INT 867; EG-DAG: BFE_INT 868; EG-DAG: BFE_INT 869; EG-DAG: BFE_INT 870; EG-DAG: BFE_INT 871; EG-DAG: BFE_INT 872; EG-DAG: BFE_INT 873; EG-DAG: BFE_INT 874; EG-DAG: BFE_INT 875; EG-DAG: BFE_INT 876; EG-DAG: BFE_INT 877; EG-DAG: BFE_INT 878; EG-DAG: BFE_INT 879; EG-DAG: BFE_INT 880; EG-DAG: BFE_INT 881; EG-DAG: BFE_INT 882; EG-DAG: BFE_INT 883; EG-DAG: BFE_INT 884; EG-DAG: BFE_INT 885; EG-DAG: BFE_INT 886; EG-DAG: BFE_INT 887; EG-DAG: ASHR 888; EG-DAG: ASHR 889; EG-DAG: ASHR 890; EG-DAG: ASHR 891; EG-DAG: ASHR 892; EG-DAG: ASHR 893; EG-DAG: ASHR 894; EG-DAG: ASHR 895; EG: LDS_WRITE 896; EG: LDS_WRITE 897; EG: LDS_WRITE 898; EG: LDS_WRITE 899; EG: LDS_WRITE 900; EG: LDS_WRITE 901; EG: LDS_WRITE 902; EG: LDS_WRITE 903; EG: LDS_WRITE 904; EG: LDS_WRITE 905; EG: LDS_WRITE 906; EG: LDS_WRITE 907; EG: LDS_WRITE 908; EG: LDS_WRITE 909; EG: LDS_WRITE 910; EG: LDS_WRITE 911define void @local_sextload_v32i8_to_v32i16(<32 x i16> addrspace(3)* %out, <32 x i8> addrspace(3)* %in) #0 { 912 %load = load <32 x i8>, <32 x i8> addrspace(3)* %in 913 %ext = sext <32 x i8> %load to <32 x i16> 914 store <32 x i16> %ext, <32 x i16> addrspace(3)* %out 915 ret void 916} 917 918; XFUNC-LABEL: {{^}}local_zextload_v64i8_to_v64i16: 919; define void @local_zextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { 920; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in 921; %ext = zext <64 x i8> %load to <64 x i16> 922; store <64 x i16> %ext, <64 x i16> addrspace(3)* %out 923; ret void 924; } 925 926; XFUNC-LABEL: {{^}}local_sextload_v64i8_to_v64i16: 927; define void @local_sextload_v64i8_to_v64i16(<64 x i16> addrspace(3)* %out, <64 x i8> addrspace(3)* %in) #0 { 928; %load = load <64 x i8>, <64 x i8> addrspace(3)* %in 929; %ext = sext <64 x i8> %load to <64 x i16> 930; store <64 x i16> %ext, <64 x i16> addrspace(3)* %out 931; ret void 932; } 933 934attributes #0 = { nounwind } 935