; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s

; Exercises stores of scalar and vector values through addrspace(0) pointers
; on the amdgcn (verde, tonga) and r600 (redwood, cayman) targets, at natural
; and at reduced alignment.

; i1 store: the r600 checks expect a load/modify/store sequence, the amdgcn
; checks expect a single byte store.
; FUNC-LABEL: {{^}}store_i1:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_byte
define void @store_i1(i1 addrspace(0)* %out) {
entry:
  store i1 true, i1 addrspace(0)* %out
  ret void
}

; i8 store
; FUNC-LABEL: {{^}}store_i8:
; EG: LSHR * [[ADDRESS:T[0-9]\.[XYZW]]], KC0[2].Y, literal.x
; EG-NEXT: 2
; EG: MOVA_INT * AR.x (MASKED)
; EG: MOV [[OLD:T[0-9]\.[XYZW]]], {{.*}}AR.x

; IG 0: Get the byte index and truncate the value
; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
; EG: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
; EG-NEXT: 3(4.203895e-45)
; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.x
; EG-NEXT: 255(3.573311e-43)

; EG: NOT_INT
; EG: AND_INT {{[\* ]*}}[[CLR_CHAN:T[0-9]\.[XYZW]]], {{.*}}[[OLD]]
; EG: OR_INT * [[RES:T[0-9]\.[XYZW]]]
; TODO: Is the reload necessary?
; EG: MOVA_INT * AR.x (MASKED), [[ADDRESS]]
; EG: MOV * T(0 + AR.x).X+, [[RES]]

; SI: buffer_store_byte

define void @store_i8(i8 addrspace(0)* %out, i8 %in) {
entry:
  store i8 %in, i8 addrspace(0)* %out
  ret void
}

; i16 store
; FUNC-LABEL: {{^}}store_i16:
; EG: LSHR * [[ADDRESS:T[0-9]\.[XYZW]]], KC0[2].Y, literal.x
; EG-NEXT: 2
; EG: MOVA_INT * AR.x (MASKED)
; EG: MOV [[OLD:T[0-9]\.[XYZW]]], {{.*}}AR.x

; IG 0: Get the byte index and truncate the value
; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
; EG: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
; EG-NEXT: 3(4.203895e-45)
; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.x
; EG-NEXT: 65535(9.183409e-41)

; EG: NOT_INT
; EG: AND_INT {{[\* ]*}}[[CLR_CHAN:T[0-9]\.[XYZW]]], {{.*}}[[OLD]]
; EG: OR_INT * [[RES:T[0-9]\.[XYZW]]]
; TODO: Is the reload necessary?
; EG: MOVA_INT * AR.x (MASKED), [[ADDRESS]]
; EG: MOV * T(0 + AR.x).X+, [[RES]]

; SI: buffer_store_short
define void @store_i16(i16 addrspace(0)* %out, i16 %in) {
entry:
  store i16 %in, i16 addrspace(0)* %out
  ret void
}

; i24 store: the amdgcn checks expect the value to be split into a short
; store plus a byte store of the bits shifted down by 16.
; FUNC-LABEL: {{^}}store_i24:
; SI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_store_short

; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store can be eliminated
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store can be eliminated
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
define void @store_i24(i24 addrspace(0)* %out, i24 %in) {
entry:
  store i24 %in, i24 addrspace(0)* %out
  ret void
}

; i25 store: expected to be masked to 25 bits and written as one dword.
; FUNC-LABEL: {{^}}store_i25:
; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 0x1ffffff{{$}}
; SI: v_mov_b32_e32 [[VAND:v[0-9]+]], [[AND]]
; SI: buffer_store_dword [[VAND]]

; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG-NOT: MOVA_INT

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM-NOT: MOVA_INT
define void @store_i25(i25 addrspace(0)* %out, i25 %in) {
entry:
  store i25 %in, i25 addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v2i8:
; v2i8 is naturally 2B aligned, treat as i16
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG-NOT: MOVA_INT

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM-NOT: MOVA_INT

; SI: buffer_store_short
define void @store_v2i8(<2 x i8> addrspace(0)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i8>
  store <2 x i8> %0, <2 x i8> addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v2i8_unaligned:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_byte
define void @store_v2i8_unaligned(<2 x i8> addrspace(0)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i8>
  store <2 x i8> %0, <2 x i8> addrspace(0)* %out, align 1
  ret void
}


; FUNC-LABEL: {{^}}store_v2i16:
; v2i16 is naturally 4B aligned, treat as i32
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG-NOT: MOVA_INT

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM-NOT: MOVA_INT

; SI: buffer_store_dword
define void @store_v2i16(<2 x i16> addrspace(0)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i16>
  store <2 x i16> %0, <2 x i16> addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v2i16_unaligned:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_short
; SI: buffer_store_short
define void @store_v2i16_unaligned(<2 x i16> addrspace(0)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i16>
  store <2 x i16> %0, <2 x i16> addrspace(0)* %out, align 2
  ret void
}

; FUNC-LABEL: {{^}}store_v4i8:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG-NOT: MOVA_INT

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM-NOT: MOVA_INT

; SI: buffer_store_dword
define void @store_v4i8(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i8>
  store <4 x i8> %0, <4 x i8> addrspace(0)* %out
  ret void
}

; Byte-aligned v4i8 store: expected to be split into four byte stores.
; FUNC-LABEL: {{^}}store_v4i8_unaligned:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI-NOT: buffer_store_dword
define void @store_v4i8_unaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i8>
  store <4 x i8> %0, <4 x i8> addrspace(0)* %out, align 1
  ret void
}

; Byte-aligned v8i8 store: expected to be split into eight byte stores.
; FUNC-LABEL: {{^}}store_v8i8_unaligned:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI-NOT: buffer_store_dword
define void @store_v8i8_unaligned(<8 x i8> addrspace(0)* %out, <8 x i32> %in) {
entry:
  %0 = trunc <8 x i32> %in to <8 x i8>
  store <8 x i8> %0, <8 x i8> addrspace(0)* %out, align 1
  ret void
}

; 2-byte-aligned v4i8 store: expected to be split into two short stores.
; FUNC-LABEL: {{^}}store_v4i8_halfaligned:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; TODO: This load and store cannot be eliminated,
;       they might be different locations
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_short
; SI: buffer_store_short
; SI-NOT: buffer_store_dword
define void @store_v4i8_halfaligned(<4 x i8> addrspace(0)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i8>
  store <4 x i8> %0, <4 x i8> addrspace(0)* %out, align 2
  ret void
}

; floating-point store
; FUNC-LABEL: {{^}}store_f32:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_dword

define void @store_f32(float addrspace(0)* %out, float %in) {
  store float %in, float addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v4i16:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x2?
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
define void @store_v4i16(<4 x i16> addrspace(0)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i16>
  store <4 x i16> %0, <4 x i16> addrspace(0)* %out
  ret void
}

; vec2 floating-point stores
; FUNC-LABEL: {{^}}store_v2f32:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x2?
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword

define void @store_v2f32(<2 x float> addrspace(0)* %out, float %a, float %b) {
entry:
  %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
  %1 = insertelement <2 x float> %0, float %b, i32 1
  store <2 x float> %1, <2 x float> addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v3i32:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x2?
; XSI-DAG: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword

define void @store_v3i32(<3 x i32> addrspace(0)* %out, <3 x i32> %a) nounwind {
  store <3 x i32> %a, <3 x i32> addrspace(0)* %out, align 16
  ret void
}

; FUNC-LABEL: {{^}}store_v4i32:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x4?
; XSI: buffer_store_dwordx4
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
define void @store_v4i32(<4 x i32> addrspace(0)* %out, <4 x i32> %in) {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(0)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v4i32_unaligned:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x4?
; XSI: buffer_store_dwordx4
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
define void @store_v4i32_unaligned(<4 x i32> addrspace(0)* %out, <4 x i32> %in) {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(0)* %out, align 4
  ret void
}

; v4f32 store
; FUNC-LABEL: {{^}}store_v4f32:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x4?
; XSI: buffer_store_dwordx4
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
define void @store_v4f32(<4 x float> addrspace(0)* %out, <4 x float> addrspace(0)* %in) {
  %1 = load <4 x float>, <4 x float> addrspace(0)* %in
  store <4 x float> %1, <4 x float> addrspace(0)* %out
  ret void
}

; i64 truncated to i8 before the store: expected to become a byte store.
; FUNC-LABEL: {{^}}store_i64_i8:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_byte
define void @store_i64_i8(i8 addrspace(0)* %out, i64 %in) {
entry:
  %0 = trunc i64 %in to i8
  store i8 %0, i8 addrspace(0)* %out
  ret void
}

; i64 truncated to i16 before the store: expected to become a short store.
; FUNC-LABEL: {{^}}store_i64_i16:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}{{T[0-9]+\.[XYZW]}}, T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

; SI: buffer_store_short
define void @store_i64_i16(i16 addrspace(0)* %out, i64 %in) {
entry:
  %0 = trunc i64 %in to i16
  store i16 %0, i16 addrspace(0)* %out
  ret void
}

; The stores in this function are combined by the optimizer to create a
; 64-bit store with 32-bit alignment. This is legal and the legalizer
; should not try to split the 64-bit store back into 2 32-bit stores.

; FUNC-LABEL: {{^}}vecload2:
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x2?
; XSI: buffer_store_dwordx2
; SI: buffer_store_dword
; SI: buffer_store_dword
define void @vecload2(i32 addrspace(0)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
entry:
  %0 = load i32, i32 addrspace(2)* %mem, align 4
  %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
  %1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
  store i32 %0, i32 addrspace(0)* %out, align 4
  %arrayidx1 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 1
  store i32 %1, i32 addrspace(0)* %arrayidx1, align 4
  ret void
}

; When i128 was a legal type this program generated cannot select errors:

; FUNC-LABEL: {{^}}"i128-const-store":
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,
; EG: MOVA_INT
; EG: MOV {{[\* ]*}}T(0 + AR.x).X+,

; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,
; CM: MOVA_INT
; CM: MOV {{[\* ]*}}T(0 + AR.x).X+,

;TODO: why not x4?
; XSI: buffer_store_dwordx4
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; SI: buffer_store_dword
; Four consecutive i32 constant stores to %out[0..3].
define void @i128-const-store(i32 addrspace(0)* %out) {
entry:
  store i32 1, i32 addrspace(0)* %out, align 4
  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 1
  store i32 1, i32 addrspace(0)* %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 2
  store i32 2, i32 addrspace(0)* %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds i32, i32 addrspace(0)* %out, i64 3
  store i32 2, i32 addrspace(0)* %arrayidx6, align 4
  ret void
}


attributes #0 = { nounwind }