; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3 | FileCheck %s --check-prefixes=X64,ALIGN
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+ssse3,sse-unaligned-mem | FileCheck %s --check-prefixes=X64,UNALIGN

; There are no MMX operations in @t1

define void @t1(i32 %a, ptr %P) nounwind {
; X32-LABEL: t1:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shll $12, %ecx
; X32-NEXT:    movd %ecx, %xmm0
; X32-NEXT:    psllq $32, %xmm0
; X32-NEXT:    movq %xmm0, (%eax)
; X32-NEXT:    retl
;
; X64-LABEL: t1:
; X64:       # %bb.0:
; X64-NEXT:    shll $12, %edi
; X64-NEXT:    movd %edi, %xmm0
; X64-NEXT:    psllq $32, %xmm0
; X64-NEXT:    movq %xmm0, (%rsi)
; X64-NEXT:    retq
  %tmp12 = shl i32 %a, 12
  %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
  %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
  %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
  store x86_mmx %tmp23, ptr %P
  ret void
}

define <4 x float> @t2(ptr %P) nounwind {
; X32-LABEL: t2:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorps %xmm0, %xmm0
; X32-NEXT:    xorps %xmm1, %xmm1
; X32-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; X32-NEXT:    retl
;
; X64-LABEL: t2:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
  ret <4 x float> %tmp2
}

define <4 x float> @t3(ptr %P) nounwind {
; X32-LABEL: t3:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorps %xmm0, %xmm0
; X32-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X32-NEXT:    retl
;
; X64-LABEL: t3:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
  ret <4 x float> %tmp2
}

define <4 x float> @t4(ptr %P) nounwind {
; X32-LABEL: t4:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    xorps %xmm1, %xmm1
; X32-NEXT:    xorps %xmm0, %xmm0
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X32-NEXT:    retl
;
; X64-LABEL: t4:
; X64:       # %bb.0:
; X64-NEXT:    xorps %xmm1, %xmm1
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X64-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

define <4 x float> @t4_under_aligned(ptr %P) nounwind {
; X32-LABEL: t4_under_aligned:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movups (%eax), %xmm0
; X32-NEXT:    xorps %xmm1, %xmm1
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; X32-NEXT:    retl
;
; ALIGN-LABEL: t4_under_aligned:
; ALIGN:       # %bb.0:
; ALIGN-NEXT:    movups (%rdi), %xmm0
; ALIGN-NEXT:    xorps %xmm1, %xmm1
; ALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
; ALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; ALIGN-NEXT:    retq
;
; UNALIGN-LABEL: t4_under_aligned:
; UNALIGN:       # %bb.0:
; UNALIGN-NEXT:    xorps %xmm1, %xmm1
; UNALIGN-NEXT:    xorps %xmm0, %xmm0
; UNALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],mem[3,0]
; UNALIGN-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; UNALIGN-NEXT:    retq
  %tmp1 = load <4 x float>, ptr %P, align 4
  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
  ret <4 x float> %tmp2
}

define <16 x i8> @t5(<16 x i8> %x) nounwind {
; X32-LABEL: t5:
; X32:       # %bb.0:
; X32-NEXT:    psrlw $8, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: t5:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

define <16 x i8> @t6(<16 x i8> %x) nounwind {
; X32-LABEL: t6:
; X32:       # %bb.0:
; X32-NEXT:    psrlw $8, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: t6:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $8, %xmm0
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}

define <16 x i8> @t7(<16 x i8> %x) nounwind {
; X32-LABEL: t7:
; X32:       # %bb.0:
; X32-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X32-NEXT:    retl
;
; X64-LABEL: t7:
; X64:       # %bb.0:
; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
  ret <16 x i8> %s
}

define <16 x i8> @t8(<16 x i8> %x) nounwind {
; X32-LABEL: t8:
; X32:       # %bb.0:
; X32-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X32-NEXT:    retl
;
; X64-LABEL: t8:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
  ret <16 x i8> %s
}

define <16 x i8> @t9(<16 x i8> %x) nounwind {
; X32-LABEL: t9:
; X32:       # %bb.0:
; X32-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X32-NEXT:    retl
;
; X64-LABEL: t9:
; X64:       # %bb.0:
; X64-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; X64-NEXT:    retq
  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %s
}