1 // RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-win32 | FileCheck %s --check-prefix=X32 2 // RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64 3 4 void __vectorcall v1(int a, int b) {} 5 // X32: define x86_vectorcallcc void @"\01v1@@8"(i32 inreg %a, i32 inreg %b) 6 // X64: define x86_vectorcallcc void @"\01v1@@16"(i32 %a, i32 %b) 7 8 void __vectorcall v2(char a, char b) {} 9 // X32: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b) 10 // X64: define x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b) 11 12 struct Small { int x; }; 13 void __vectorcall v3(int a, struct Small b, int c) {} 14 // X32: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c) 15 // X64: define x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c) 16 17 struct Large { int a[5]; }; 18 void __vectorcall v4(int a, struct Large b, int c) {} 19 // X32: define x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c) 20 // X64: define x86_vectorcallcc void @"\01v4@@40"(i32 %a, %struct.Large* %b, i32 %c) 21 22 struct HFA2 { double x, y; }; 23 struct HFA4 { double w, x, y, z; }; 24 struct HFA5 { double v, w, x, y, z; }; 25 26 void __vectorcall hfa1(int a, struct HFA4 b, int c) {} 27 // X32: define x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg %a, %struct.HFA4 inreg %b.coerce, i32 inreg %c) 28 // X64: define x86_vectorcallcc void @"\01hfa1@@48"(i32 %a, %struct.HFA4 inreg %b.coerce, i32 %c) 29 30 // HFAs that would require more than six total SSE registers are passed 31 // indirectly. Additional vector arguments can consume the rest of the SSE 32 // registers. 33 void __vectorcall hfa2(struct HFA4 a, struct HFA4 b, double c) {} 34 // X32: define x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* inreg %b, double %c) 35 // X64: define x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* %b, double %c) 36 37 // Ensure that we pass builtin types directly while counting them against the 38 // SSE register usage. 39 void __vectorcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {} 40 // X32: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f) 41 // X64: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* %f) 42 43 // Aggregates with more than four elements are not HFAs and are passed byval. 44 // Because they are not classified as homogeneous, they don't get special 45 // handling to ensure alignment. 46 void __vectorcall hfa4(struct HFA5 a) {} 47 // X32: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4) 48 // X64: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* %a) 49 50 // Return HFAs of 4 or fewer elements in registers. 51 static struct HFA2 g_hfa2; 52 struct HFA2 __vectorcall hfa5(void) { return g_hfa2; } 53 // X32: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"() 54 // X64: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"() 55 56 typedef float __attribute__((vector_size(16))) v4f32; 57 struct HVA2 { v4f32 x, y; }; 58 struct HVA3 { v4f32 w, x, y; }; 59 struct HVA4 { v4f32 w, x, y, z; }; 60 struct HVA5 { v4f32 w, x, y, z, p; }; 61 62 v4f32 __vectorcall hva1(int a, struct HVA4 b, int c) {return b.w;} 63 // X32: define x86_vectorcallcc <4 x float> @"\01hva1@@72"(i32 inreg %a, %struct.HVA4 inreg %b.coerce, i32 inreg %c) 64 // X64: define x86_vectorcallcc <4 x float> @"\01hva1@@80"(i32 %a, %struct.HVA4 inreg %b.coerce, i32 %c) 65 66 v4f32 __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {return c;} 67 // X32: define x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b, <4 x float> %c) 68 // X64: define x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b, <4 x float> %c) 69 70 v4f32 __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {return f.x;} 71 // X32: define x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f) 72 // X64: define x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f) 73 74 // vector types have higher priority then HVA structures, So vector types are allocated first 75 // and HVAs are allocated if enough registers are available 76 v4f32 __vectorcall hva4(struct HVA4 a, struct HVA2 b, v4f32 c) {return b.y;} 77 // X32: define x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* inreg %b, <4 x float> %c) 78 // X64: define x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* %b, <4 x float> %c) 79 80 v4f32 __vectorcall hva5(struct HVA3 a, struct HVA3 b, v4f32 c, struct HVA2 d) {return d.y;} 81 // X32: define x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* inreg %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce) 82 // X64: define x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce) 83 84 struct HVA4 __vectorcall hva6(struct HVA4 a, struct HVA4 b) { return b;} 85 // X32: define x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b) 86 // X64: define x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b) 87 88 struct HVA5 __vectorcall hva7() {struct HVA5 a = {}; return a;} 89 // X32: define x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* inreg noalias sret %agg.result) 90 // X64: define x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* noalias sret %agg.result) 91 92 v4f32 __vectorcall hva8(v4f32 a, v4f32 b, v4f32 c, v4f32 d, int e, v4f32 f) {return f;} 93 // X32: define x86_vectorcallcc <4 x float> @"\01hva8@@84"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 inreg %e, <4 x float> %f) 94 // X64: define x86_vectorcallcc <4 x float> @"\01hva8@@88"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f) 95 96 typedef float __attribute__((ext_vector_type(3))) v3f32; 97 struct OddSizeHVA { v3f32 x, y; }; 98 99 void __vectorcall odd_size_hva(struct OddSizeHVA a) {} 100 // X32: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce) 101 // X64: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce) 102 103 // The Vectorcall ABI only allows passing the first 6 items in registers, so this shouldn't 104 // consider 'p7' as a register. Instead p5 gets put into the register on the second pass. 105 struct HFA2 __vectorcall AddParticles(struct HFA2 p1, float p2, struct HFA4 p3, int p4, struct HFA2 p5, float p6, float p7){ return p1;} 106 // X32: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@80"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7) 107 // X64: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@96"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* %p3, i32 %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7) 108