// RUN: %clang_cc1 -no-opaque-pointers -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s

#if !__has_extension(matrix_types)
#error Expected extension 'matrix_types' to be enabled
#endif

#if !__has_extension(matrix_types_scalar_division)
#error Expected extension 'matrix_types_scalar_division' to be enabled
#endif

typedef double dx5x5_t __attribute__((matrix_type(5, 5)));

// CHECK: %struct.Matrix = type { i8, [12 x float], float }

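// Loads and stores of a matrix value are lowered to memory operations on a
// flat <rows*cols x elt> vector. With typed (non-opaque) pointers, the
// in-memory type is the corresponding [rows*cols x elt] array, so each
// access goes through a bitcast to the vector pointer type.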
void load_store_double(dx5x5_t *a, dx5x5_t *b) {
  // CHECK-LABEL:  define{{.*}} void @load_store_double(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    %a.addr = alloca [25 x double]*, align 8
  // CHECK-NEXT:    %b.addr = alloca [25 x double]*, align 8
  // CHECK-NEXT:    store [25 x double]* %a, [25 x double]** %a.addr, align 8
  // CHECK-NEXT:    store [25 x double]* %b, [25 x double]** %b.addr, align 8
  // CHECK-NEXT:    %0 = load [25 x double]*, [25 x double]** %b.addr, align 8
  // CHECK-NEXT:    %1 = bitcast [25 x double]* %0 to <25 x double>*
  // CHECK-NEXT:    %2 = load <25 x double>, <25 x double>* %1, align 8
  // CHECK-NEXT:    %3 = load [25 x double]*, [25 x double]** %a.addr, align 8
  // CHECK-NEXT:    %4 = bitcast [25 x double]* %3 to <25 x double>*
  // CHECK-NEXT:    store <25 x double> %2, <25 x double>* %4, align 8
  // CHECK-NEXT:    ret void

  *a = *b;
}

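// The same lowering applies to the other element types below; only the
// element count, element type, and alignment (that of the element type,
// not of the whole vector) change in the IR.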
typedef float fx3x4_t __attribute__((matrix_type(3, 4)));
void load_store_float(fx3x4_t *a, fx3x4_t *b) {
  // CHECK-LABEL:  define{{.*}} void @load_store_float(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    %a.addr = alloca [12 x float]*, align 8
  // CHECK-NEXT:    %b.addr = alloca [12 x float]*, align 8
  // CHECK-NEXT:    store [12 x float]* %a, [12 x float]** %a.addr, align 8
  // CHECK-NEXT:    store [12 x float]* %b, [12 x float]** %b.addr, align 8
  // CHECK-NEXT:    %0 = load [12 x float]*, [12 x float]** %b.addr, align 8
  // CHECK-NEXT:    %1 = bitcast [12 x float]* %0 to <12 x float>*
  // CHECK-NEXT:    %2 = load <12 x float>, <12 x float>* %1, align 4
  // CHECK-NEXT:    %3 = load [12 x float]*, [12 x float]** %a.addr, align 8
  // CHECK-NEXT:    %4 = bitcast [12 x float]* %3 to <12 x float>*
  // CHECK-NEXT:    store <12 x float> %2, <12 x float>* %4, align 4
  // CHECK-NEXT:    ret void

  *a = *b;
}

typedef int ix3x4_t __attribute__((matrix_type(4, 3)));
void load_store_int(ix3x4_t *a, ix3x4_t *b) {
  // CHECK-LABEL:  define{{.*}} void @load_store_int(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    %a.addr = alloca [12 x i32]*, align 8
  // CHECK-NEXT:    %b.addr = alloca [12 x i32]*, align 8
  // CHECK-NEXT:    store [12 x i32]* %a, [12 x i32]** %a.addr, align 8
  // CHECK-NEXT:    store [12 x i32]* %b, [12 x i32]** %b.addr, align 8
  // CHECK-NEXT:    %0 = load [12 x i32]*, [12 x i32]** %b.addr, align 8
  // CHECK-NEXT:    %1 = bitcast [12 x i32]* %0 to <12 x i32>*
  // CHECK-NEXT:    %2 = load <12 x i32>, <12 x i32>* %1, align 4
  // CHECK-NEXT:    %3 = load [12 x i32]*, [12 x i32]** %a.addr, align 8
  // CHECK-NEXT:    %4 = bitcast [12 x i32]* %3 to <12 x i32>*
  // CHECK-NEXT:    store <12 x i32> %2, <12 x i32>* %4, align 4
  // CHECK-NEXT:    ret void

  *a = *b;
}

typedef unsigned long long ullx3x4_t __attribute__((matrix_type(4, 3)));
void load_store_ull(ullx3x4_t *a, ullx3x4_t *b) {
  // CHECK-LABEL:  define{{.*}} void @load_store_ull(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    %a.addr = alloca [12 x i64]*, align 8
  // CHECK-NEXT:    %b.addr = alloca [12 x i64]*, align 8
  // CHECK-NEXT:    store [12 x i64]* %a, [12 x i64]** %a.addr, align 8
  // CHECK-NEXT:    store [12 x i64]* %b, [12 x i64]** %b.addr, align 8
  // CHECK-NEXT:    %0 = load [12 x i64]*, [12 x i64]** %b.addr, align 8
  // CHECK-NEXT:    %1 = bitcast [12 x i64]* %0 to <12 x i64>*
  // CHECK-NEXT:    %2 = load <12 x i64>, <12 x i64>* %1, align 8
  // CHECK-NEXT:    %3 = load [12 x i64]*, [12 x i64]** %a.addr, align 8
  // CHECK-NEXT:    %4 = bitcast [12 x i64]* %3 to <12 x i64>*
  // CHECK-NEXT:    store <12 x i64> %2, <12 x i64>* %4, align 8
  // CHECK-NEXT:    ret void

  *a = *b;
}

typedef __fp16 fp16x3x4_t __attribute__((matrix_type(4, 3)));
void load_store_fp16(fp16x3x4_t *a, fp16x3x4_t *b) {
  // CHECK-LABEL:  define{{.*}} void @load_store_fp16(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    %a.addr = alloca [12 x half]*, align 8
  // CHECK-NEXT:    %b.addr = alloca [12 x half]*, align 8
  // CHECK-NEXT:    store [12 x half]* %a, [12 x half]** %a.addr, align 8
  // CHECK-NEXT:    store [12 x half]* %b, [12 x half]** %b.addr, align 8
  // CHECK-NEXT:    %0 = load [12 x half]*, [12 x half]** %b.addr, align 8
  // CHECK-NEXT:    %1 = bitcast [12 x half]* %0 to <12 x half>*
  // CHECK-NEXT:    %2 = load <12 x half>, <12 x half>* %1, align 2
  // CHECK-NEXT:    %3 = load [12 x half]*, [12 x half]** %a.addr, align 8
  // CHECK-NEXT:    %4 = bitcast [12 x half]* %3 to <12 x half>*
  // CHECK-NEXT:    store <12 x half> %2, <12 x half>* %4, align 2
  // CHECK-NEXT:    ret void

  *a = *b;
}

typedef float fx3x3_t __attribute__((matrix_type(3, 3)));

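// Matrix arguments are passed by value as flattened vectors; the parameter
// is spilled to an array-typed alloca through a bitcast to the vector type.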
void parameter_passing(fx3x3_t a, fx3x3_t *b) {
  // CHECK-LABEL: define{{.*}} void @parameter_passing(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    %a.addr = alloca [9 x float], align 4
  // CHECK-NEXT:    %b.addr = alloca [9 x float]*, align 8
  // CHECK-NEXT:    %0 = bitcast [9 x float]* %a.addr to <9 x float>*
  // CHECK-NEXT:    store <9 x float> %a, <9 x float>* %0, align 4
  // CHECK-NEXT:    store [9 x float]* %b, [9 x float]** %b.addr, align 8
  // CHECK-NEXT:    %1 = load <9 x float>, <9 x float>* %0, align 4
  // CHECK-NEXT:    %2 = load [9 x float]*, [9 x float]** %b.addr, align 8
  // CHECK-NEXT:    %3 = bitcast [9 x float]* %2 to <9 x float>*
  // CHECK-NEXT:    store <9 x float> %1, <9 x float>* %3, align 4
  // CHECK-NEXT:    ret void
  *b = a;
}

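// Matrix values are likewise returned directly as vectors.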
fx3x3_t return_matrix(fx3x3_t *a) {
  // CHECK-LABEL: define{{.*}} <9 x float> @return_matrix
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    %a.addr = alloca [9 x float]*, align 8
  // CHECK-NEXT:    store [9 x float]* %a, [9 x float]** %a.addr, align 8
  // CHECK-NEXT:    %0 = load [9 x float]*, [9 x float]** %a.addr, align 8
  // CHECK-NEXT:    %1 = bitcast [9 x float]* %0 to <9 x float>*
  // CHECK-NEXT:    %2 = load <9 x float>, <9 x float>* %1, align 4
  // CHECK-NEXT:    ret <9 x float> %2
  return *a;
}

typedef struct {
  char Tmp1;
  fx3x4_t Data;
  float Tmp2;
} Matrix;

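// As a struct member, the matrix is laid out in its array form (see the
// %struct.Matrix check at the top of the file); member accesses GEP to the
// array field and bitcast it to the vector type.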
void matrix_struct(Matrix *a, Matrix *b) {
  // CHECK-LABEL: define{{.*}} void @matrix_struct(
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    %a.addr = alloca %struct.Matrix*, align 8
  // CHECK-NEXT:    %b.addr = alloca %struct.Matrix*, align 8
  // CHECK-NEXT:    store %struct.Matrix* %a, %struct.Matrix** %a.addr, align 8
  // CHECK-NEXT:    store %struct.Matrix* %b, %struct.Matrix** %b.addr, align 8
  // CHECK-NEXT:    %0 = load %struct.Matrix*, %struct.Matrix** %a.addr, align 8
  // CHECK-NEXT:    %Data = getelementptr inbounds %struct.Matrix, %struct.Matrix* %0, i32 0, i32 1
  // CHECK-NEXT:    %1 = bitcast [12 x float]* %Data to <12 x float>*
  // CHECK-NEXT:    %2 = load <12 x float>, <12 x float>* %1, align 4
  // CHECK-NEXT:    %3 = load %struct.Matrix*, %struct.Matrix** %b.addr, align 8
  // CHECK-NEXT:    %Data1 = getelementptr inbounds %struct.Matrix, %struct.Matrix* %3, i32 0, i32 1
  // CHECK-NEXT:    %4 = bitcast [12 x float]* %Data1 to <12 x float>*
  // CHECK-NEXT:    store <12 x float> %2, <12 x float>* %4, align 4
  // CHECK-NEXT:    ret void
  b->Data = a->Data;
}

typedef double dx4x4_t __attribute__((matrix_type(4, 4)));
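// When a matrix is used as an indirect (memory) inline-asm operand, the
// pointer argument is annotated with elementtype(<16 x double>) so the
// pointee type of the constraint is preserved.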
void matrix_inline_asm_memory_readwrite(void) {
  // CHECK-LABEL: define{{.*}} void @matrix_inline_asm_memory_readwrite()
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[ALLOCA:%.+]] = alloca [16 x double], align 8
  // CHECK-NEXT:    [[PTR1:%.+]] = bitcast [16 x double]* [[ALLOCA]] to <16 x double>*
  // CHECK-NEXT:    [[PTR2:%.+]] = bitcast [16 x double]* [[ALLOCA]] to <16 x double>*
  // CHECK-NEXT:    [[VAL:%.+]] = load <16 x double>, <16 x double>* [[PTR2]], align 8
  // CHECK-NEXT:    call void asm sideeffect "", "=*r|m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(<16 x double>* elementtype(<16 x double>) [[PTR1]], <16 x double> [[VAL]])
  // CHECK-NEXT:    ret void

  dx4x4_t m;
  asm volatile(""
               : "+r,m"(m)
               :
               : "memory");
}