1 // RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s 2 3 #if !__has_extension(matrix_types) 4 #error Expected extension 'matrix_types' to be enabled 5 #endif 6 7 #if !__has_extension(matrix_types_scalar_division) 8 #error Expected extension 'matrix_types_scalar_division' to be enabled 9 #endif 10 11 typedef double dx5x5_t __attribute__((matrix_type(5, 5))); 12 13 // CHECK: %struct.Matrix = type { i8, [12 x float], float } 14 15 void load_store_double(dx5x5_t *a, dx5x5_t *b) { 16 // CHECK-LABEL: define{{.*}} void @load_store_double( 17 // CHECK-NEXT: entry: 18 // CHECK-NEXT: %a.addr = alloca [25 x double]*, align 8 19 // CHECK-NEXT: %b.addr = alloca [25 x double]*, align 8 20 // CHECK-NEXT: store [25 x double]* %a, [25 x double]** %a.addr, align 8 21 // CHECK-NEXT: store [25 x double]* %b, [25 x double]** %b.addr, align 8 22 // CHECK-NEXT: %0 = load [25 x double]*, [25 x double]** %b.addr, align 8 23 // CHECK-NEXT: %1 = bitcast [25 x double]* %0 to <25 x double>* 24 // CHECK-NEXT: %2 = load <25 x double>, <25 x double>* %1, align 8 25 // CHECK-NEXT: %3 = load [25 x double]*, [25 x double]** %a.addr, align 8 26 // CHECK-NEXT: %4 = bitcast [25 x double]* %3 to <25 x double>* 27 // CHECK-NEXT: store <25 x double> %2, <25 x double>* %4, align 8 28 // CHECK-NEXT: ret void 29 30 *a = *b; 31 } 32 33 typedef float fx3x4_t __attribute__((matrix_type(3, 4))); 34 void load_store_float(fx3x4_t *a, fx3x4_t *b) { 35 // CHECK-LABEL: define{{.*}} void @load_store_float( 36 // CHECK-NEXT: entry: 37 // CHECK-NEXT: %a.addr = alloca [12 x float]*, align 8 38 // CHECK-NEXT: %b.addr = alloca [12 x float]*, align 8 39 // CHECK-NEXT: store [12 x float]* %a, [12 x float]** %a.addr, align 8 40 // CHECK-NEXT: store [12 x float]* %b, [12 x float]** %b.addr, align 8 41 // CHECK-NEXT: %0 = load [12 x float]*, [12 x float]** %b.addr, align 8 42 // CHECK-NEXT: %1 = bitcast [12 x float]* %0 to <12 x float>* 43 // CHECK-NEXT: %2 = load <12 x float>, <12 x float>* %1, align 4 44 // CHECK-NEXT: %3 = load [12 x float]*, [12 x float]** %a.addr, align 8 45 // CHECK-NEXT: %4 = bitcast [12 x float]* %3 to <12 x float>* 46 // CHECK-NEXT: store <12 x float> %2, <12 x float>* %4, align 4 47 // CHECK-NEXT: ret void 48 49 *a = *b; 50 } 51 52 typedef int ix3x4_t __attribute__((matrix_type(4, 3))); 53 void load_store_int(ix3x4_t *a, ix3x4_t *b) { 54 // CHECK-LABEL: define{{.*}} void @load_store_int( 55 // CHECK-NEXT: entry: 56 // CHECK-NEXT: %a.addr = alloca [12 x i32]*, align 8 57 // CHECK-NEXT: %b.addr = alloca [12 x i32]*, align 8 58 // CHECK-NEXT: store [12 x i32]* %a, [12 x i32]** %a.addr, align 8 59 // CHECK-NEXT: store [12 x i32]* %b, [12 x i32]** %b.addr, align 8 60 // CHECK-NEXT: %0 = load [12 x i32]*, [12 x i32]** %b.addr, align 8 61 // CHECK-NEXT: %1 = bitcast [12 x i32]* %0 to <12 x i32>* 62 // CHECK-NEXT: %2 = load <12 x i32>, <12 x i32>* %1, align 4 63 // CHECK-NEXT: %3 = load [12 x i32]*, [12 x i32]** %a.addr, align 8 64 // CHECK-NEXT: %4 = bitcast [12 x i32]* %3 to <12 x i32>* 65 // CHECK-NEXT: store <12 x i32> %2, <12 x i32>* %4, align 4 66 // CHECK-NEXT: ret void 67 68 *a = *b; 69 } 70 71 typedef unsigned long long ullx3x4_t __attribute__((matrix_type(4, 3))); 72 void load_store_ull(ullx3x4_t *a, ullx3x4_t *b) { 73 // CHECK-LABEL: define{{.*}} void @load_store_ull( 74 // CHECK-NEXT: entry: 75 // CHECK-NEXT: %a.addr = alloca [12 x i64]*, align 8 76 // CHECK-NEXT: %b.addr = alloca [12 x i64]*, align 8 77 // CHECK-NEXT: store [12 x i64]* %a, [12 x i64]** %a.addr, align 8 78 // CHECK-NEXT: store [12 x i64]* %b, [12 x i64]** %b.addr, align 8 79 // CHECK-NEXT: %0 = load [12 x i64]*, [12 x i64]** %b.addr, align 8 80 // CHECK-NEXT: %1 = bitcast [12 x i64]* %0 to <12 x i64>* 81 // CHECK-NEXT: %2 = load <12 x i64>, <12 x i64>* %1, align 8 82 // CHECK-NEXT: %3 = load [12 x i64]*, [12 x i64]** %a.addr, align 8 83 // CHECK-NEXT: %4 = bitcast [12 x i64]* %3 to <12 x i64>* 84 // CHECK-NEXT: store <12 x i64> %2, <12 x i64>* %4, align 8 85 // CHECK-NEXT: ret void 86 87 *a = *b; 88 } 89 90 typedef __fp16 fp16x3x4_t __attribute__((matrix_type(4, 3))); 91 void load_store_fp16(fp16x3x4_t *a, fp16x3x4_t *b) { 92 // CHECK-LABEL: define{{.*}} void @load_store_fp16( 93 // CHECK-NEXT: entry: 94 // CHECK-NEXT: %a.addr = alloca [12 x half]*, align 8 95 // CHECK-NEXT: %b.addr = alloca [12 x half]*, align 8 96 // CHECK-NEXT: store [12 x half]* %a, [12 x half]** %a.addr, align 8 97 // CHECK-NEXT: store [12 x half]* %b, [12 x half]** %b.addr, align 8 98 // CHECK-NEXT: %0 = load [12 x half]*, [12 x half]** %b.addr, align 8 99 // CHECK-NEXT: %1 = bitcast [12 x half]* %0 to <12 x half>* 100 // CHECK-NEXT: %2 = load <12 x half>, <12 x half>* %1, align 2 101 // CHECK-NEXT: %3 = load [12 x half]*, [12 x half]** %a.addr, align 8 102 // CHECK-NEXT: %4 = bitcast [12 x half]* %3 to <12 x half>* 103 // CHECK-NEXT: store <12 x half> %2, <12 x half>* %4, align 2 104 // CHECK-NEXT: ret void 105 106 *a = *b; 107 } 108 109 typedef float fx3x3_t __attribute__((matrix_type(3, 3))); 110 111 void parameter_passing(fx3x3_t a, fx3x3_t *b) { 112 // CHECK-LABEL: define{{.*}} void @parameter_passing( 113 // CHECK-NEXT: entry: 114 // CHECK-NEXT: %a.addr = alloca [9 x float], align 4 115 // CHECK-NEXT: %b.addr = alloca [9 x float]*, align 8 116 // CHECK-NEXT: %0 = bitcast [9 x float]* %a.addr to <9 x float>* 117 // CHECK-NEXT: store <9 x float> %a, <9 x float>* %0, align 4 118 // CHECK-NEXT: store [9 x float]* %b, [9 x float]** %b.addr, align 8 119 // CHECK-NEXT: %1 = load <9 x float>, <9 x float>* %0, align 4 120 // CHECK-NEXT: %2 = load [9 x float]*, [9 x float]** %b.addr, align 8 121 // CHECK-NEXT: %3 = bitcast [9 x float]* %2 to <9 x float>* 122 // CHECK-NEXT: store <9 x float> %1, <9 x float>* %3, align 4 123 // CHECK-NEXT: ret void 124 *b = a; 125 } 126 127 fx3x3_t return_matrix(fx3x3_t *a) { 128 // CHECK-LABEL: define{{.*}} <9 x float> @return_matrix 129 // CHECK-NEXT: entry: 130 // CHECK-NEXT: %a.addr = alloca [9 x float]*, align 8 131 // CHECK-NEXT: store [9 x float]* %a, [9 x float]** %a.addr, align 8 132 // CHECK-NEXT: %0 = load [9 x float]*, [9 x float]** %a.addr, align 8 133 // CHECK-NEXT: %1 = bitcast [9 x float]* %0 to <9 x float>* 134 // CHECK-NEXT: %2 = load <9 x float>, <9 x float>* %1, align 4 135 // CHECK-NEXT: ret <9 x float> %2 136 return *a; 137 } 138 139 typedef struct { 140 char Tmp1; 141 fx3x4_t Data; 142 float Tmp2; 143 } Matrix; 144 145 void matrix_struct(Matrix *a, Matrix *b) { 146 // CHECK-LABEL: define{{.*}} void @matrix_struct( 147 // CHECK-NEXT: entry: 148 // CHECK-NEXT: %a.addr = alloca %struct.Matrix*, align 8 149 // CHECK-NEXT: %b.addr = alloca %struct.Matrix*, align 8 150 // CHECK-NEXT: store %struct.Matrix* %a, %struct.Matrix** %a.addr, align 8 151 // CHECK-NEXT: store %struct.Matrix* %b, %struct.Matrix** %b.addr, align 8 152 // CHECK-NEXT: %0 = load %struct.Matrix*, %struct.Matrix** %a.addr, align 8 153 // CHECK-NEXT: %Data = getelementptr inbounds %struct.Matrix, %struct.Matrix* %0, i32 0, i32 1 154 // CHECK-NEXT: %1 = bitcast [12 x float]* %Data to <12 x float>* 155 // CHECK-NEXT: %2 = load <12 x float>, <12 x float>* %1, align 4 156 // CHECK-NEXT: %3 = load %struct.Matrix*, %struct.Matrix** %b.addr, align 8 157 // CHECK-NEXT: %Data1 = getelementptr inbounds %struct.Matrix, %struct.Matrix* %3, i32 0, i32 1 158 // CHECK-NEXT: %4 = bitcast [12 x float]* %Data1 to <12 x float>* 159 // CHECK-NEXT: store <12 x float> %2, <12 x float>* %4, align 4 160 // CHECK-NEXT: ret void 161 b->Data = a->Data; 162 } 163 164 typedef double dx4x4_t __attribute__((matrix_type(4, 4))); 165 void matrix_inline_asm_memory_readwrite() { 166 // CHECK-LABEL: define{{.*}} void @matrix_inline_asm_memory_readwrite() 167 // CHECK-NEXT: entry: 168 // CHECK-NEXT: [[ALLOCA:%.+]] = alloca [16 x double], align 8 169 // CHECK-NEXT: [[PTR1:%.+]] = bitcast [16 x double]* [[ALLOCA]] to <16 x double>* 170 // CHECK-NEXT: [[PTR2:%.+]] = bitcast [16 x double]* [[ALLOCA]] to <16 x double>* 171 // CHECK-NEXT: [[VAL:%.+]] = load <16 x double>, <16 x double>* [[PTR2]], align 8 172 // CHECK-NEXT: call void asm sideeffect "", "=*r|m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(<16 x double>* elementtype(<16 x double>) [[PTR1]], <16 x double> [[VAL]]) 173 // CHECK-NEXT: ret void 174 175 dx4x4_t m; 176 asm volatile("" 177 : "+r,m"(m) 178 : 179 : "memory"); 180 } 181