1 //===-- Elementary operations for aarch64 --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H
10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H
11 
12 #include "src/__support/architectures.h"
13 
14 #if defined(LLVM_LIBC_ARCH_AARCH64)
15 
16 #include <src/string/memory_utils/elements.h>
17 #include <stddef.h> // size_t
18 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
19 
20 #ifdef __ARM_NEON
21 #include <arm_neon.h>
22 #endif
23 
24 namespace __llvm_libc {
25 namespace aarch64_memset {
26 #ifdef __ARM_NEON
/// memset element that writes eight copies of one byte using a single
/// 64-bit NEON store.
struct Splat8 {
  /// Number of bytes written by one call to splat_set.
  static constexpr size_t SIZE = 8;

  /// Writes `value` into each of the SIZE bytes starting at `dst`.
  static void splat_set(char *dst, const unsigned char value) {
    // Broadcast the byte across all eight lanes, then store the vector.
    const uint8x8_t pattern = vdup_n_u8(value);
    vst1_u8(reinterpret_cast<uint8_t *>(dst), pattern);
  }
};
33 
/// memset element that writes sixteen copies of one byte using a single
/// 128-bit NEON store.
struct Splat16 {
  /// Number of bytes written by one call to splat_set.
  static constexpr size_t SIZE = 16;

  /// Writes `value` into each of the SIZE bytes starting at `dst`.
  static void splat_set(char *dst, const unsigned char value) {
    // Broadcast the byte across all sixteen lanes, then store the vector.
    const uint8x16_t pattern = vdupq_n_u8(value);
    vst1q_u8(reinterpret_cast<uint8_t *>(dst), pattern);
  }
};
40 
// With NEON available, the 8- and 16-byte memset elements are the vector
// splat stores Splat8 and Splat16.
41 using _8 = Splat8;
42 using _16 = Splat16;
43 #else
// Without NEON, the 8-byte element is the scalar one and the 16-byte element
// is built from it via Repeated<_8, 2> (declared elsewhere; presumably applies
// the 8-byte element twice -- confirm against elements.h).
44 using _8 = __llvm_libc::scalar::_8;
45 using _16 = Repeated<_8, 2>;
46 #endif // __ARM_NEON
47 
// Elements of 1 to 4 bytes are taken unchanged from the scalar namespace.
48 using _1 = __llvm_libc::scalar::_1;
49 using _2 = __llvm_libc::scalar::_2;
50 using _3 = __llvm_libc::scalar::_3;
51 using _4 = __llvm_libc::scalar::_4;
// The 32- and 64-byte elements are each composed from two of the next smaller
// element through Chained (declared elsewhere; presumably applies its
// arguments back to back -- confirm against elements.h).
52 using _32 = Chained<_16, _16>;
53 using _64 = Chained<_32, _32>;
54 
/// memset element that clears memory with the AArch64 `dc zva` (data cache
/// zero by virtual address) instruction.
///
/// SIZE is fixed at 64; the instruction's real block size is a property of the
/// CPU, so callers are expected to have checked it first (AArch64ZVA does).
struct ZVA {
  static constexpr size_t SIZE = 64;

  /// Issues `dc zva` for the address `dst`. `value` is not used: the
  /// instruction can only write zeros, so this element is only meaningful for
  /// a zero fill.
  static void splat_set(char *dst, const unsigned char value) {
    // The only difference between the two variants is the register-width
    // modifier applied to the pointer operand for 4-byte pointers.
#if __SIZEOF_POINTER__ != 4
    asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory");
#else
    asm("dc zva, %w[dst]" : : [dst] "r"(dst) : "memory");
#endif
  }
};
65 
AArch64ZVA(char * dst,size_t count)66 inline static bool AArch64ZVA(char *dst, size_t count) {
67   uint64_t zva_val;
68   asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val));
69   if ((zva_val & 31) != 4)
70     return false;
71   splat_set<Align<_64, Arg::_1>::Then<Loop<ZVA, _64>>>(dst, 0, count);
72   return true;
73 }
74 
75 } // namespace aarch64_memset
76 
77 namespace aarch64 {
78 
// In namespace aarch64, every element of up to 16 bytes is the scalar
// implementation, regardless of NEON availability.
79 using _1 = __llvm_libc::scalar::_1;
80 using _2 = __llvm_libc::scalar::_2;
81 using _3 = __llvm_libc::scalar::_3;
82 using _4 = __llvm_libc::scalar::_4;
83 using _8 = __llvm_libc::scalar::_8;
84 using _16 = __llvm_libc::scalar::_16;
85 
86 #ifdef __ARM_NEON
/// 32-byte comparison element implemented with NEON.
///
/// Both operations read exactly SIZE bytes from each pointer.
struct N32 {
  static constexpr size_t SIZE = 32;

  /// Returns true when the SIZE bytes at `lhs` and `rhs` are identical.
  static bool equals(const char *lhs, const char *rhs) {
    return mismatch_mask(lhs, rhs) == 0;
  }

  /// Returns 0 when the SIZE bytes at `lhs` and `rhs` are identical; otherwise
  /// returns the result of the scalar 4-byte three-way comparison applied to
  /// the first 4-byte group in which the inputs differ.
  static int three_way_compare(const char *lhs, const char *rhs) {
    const uint64_t mask = mismatch_mask(lhs, rhs);
    if (mask == 0)
      return 0;
    // The lowest non-zero byte of the mask identifies the first differing
    // 4-byte group (assuming a little-endian lane layout). Bit index / 8 is
    // that byte's position and * 4 turns it into a byte offset into the
    // inputs. `mask` is 64 bits wide, so the `long long` builtin is required:
    // __builtin_ctzl would truncate the argument where `long` is 32 bits and
    // is undefined when the truncated value is zero.
    const size_t index = (static_cast<size_t>(__builtin_ctzll(mask)) >> 3) << 2;
    // NOTE(review): these loads are only as aligned as lhs/rhs themselves;
    // this presumably relies on the target accepting unaligned 32-bit loads.
    uint32_t l = *((const uint32_t *)(lhs + index));
    uint32_t r = *((const uint32_t *)(rhs + index));
    return __llvm_libc::scalar::_4::scalar_three_way_compare(l, r);
  }

private:
  /// Folds the byte-wise XOR of the two 32-byte inputs into 64 bits.
  ///
  /// Two rounds of pairwise maximum reduce every 4 consecutive XOR bytes to a
  /// single byte, so byte k of the result is non-zero exactly when input bytes
  /// [4k, 4k + 4) differ. A zero result means the inputs are equal.
  static uint64_t mismatch_mask(const char *lhs, const char *rhs) {
    const uint8x16_t l_0 = vld1q_u8((const uint8_t *)lhs);
    const uint8x16_t r_0 = vld1q_u8((const uint8_t *)rhs);
    const uint8x16_t l_1 = vld1q_u8((const uint8_t *)(lhs + 16));
    const uint8x16_t r_1 = vld1q_u8((const uint8_t *)(rhs + 16));
    // First round: 32 XOR bytes -> 16 bytes (low half from bytes 0..15, high
    // half from bytes 16..31).
    const uint8x16_t folded =
        vpmaxq_u8(veorq_u8(l_0, r_0), veorq_u8(l_1, r_1));
    // Second round: 16 -> 8 bytes, which land in the low 64-bit lane.
    return vgetq_lane_u64(vreinterpretq_u64_u8(vpmaxq_u8(folded, folded)), 0);
  }
};
115 
// With NEON available, the 32-byte element is the vector implementation N32
// and the 64-byte element is built from it via Repeated<_32, 2> (declared
// elsewhere; presumably applies the 32-byte element twice -- confirm against
// elements.h).
116 using _32 = N32;
117 using _64 = Repeated<_32, 2>;
118 #else
// Without NEON, both sizes use the scalar implementations.
119 using _32 = __llvm_libc::scalar::_32;
120 using _64 = __llvm_libc::scalar::_64;
121 #endif // __ARM_NEON
122 
123 } // namespace aarch64
124 } // namespace __llvm_libc
125 
126 #endif // defined(LLVM_LIBC_ARCH_AARCH64)
127 
128 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H
129