//===-- Elementary operations for aarch64 --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H

#if defined(__arm__) || defined(__aarch64__)

#include <src/string/memory_utils/elements.h>
#include <stddef.h> // size_t
#include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t

#ifdef __ARM_NEON
#include <arm_neon.h>
#endif

namespace __llvm_libc {

// Element types used by the memset implementation.
namespace aarch64_memset {
#ifdef __ARM_NEON
// Stores 8 copies of `value` at `dst` using a 64-bit NEON register.
struct Splat8 {
  static constexpr size_t kSize = 8;
  static void SplatSet(char *dst, const unsigned char value) {
    vst1_u8(reinterpret_cast<uint8_t *>(dst), vdup_n_u8(value));
  }
};

// Stores 16 copies of `value` at `dst` using a 128-bit NEON register.
struct Splat16 {
  static constexpr size_t kSize = 16;
  static void SplatSet(char *dst, const unsigned char value) {
    vst1q_u8(reinterpret_cast<uint8_t *>(dst), vdupq_n_u8(value));
  }
};

using _8 = Splat8;
using _16 = Splat16;
#else
using _8 = __llvm_libc::scalar::_8;
using _16 = Repeated<_8, 2>;
#endif // __ARM_NEON

using _1 = __llvm_libc::scalar::_1;
using _2 = __llvm_libc::scalar::_2;
using _3 = __llvm_libc::scalar::_3;
using _4 = __llvm_libc::scalar::_4;
using _32 = Chained<_16, _16>;
using _64 = Chained<_32, _32>;

// Element that issues a single `dc zva` (data cache zero by virtual address)
// on `dst`. The instruction can only write zeroes, so `value` is ignored;
// callers must only use this element when the fill value is 0.
struct ZVA {
  static constexpr size_t kSize = 64;
  static void SplatSet(char *dst, const unsigned char value) {
    (void)value; // `dc zva` always zeroes; see the comment on the struct.
    // The "memory" clobber tells the compiler that memory is modified here.
    asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory");
  }
};

// Attempts to zero `count` bytes starting at `dst` using `dc zva`.
//
// Returns false without touching memory when DCZID_EL0 does not report
// exactly "zeroing permitted, 64-byte block"; returns true after running the
// ZVA-based SplatSet otherwise.
inline static bool AArch64ZVA(char *dst, size_t count) {
  uint64_t zva_val;
  asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val));
  // Bits [3:0] (BS) hold log2 of the block size in 4-byte words and bit 4
  // (DZP) is set when `dc zva` is prohibited. The low five bits being exactly
  // 4 therefore means DZP == 0 and a block of 2^4 words == 64 bytes, which is
  // what ZVA::kSize assumes.
  if ((zva_val & 31) != 4)
    return false;
  // NOTE(review): Align/Then/Loop come from elements.h; presumably this aligns
  // `dst` to 64 bytes with _64 stores and then loops over ZVA blocks, with _64
  // handling the tail -- confirm against elements.h.
  SplatSet<Align<_64, Arg::_1>::Then<Loop<ZVA, _64>>>(dst, 0, count);
  return true;
}

} // namespace aarch64_memset

// Element types used for comparisons.
namespace aarch64 {

using _1 = __llvm_libc::scalar::_1;
using _2 = __llvm_libc::scalar::_2;
using _3 = __llvm_libc::scalar::_3;
using _4 = __llvm_libc::scalar::_4;
using _8 = __llvm_libc::scalar::_8;
using _16 = __llvm_libc::scalar::_16;

#ifdef __ARM_NEON
// 32-byte comparison element implemented with NEON.
struct N32 {
  static constexpr size_t kSize = 32;

  // Returns true when the 32 bytes at `lhs` and `rhs` are identical.
  static bool Equals(const char *lhs, const char *rhs) {
    return DiffMask(lhs, rhs) == 0;
  }

  // Returns 0 when the 32 bytes at `lhs` and `rhs` are identical; otherwise
  // returns the result of scalar::_4::ScalarThreeWayCompare on the first
  // 4-byte group that differs.
  static int ThreeWayCompare(const char *lhs, const char *rhs) {
    const uint64_t mask = DiffMask(lhs, rhs);
    if (mask == 0)
      return 0;
    // Byte k of the mask covers input bytes [4k, 4k + 4): the lowest set bit
    // divided by 8 is k, and multiplying by 4 yields the byte offset of the
    // first differing group. `mask` is 64 bits wide, so the `ll` builtin is
    // required to stay correct where `unsigned long` is only 32 bits.
    const size_t index = (static_cast<size_t>(__builtin_ctzll(mask)) >> 3)
                         << 2;
    // Copy the words out instead of dereferencing a casted pointer: the
    // inputs carry no alignment guarantee and are not uint32_t objects.
    uint32_t l;
    uint32_t r;
    __builtin_memcpy(&l, lhs + index, sizeof(l));
    __builtin_memcpy(&r, rhs + index, sizeof(r));
    return __llvm_libc::scalar::_4::ScalarThreeWayCompare(l, r);
  }

private:
  // Builds a 64-bit summary of where the two 32-byte inputs differ.
  //
  // Both 16-byte halves are XORed, then two pairwise-max passes fold adjacent
  // bytes together. In the low 64 bits of the result, byte k is non-zero if
  // and only if input bytes [4k, 4k + 4) contain a difference.
  static uint64_t DiffMask(const char *lhs, const char *rhs) {
    const uint8_t *const a = reinterpret_cast<const uint8_t *>(lhs);
    const uint8_t *const b = reinterpret_cast<const uint8_t *>(rhs);
    const uint8x16_t diff_lo = veorq_u8(vld1q_u8(a), vld1q_u8(b));
    const uint8x16_t diff_hi = veorq_u8(vld1q_u8(a + 16), vld1q_u8(b + 16));
    // First pass: 32 bytes -> 16 bytes (one per input byte pair).
    const uint8x16_t folded = vpmaxq_u8(diff_lo, diff_hi);
    // Second pass: the low 8 bytes now summarize all 32 input bytes.
    const uint8x16_t summary = vpmaxq_u8(folded, folded);
    return vgetq_lane_u64(vreinterpretq_u64_u8(summary), 0);
  }
};

using _32 = N32;
#else
using _32 = __llvm_libc::scalar::_32;
#endif // __ARM_NEON

} // namespace aarch64
} // namespace __llvm_libc

#endif // defined(__arm__) || defined(__aarch64__)

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H