1 //===-- Elementary operations for aarch64 --------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H 10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H 11 12 #include "src/__support/architectures.h" 13 14 #if defined(LLVM_LIBC_ARCH_AARCH64) 15 16 #include <src/string/memory_utils/elements.h> 17 #include <stddef.h> // size_t 18 #include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t 19 20 #ifdef __ARM_NEON 21 #include <arm_neon.h> 22 #endif 23 24 namespace __llvm_libc { 25 namespace aarch64_memset { 26 #ifdef __ARM_NEON 27 struct Splat8 { 28 static constexpr size_t SIZE = 8; 29 static void splat_set(char *dst, const unsigned char value) { 30 vst1_u8((uint8_t *)dst, vdup_n_u8(value)); 31 } 32 }; 33 34 struct Splat16 { 35 static constexpr size_t SIZE = 16; 36 static void splat_set(char *dst, const unsigned char value) { 37 vst1q_u8((uint8_t *)dst, vdupq_n_u8(value)); 38 } 39 }; 40 41 using _8 = Splat8; 42 using _16 = Splat16; 43 #else 44 using _8 = __llvm_libc::scalar::_8; 45 using _16 = Repeated<_8, 2>; 46 #endif // __ARM_NEON 47 48 using _1 = __llvm_libc::scalar::_1; 49 using _2 = __llvm_libc::scalar::_2; 50 using _3 = __llvm_libc::scalar::_3; 51 using _4 = __llvm_libc::scalar::_4; 52 using _32 = Chained<_16, _16>; 53 using _64 = Chained<_32, _32>; 54 55 struct ZVA { 56 static constexpr size_t SIZE = 64; 57 static void splat_set(char *dst, const unsigned char value) { 58 #if __SIZEOF_POINTER__ == 4 59 asm("dc zva, %w[dst]" : : [dst] "r"(dst) : "memory"); 60 #else 61 asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory"); 62 #endif 63 } 64 }; 65 66 inline static bool AArch64ZVA(char *dst, size_t count) { 67 uint64_t zva_val; 68 asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val)); 69 if ((zva_val & 31) != 4) 70 return false; 71 splat_set<Align<_64, Arg::_1>::Then<Loop<ZVA, _64>>>(dst, 0, count); 72 return true; 73 } 74 75 } // namespace aarch64_memset 76 77 namespace aarch64 { 78 79 using _1 = __llvm_libc::scalar::_1; 80 using _2 = __llvm_libc::scalar::_2; 81 using _3 = __llvm_libc::scalar::_3; 82 using _4 = __llvm_libc::scalar::_4; 83 using _8 = __llvm_libc::scalar::_8; 84 using _16 = __llvm_libc::scalar::_16; 85 86 #ifdef __ARM_NEON 87 struct N32 { 88 static constexpr size_t SIZE = 32; 89 static bool equals(const char *lhs, const char *rhs) { 90 uint8x16_t l_0 = vld1q_u8((const uint8_t *)lhs); 91 uint8x16_t r_0 = vld1q_u8((const uint8_t *)rhs); 92 uint8x16_t l_1 = vld1q_u8((const uint8_t *)(lhs + 16)); 93 uint8x16_t r_1 = vld1q_u8((const uint8_t *)(rhs + 16)); 94 uint8x16_t temp = vpmaxq_u8(veorq_u8(l_0, r_0), veorq_u8(l_1, r_1)); 95 uint64_t res = 96 vgetq_lane_u64(vreinterpretq_u64_u8(vpmaxq_u8(temp, temp)), 0); 97 return res == 0; 98 } 99 static int three_way_compare(const char *lhs, const char *rhs) { 100 uint8x16_t l_0 = vld1q_u8((const uint8_t *)lhs); 101 uint8x16_t r_0 = vld1q_u8((const uint8_t *)rhs); 102 uint8x16_t l_1 = vld1q_u8((const uint8_t *)(lhs + 16)); 103 uint8x16_t r_1 = vld1q_u8((const uint8_t *)(rhs + 16)); 104 uint8x16_t temp = vpmaxq_u8(veorq_u8(l_0, r_0), veorq_u8(l_1, r_1)); 105 uint64_t res = 106 vgetq_lane_u64(vreinterpretq_u64_u8(vpmaxq_u8(temp, temp)), 0); 107 if (res == 0) 108 return 0; 109 size_t index = (__builtin_ctzl(res) >> 3) << 2; 110 uint32_t l = *((const uint32_t *)(lhs + index)); 111 uint32_t r = *((const uint32_t *)(rhs + index)); 112 return __llvm_libc::scalar::_4::scalar_three_way_compare(l, r); 113 } 114 }; 115 116 using _32 = N32; 117 using _64 = Repeated<_32, 2>; 118 #else 119 using _32 = __llvm_libc::scalar::_32; 120 using _64 = __llvm_libc::scalar::_64; 121 #endif // __ARM_NEON 122 123 } // namespace aarch64 124 } // namespace __llvm_libc 125 126 #endif // defined(LLVM_LIBC_ARCH_AARCH64) 127 128 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H 129