17cff549dSKP Singh /* SPDX-License-Identifier: GPL-2.0 */ 27cff549dSKP Singh 37cff549dSKP Singh /* 47cff549dSKP Singh * Copyright (C) 2023 Google LLC. 57cff549dSKP Singh */ 67cff549dSKP Singh 77cff549dSKP Singh #ifndef __UNROLL_H 87cff549dSKP Singh #define __UNROLL_H 97cff549dSKP Singh 107cff549dSKP Singh #include <linux/args.h> 117cff549dSKP Singh 12*c6594d64SAlexander Lobakin #ifdef CONFIG_CC_IS_CLANG 13*c6594d64SAlexander Lobakin #define __pick_unrolled(x, y) _Pragma(#x) 14*c6594d64SAlexander Lobakin #elif CONFIG_GCC_VERSION >= 80000 15*c6594d64SAlexander Lobakin #define __pick_unrolled(x, y) _Pragma(#y) 16*c6594d64SAlexander Lobakin #else 17*c6594d64SAlexander Lobakin #define __pick_unrolled(x, y) /* not supported */ 18*c6594d64SAlexander Lobakin #endif 19*c6594d64SAlexander Lobakin 20*c6594d64SAlexander Lobakin /** 21*c6594d64SAlexander Lobakin * unrolled - loop attributes to ask the compiler to unroll it 22*c6594d64SAlexander Lobakin * 23*c6594d64SAlexander Lobakin * Usage: 24*c6594d64SAlexander Lobakin * 25*c6594d64SAlexander Lobakin * #define BATCH 8 26*c6594d64SAlexander Lobakin * 27*c6594d64SAlexander Lobakin * unrolled_count(BATCH) 28*c6594d64SAlexander Lobakin * for (u32 i = 0; i < BATCH; i++) 29*c6594d64SAlexander Lobakin * // loop body without cross-iteration dependencies 30*c6594d64SAlexander Lobakin * 31*c6594d64SAlexander Lobakin * This is only a hint and the compiler is free to disable unrolling if it 32*c6594d64SAlexander Lobakin * thinks the count is suboptimal and may hurt performance and/or hugely 33*c6594d64SAlexander Lobakin * increase object code size. 34*c6594d64SAlexander Lobakin * Not having any cross-iteration dependencies (i.e. when iter x + 1 depends 35*c6594d64SAlexander Lobakin * on what iter x will do with variables) is not a strict requirement, but 36*c6594d64SAlexander Lobakin * provides best performance and object code size. 37*c6594d64SAlexander Lobakin * Available only on Clang and GCC 8.x onwards. 38*c6594d64SAlexander Lobakin */ 39*c6594d64SAlexander Lobakin 40*c6594d64SAlexander Lobakin /* Ask the compiler to pick an optimal unroll count, Clang only */ 41*c6594d64SAlexander Lobakin #define unrolled \ 42*c6594d64SAlexander Lobakin __pick_unrolled(clang loop unroll(enable), /* nothing */) 43*c6594d64SAlexander Lobakin 44*c6594d64SAlexander Lobakin /* Unroll each @n iterations of the loop */ 45*c6594d64SAlexander Lobakin #define unrolled_count(n) \ 46*c6594d64SAlexander Lobakin __pick_unrolled(clang loop unroll_count(n), GCC unroll n) 47*c6594d64SAlexander Lobakin 48*c6594d64SAlexander Lobakin /* Unroll the whole loop */ 49*c6594d64SAlexander Lobakin #define unrolled_full \ 50*c6594d64SAlexander Lobakin __pick_unrolled(clang loop unroll(full), GCC unroll 65534) 51*c6594d64SAlexander Lobakin 52*c6594d64SAlexander Lobakin /* Never unroll the loop */ 53*c6594d64SAlexander Lobakin #define unrolled_none \ 54*c6594d64SAlexander Lobakin __pick_unrolled(clang loop unroll(disable), GCC unroll 1) 55*c6594d64SAlexander Lobakin 567cff549dSKP Singh #define UNROLL(N, MACRO, args...) CONCATENATE(__UNROLL_, N)(MACRO, args) 577cff549dSKP Singh 587cff549dSKP Singh #define __UNROLL_0(MACRO, args...) 597cff549dSKP Singh #define __UNROLL_1(MACRO, args...) __UNROLL_0(MACRO, args) MACRO(0, args) 607cff549dSKP Singh #define __UNROLL_2(MACRO, args...) __UNROLL_1(MACRO, args) MACRO(1, args) 617cff549dSKP Singh #define __UNROLL_3(MACRO, args...) __UNROLL_2(MACRO, args) MACRO(2, args) 627cff549dSKP Singh #define __UNROLL_4(MACRO, args...) __UNROLL_3(MACRO, args) MACRO(3, args) 637cff549dSKP Singh #define __UNROLL_5(MACRO, args...) __UNROLL_4(MACRO, args) MACRO(4, args) 647cff549dSKP Singh #define __UNROLL_6(MACRO, args...) __UNROLL_5(MACRO, args) MACRO(5, args) 657cff549dSKP Singh #define __UNROLL_7(MACRO, args...) __UNROLL_6(MACRO, args) MACRO(6, args) 667cff549dSKP Singh #define __UNROLL_8(MACRO, args...) __UNROLL_7(MACRO, args) MACRO(7, args) 677cff549dSKP Singh #define __UNROLL_9(MACRO, args...) __UNROLL_8(MACRO, args) MACRO(8, args) 687cff549dSKP Singh #define __UNROLL_10(MACRO, args...) __UNROLL_9(MACRO, args) MACRO(9, args) 697cff549dSKP Singh #define __UNROLL_11(MACRO, args...) __UNROLL_10(MACRO, args) MACRO(10, args) 707cff549dSKP Singh #define __UNROLL_12(MACRO, args...) __UNROLL_11(MACRO, args) MACRO(11, args) 717cff549dSKP Singh #define __UNROLL_13(MACRO, args...) __UNROLL_12(MACRO, args) MACRO(12, args) 727cff549dSKP Singh #define __UNROLL_14(MACRO, args...) __UNROLL_13(MACRO, args) MACRO(13, args) 737cff549dSKP Singh #define __UNROLL_15(MACRO, args...) __UNROLL_14(MACRO, args) MACRO(14, args) 747cff549dSKP Singh #define __UNROLL_16(MACRO, args...) __UNROLL_15(MACRO, args) MACRO(15, args) 757cff549dSKP Singh #define __UNROLL_17(MACRO, args...) __UNROLL_16(MACRO, args) MACRO(16, args) 767cff549dSKP Singh #define __UNROLL_18(MACRO, args...) __UNROLL_17(MACRO, args) MACRO(17, args) 777cff549dSKP Singh #define __UNROLL_19(MACRO, args...) __UNROLL_18(MACRO, args) MACRO(18, args) 787cff549dSKP Singh #define __UNROLL_20(MACRO, args...) __UNROLL_19(MACRO, args) MACRO(19, args) 797cff549dSKP Singh 807cff549dSKP Singh #endif /* __UNROLL_H */ 81