xref: /linux-6.15/include/linux/unroll.h (revision c6594d64)
/* SPDX-License-Identifier: GPL-2.0 */

/*
 * Copyright (C) 2023 Google LLC.
 */
67cff549dSKP Singh 
77cff549dSKP Singh #ifndef __UNROLL_H
87cff549dSKP Singh #define __UNROLL_H
97cff549dSKP Singh 
107cff549dSKP Singh #include <linux/args.h>
117cff549dSKP Singh 
/*
 * __pick_unrolled(x, y) - select the loop-unrolling pragma for this compiler
 * @x: pragma text used when CONFIG_CC_IS_CLANG is defined
 * @y: pragma text used when CONFIG_GCC_VERSION is at least 80000
 *
 * The selected argument is stringified and handed to _Pragma(). When neither
 * condition holds, the macro expands to nothing, so annotated loops still
 * build, just without any unrolling request.
 */
#ifdef CONFIG_CC_IS_CLANG
#define __pick_unrolled(x, y)		_Pragma(#x)
#elif CONFIG_GCC_VERSION >= 80000
#define __pick_unrolled(x, y)		_Pragma(#y)
#else
#define __pick_unrolled(x, y)		/* not supported */
#endif
19*c6594d64SAlexander Lobakin 
/**
 * unrolled - loop attributes to ask the compiler to unroll it
 *
 * Usage:
 *
 * #define BATCH 8
 *
 *	unrolled_count(BATCH)
 *	for (u32 i = 0; i < BATCH; i++)
 *		// loop body without cross-iteration dependencies
 *
 * This is only a hint and the compiler is free to disable unrolling if it
 * thinks the count is suboptimal and may hurt performance and/or hugely
 * increase object code size.
 * A cross-iteration dependency exists when iteration x + 1 depends on what
 * iteration x does with variables. Avoiding such dependencies is not a strict
 * requirement, but provides the best performance and object code size.
 * Available only on Clang and GCC 8.x onwards.
 */
39*c6594d64SAlexander Lobakin 
/*
 * Ask the compiler to pick an optimal unroll count, Clang only: the GCC
 * argument is left empty, so no unroll factor is requested there.
 */
#define unrolled							\
	__pick_unrolled(clang loop unroll(enable), /* nothing */)
43*c6594d64SAlexander Lobakin 
/*
 * Unroll each @n iterations of the loop.
 * @n: unroll factor; it is pasted verbatim into the pragma text of both
 *     compilers (macro arguments such as a #define'd constant are expanded
 *     first).
 */
#define unrolled_count(n)						\
	__pick_unrolled(clang loop unroll_count(n), GCC unroll n)
47*c6594d64SAlexander Lobakin 
/*
 * Unroll the whole loop.
 * Clang has a dedicated "full" mode; for GCC a fixed factor of 65534 is
 * requested instead (presumably the largest factor GCC accepts -- confirm
 * against the GCC pragma documentation).
 */
#define unrolled_full							\
	__pick_unrolled(clang loop unroll(full), GCC unroll 65534)
51*c6594d64SAlexander Lobakin 
/*
 * Never unroll the loop.
 * Clang gets an explicit "disable"; GCC gets an unroll factor of 1
 * (presumably GCC's way of blocking unrolling -- confirm against the GCC
 * pragma documentation).
 */
#define unrolled_none							\
	__pick_unrolled(clang loop unroll(disable), GCC unroll 1)
55*c6594d64SAlexander Lobakin 
/*
 * UNROLL(N, MACRO, args...) - repeat MACRO at preprocessing time
 * @N: repetition count; it is joined onto the __UNROLL_ prefix, so a
 *     matching __UNROLL_<N> helper must exist
 * @MACRO: function-like macro invoked as MACRO(index, args)
 * @args: extra arguments forwarded unchanged to every invocation
 *
 * CONCATENATE() is not defined in this file (presumably it comes from
 * <linux/args.h>).
 */
#define UNROLL(N, MACRO, args...) CONCATENATE(__UNROLL_, N)(MACRO, args)
577cff549dSKP Singh 
/*
 * __UNROLL_<k>(MACRO, args...) expands to
 *
 *	MACRO(0, args) MACRO(1, args) ... MACRO(k - 1, args)
 *
 * in ascending index order. Each helper reuses the previous one and appends
 * one more invocation; __UNROLL_0 expands to nothing. Helpers are provided
 * for k = 0..20 only.
 */
#define __UNROLL_0(MACRO, args...)
#define __UNROLL_1(MACRO, args...)  __UNROLL_0(MACRO, args)  MACRO(0, args)
#define __UNROLL_2(MACRO, args...)  __UNROLL_1(MACRO, args)  MACRO(1, args)
#define __UNROLL_3(MACRO, args...)  __UNROLL_2(MACRO, args)  MACRO(2, args)
#define __UNROLL_4(MACRO, args...)  __UNROLL_3(MACRO, args)  MACRO(3, args)
#define __UNROLL_5(MACRO, args...)  __UNROLL_4(MACRO, args)  MACRO(4, args)
#define __UNROLL_6(MACRO, args...)  __UNROLL_5(MACRO, args)  MACRO(5, args)
#define __UNROLL_7(MACRO, args...)  __UNROLL_6(MACRO, args)  MACRO(6, args)
#define __UNROLL_8(MACRO, args...)  __UNROLL_7(MACRO, args)  MACRO(7, args)
#define __UNROLL_9(MACRO, args...)  __UNROLL_8(MACRO, args)  MACRO(8, args)
#define __UNROLL_10(MACRO, args...) __UNROLL_9(MACRO, args)  MACRO(9, args)
#define __UNROLL_11(MACRO, args...) __UNROLL_10(MACRO, args) MACRO(10, args)
#define __UNROLL_12(MACRO, args...) __UNROLL_11(MACRO, args) MACRO(11, args)
#define __UNROLL_13(MACRO, args...) __UNROLL_12(MACRO, args) MACRO(12, args)
#define __UNROLL_14(MACRO, args...) __UNROLL_13(MACRO, args) MACRO(13, args)
#define __UNROLL_15(MACRO, args...) __UNROLL_14(MACRO, args) MACRO(14, args)
#define __UNROLL_16(MACRO, args...) __UNROLL_15(MACRO, args) MACRO(15, args)
#define __UNROLL_17(MACRO, args...) __UNROLL_16(MACRO, args) MACRO(16, args)
#define __UNROLL_18(MACRO, args...) __UNROLL_17(MACRO, args) MACRO(17, args)
#define __UNROLL_19(MACRO, args...) __UNROLL_18(MACRO, args) MACRO(18, args)
#define __UNROLL_20(MACRO, args...) __UNROLL_19(MACRO, args) MACRO(19, args)
797cff549dSKP Singh 
807cff549dSKP Singh #endif /* __UNROLL_H */
81