1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2018 Intel Corporation 3 */ 4 5 /* 6 * This is a simple functional test for rte_smp_mb() implementation. 7 * I.E. make sure that LOAD and STORE operations that precede the 8 * rte_smp_mb() call are globally visible across the lcores 9 * before the LOAD and STORE operations that follows it. 10 * The test uses simple implementation of Peterson's lock algorithm 11 * (https://en.wikipedia.org/wiki/Peterson%27s_algorithm) 12 * for two execution units to make sure that rte_smp_mb() prevents 13 * store-load reordering to happen. 14 * Also when executed on a single lcore could be used as a approximate 15 * estimation of number of cycles particular implementation of rte_smp_mb() 16 * will take. 17 */ 18 19 #include <stdio.h> 20 #include <string.h> 21 #include <stdint.h> 22 #include <inttypes.h> 23 24 #include <rte_memory.h> 25 #include <rte_per_lcore.h> 26 #include <rte_launch.h> 27 #include <rte_eal.h> 28 #include <rte_lcore.h> 29 #include <rte_pause.h> 30 #include <rte_random.h> 31 #include <rte_cycles.h> 32 #include <rte_vect.h> 33 #include <rte_debug.h> 34 35 #include "test.h" 36 37 #define ADD_MAX 8 38 #define ITER_MAX 0x1000000 39 40 enum plock_use_type { 41 USE_MB, 42 USE_SMP_MB, 43 USE_NUM 44 }; 45 46 struct plock { 47 volatile uint32_t flag[2]; 48 volatile uint32_t victim; 49 enum plock_use_type utype; 50 }; 51 52 /* 53 * Lock plus protected by it two counters. 54 */ 55 struct plock_test { 56 struct plock lock; 57 uint64_t val; 58 uint64_t iter; 59 }; 60 61 /* 62 * Each active lcore shares plock_test struct with it's left and right 63 * neighbours. 64 */ 65 struct lcore_plock_test { 66 struct plock_test *pt[2]; /* shared, lock-protected data */ 67 uint64_t sum[2]; /* local copy of the shared data */ 68 uint64_t iter; /* number of iterations to perform */ 69 uint32_t lc; /* given lcore id */ 70 }; 71 72 static inline void 73 store_load_barrier(uint32_t utype) 74 { 75 if (utype == USE_MB) 76 rte_mb(); 77 else if (utype == USE_SMP_MB) 78 rte_smp_mb(); 79 else 80 RTE_VERIFY(0); 81 } 82 83 /* 84 * Peterson lock implementation. 85 */ 86 static void 87 plock_lock(struct plock *l, uint32_t self) 88 { 89 uint32_t other; 90 91 other = self ^ 1; 92 93 l->flag[self] = 1; 94 rte_smp_wmb(); 95 l->victim = self; 96 97 store_load_barrier(l->utype); 98 99 while (l->flag[other] == 1 && l->victim == self) 100 rte_pause(); 101 rte_smp_rmb(); 102 } 103 104 static void 105 plock_unlock(struct plock *l, uint32_t self) 106 { 107 rte_smp_wmb(); 108 l->flag[self] = 0; 109 } 110 111 static void 112 plock_reset(struct plock *l, enum plock_use_type utype) 113 { 114 memset(l, 0, sizeof(*l)); 115 l->utype = utype; 116 } 117 118 /* 119 * grab the lock, update both counters, release the lock. 120 */ 121 static void 122 plock_add(struct plock_test *pt, uint32_t self, uint32_t n) 123 { 124 plock_lock(&pt->lock, self); 125 pt->iter++; 126 pt->val += n; 127 plock_unlock(&pt->lock, self); 128 } 129 130 static int 131 plock_test1_lcore(void *data) 132 { 133 uint64_t tm; 134 uint32_t lc, ln; 135 uint64_t i, n; 136 struct lcore_plock_test *lpt; 137 138 lpt = data; 139 lc = rte_lcore_id(); 140 141 /* find lcore_plock_test struct for given lcore */ 142 for (ln = rte_lcore_count(); ln != 0 && lpt->lc != lc; lpt++, ln--) 143 ; 144 145 if (ln == 0) { 146 printf("%s(%u) error at init\n", __func__, lc); 147 return -1; 148 } 149 150 n = rte_rand() % ADD_MAX; 151 tm = rte_get_timer_cycles(); 152 153 /* 154 * for each iteration: 155 * - update shared, locked protected data in a safe manner 156 * - update local copy of the shared data 157 */ 158 for (i = 0; i != lpt->iter; i++) { 159 160 plock_add(lpt->pt[0], 0, n); 161 plock_add(lpt->pt[1], 1, n); 162 163 lpt->sum[0] += n; 164 lpt->sum[1] += n; 165 166 n = (n + 1) % ADD_MAX; 167 } 168 169 tm = rte_get_timer_cycles() - tm; 170 171 printf("%s(%u): %" PRIu64 " iterations finished, in %" PRIu64 172 " cycles, %#Lf cycles/iteration, " 173 "local sum={%" PRIu64 ", %" PRIu64 "}\n", 174 __func__, lc, i, tm, (long double)tm / i, 175 lpt->sum[0], lpt->sum[1]); 176 return 0; 177 } 178 179 /* 180 * For N active lcores we allocate N+1 lcore_plock_test structures. 181 * Each active lcore shares one lcore_plock_test structure with its 182 * left lcore neighbor and one lcore_plock_test structure with its 183 * right lcore neighbor. 184 * During the test each lcore updates data in both shared structures and 185 * its local copies. Then at validation phase we check that our shared 186 * and local data are the same. 187 */ 188 static int 189 plock_test(uint64_t iter, enum plock_use_type utype) 190 { 191 int32_t rc; 192 uint32_t i, lc, n; 193 uint64_t *sum; 194 struct plock_test *pt; 195 struct lcore_plock_test *lpt; 196 197 /* init phase, allocate and initialize shared data */ 198 199 n = rte_lcore_count(); 200 pt = calloc(n + 1, sizeof(*pt)); 201 lpt = calloc(n, sizeof(*lpt)); 202 sum = calloc(n + 1, sizeof(*sum)); 203 204 printf("%s(iter=%" PRIu64 ", utype=%u) started on %u lcores\n", 205 __func__, iter, utype, n); 206 207 if (pt == NULL || lpt == NULL || sum == NULL) { 208 printf("%s: failed to allocate memory for %u lcores\n", 209 __func__, n); 210 free(pt); 211 free(lpt); 212 free(sum); 213 return -ENOMEM; 214 } 215 216 for (i = 0; i != n + 1; i++) 217 plock_reset(&pt[i].lock, utype); 218 219 i = 0; 220 RTE_LCORE_FOREACH(lc) { 221 222 lpt[i].lc = lc; 223 lpt[i].iter = iter; 224 lpt[i].pt[0] = pt + i; 225 lpt[i].pt[1] = pt + i + 1; 226 i++; 227 } 228 229 lpt[i - 1].pt[1] = pt; 230 231 for (i = 0; i != n; i++) 232 printf("lpt[%u]={lc=%u, pt={%p, %p},};\n", 233 i, lpt[i].lc, lpt[i].pt[0], lpt[i].pt[1]); 234 235 236 /* test phase - start and wait for completion on each active lcore */ 237 238 rte_eal_mp_remote_launch(plock_test1_lcore, lpt, CALL_MAIN); 239 rte_eal_mp_wait_lcore(); 240 241 /* validation phase - make sure that shared and local data match */ 242 243 for (i = 0; i != n; i++) { 244 sum[i] += lpt[i].sum[0]; 245 sum[i + 1] += lpt[i].sum[1]; 246 } 247 248 sum[0] += sum[i]; 249 250 rc = 0; 251 for (i = 0; i != n; i++) { 252 printf("%s: sum[%u]=%" PRIu64 ", pt[%u].val=%" PRIu64 ", pt[%u].iter=%" PRIu64 ";\n", 253 __func__, i, sum[i], i, pt[i].val, i, pt[i].iter); 254 255 /* race condition occurred, lock doesn't work properly */ 256 if (sum[i] != pt[i].val || 2 * iter != pt[i].iter) { 257 printf("error: local and shared sums don't match\n"); 258 rc = -1; 259 } 260 } 261 262 free(pt); 263 free(lpt); 264 free(sum); 265 266 printf("%s(utype=%u) returns %d\n", __func__, utype, rc); 267 return rc; 268 } 269 270 static int 271 test_barrier(void) 272 { 273 int32_t i, ret, rc[USE_NUM]; 274 275 for (i = 0; i != RTE_DIM(rc); i++) 276 rc[i] = plock_test(ITER_MAX, i); 277 278 ret = 0; 279 for (i = 0; i != RTE_DIM(rc); i++) { 280 printf("%s for utype=%d %s\n", 281 __func__, i, rc[i] == 0 ? "passed" : "failed"); 282 ret |= rc[i]; 283 } 284 285 return ret; 286 } 287 288 REGISTER_TEST_COMMAND(barrier_autotest, test_barrier); 289