1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "test.h" 35 36 #include <unistd.h> 37 #include <string.h> 38 #include <rte_mempool.h> 39 #include <rte_cycles.h> 40 #include <rte_common.h> 41 #include <rte_mbuf.h> 42 #include <rte_distributor.h> 43 44 #define ITER_POWER 20 /* log 2 of how many iterations we do when timing. */ 45 #define BURST 32 46 #define BIG_BATCH 1024 47 48 /* static vars - zero initialized by default */ 49 static volatile int quit; 50 static volatile unsigned worker_idx; 51 52 struct worker_stats { 53 volatile unsigned handled_packets; 54 } __rte_cache_aligned; 55 struct worker_stats worker_stats[RTE_MAX_LCORE]; 56 57 /* worker thread used for testing the time to do a round-trip of a cache 58 * line between two cores and back again 59 */ 60 static void 61 flip_bit(volatile uint64_t *arg) 62 { 63 uint64_t old_val = 0; 64 while (old_val != 2) { 65 while (!*arg) 66 rte_pause(); 67 old_val = *arg; 68 *arg = 0; 69 } 70 } 71 72 /* test case to time the number of cycles to round-trip a cache line between 73 * two cores and back again. 74 */ 75 static void 76 time_cache_line_switch(void) 77 { 78 /* allocate a full cache line for data, we use only first byte of it */ 79 uint64_t data[RTE_CACHE_LINE_SIZE*3 / sizeof(uint64_t)]; 80 81 unsigned i, slaveid = rte_get_next_lcore(rte_lcore_id(), 0, 0); 82 volatile uint64_t *pdata = &data[0]; 83 *pdata = 1; 84 rte_eal_remote_launch((lcore_function_t *)flip_bit, &data[0], slaveid); 85 while (*pdata) 86 rte_pause(); 87 88 const uint64_t start_time = rte_rdtsc(); 89 for (i = 0; i < (1 << ITER_POWER); i++) { 90 while (*pdata) 91 rte_pause(); 92 *pdata = 1; 93 } 94 const uint64_t end_time = rte_rdtsc(); 95 96 while (*pdata) 97 rte_pause(); 98 *pdata = 2; 99 rte_eal_wait_lcore(slaveid); 100 printf("==== Cache line switch test ===\n"); 101 printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER), 102 end_time-start_time); 103 printf("Ticks per iteration = %"PRIu64"\n\n", 104 (end_time-start_time) >> ITER_POWER); 105 } 106 107 /* returns the total count of the number of packets handled by the worker 108 * functions given below. 109 */ 110 static unsigned 111 total_packet_count(void) 112 { 113 unsigned i, count = 0; 114 for (i = 0; i < worker_idx; i++) 115 count += worker_stats[i].handled_packets; 116 return count; 117 } 118 119 /* resets the packet counts for a new test */ 120 static void 121 clear_packet_count(void) 122 { 123 memset(&worker_stats, 0, sizeof(worker_stats)); 124 } 125 126 /* this is the basic worker function for performance tests. 127 * it does nothing but return packets and count them. 128 */ 129 static int 130 handle_work(void *arg) 131 { 132 struct rte_mbuf *pkt = NULL; 133 struct rte_distributor *d = arg; 134 unsigned count = 0; 135 unsigned id = __sync_fetch_and_add(&worker_idx, 1); 136 137 pkt = rte_distributor_get_pkt(d, id, NULL); 138 while (!quit) { 139 worker_stats[id].handled_packets++, count++; 140 pkt = rte_distributor_get_pkt(d, id, pkt); 141 } 142 worker_stats[id].handled_packets++, count++; 143 rte_distributor_return_pkt(d, id, pkt); 144 return 0; 145 } 146 147 /* this basic performance test just repeatedly sends in 32 packets at a time 148 * to the distributor and verifies at the end that we got them all in the worker 149 * threads and finally how long per packet the processing took. 150 */ 151 static inline int 152 perf_test(struct rte_distributor *d, struct rte_mempool *p) 153 { 154 unsigned i; 155 uint64_t start, end; 156 struct rte_mbuf *bufs[BURST]; 157 158 clear_packet_count(); 159 if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) { 160 printf("Error getting mbufs from pool\n"); 161 return -1; 162 } 163 /* ensure we have different hash value for each pkt */ 164 for (i = 0; i < BURST; i++) 165 bufs[i]->hash.usr = i; 166 167 start = rte_rdtsc(); 168 for (i = 0; i < (1<<ITER_POWER); i++) 169 rte_distributor_process(d, bufs, BURST); 170 end = rte_rdtsc(); 171 172 do { 173 usleep(100); 174 rte_distributor_process(d, NULL, 0); 175 } while (total_packet_count() < (BURST << ITER_POWER)); 176 177 printf("=== Performance test of distributor ===\n"); 178 printf("Time per burst: %"PRIu64"\n", (end - start) >> ITER_POWER); 179 printf("Time per packet: %"PRIu64"\n\n", 180 ((end - start) >> ITER_POWER)/BURST); 181 rte_mempool_put_bulk(p, (void *)bufs, BURST); 182 183 for (i = 0; i < rte_lcore_count() - 1; i++) 184 printf("Worker %u handled %u packets\n", i, 185 worker_stats[i].handled_packets); 186 printf("Total packets: %u (%x)\n", total_packet_count(), 187 total_packet_count()); 188 printf("=== Perf test done ===\n\n"); 189 190 return 0; 191 } 192 193 /* Useful function which ensures that all worker functions terminate */ 194 static void 195 quit_workers(struct rte_distributor *d, struct rte_mempool *p) 196 { 197 const unsigned num_workers = rte_lcore_count() - 1; 198 unsigned i; 199 struct rte_mbuf *bufs[RTE_MAX_LCORE]; 200 rte_mempool_get_bulk(p, (void *)bufs, num_workers); 201 202 quit = 1; 203 for (i = 0; i < num_workers; i++) 204 bufs[i]->hash.usr = i << 1; 205 rte_distributor_process(d, bufs, num_workers); 206 207 rte_mempool_put_bulk(p, (void *)bufs, num_workers); 208 209 rte_distributor_process(d, NULL, 0); 210 rte_eal_mp_wait_lcore(); 211 quit = 0; 212 worker_idx = 0; 213 } 214 215 static int 216 test_distributor_perf(void) 217 { 218 static struct rte_distributor *d; 219 static struct rte_mempool *p; 220 221 if (rte_lcore_count() < 2) { 222 printf("ERROR: not enough cores to test distributor\n"); 223 return -1; 224 } 225 226 /* first time how long it takes to round-trip a cache line */ 227 time_cache_line_switch(); 228 229 if (d == NULL) { 230 d = rte_distributor_create("Test_perf", rte_socket_id(), 231 rte_lcore_count() - 1); 232 if (d == NULL) { 233 printf("Error creating distributor\n"); 234 return -1; 235 } 236 } else { 237 rte_distributor_flush(d); 238 rte_distributor_clear_returns(d); 239 } 240 241 const unsigned nb_bufs = (511 * rte_lcore_count()) < BIG_BATCH ? 242 (BIG_BATCH * 2) - 1 : (511 * rte_lcore_count()); 243 if (p == NULL) { 244 p = rte_pktmbuf_pool_create("DPT_MBUF_POOL", nb_bufs, BURST, 245 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id()); 246 if (p == NULL) { 247 printf("Error creating mempool\n"); 248 return -1; 249 } 250 } 251 252 rte_eal_mp_remote_launch(handle_work, d, SKIP_MASTER); 253 if (perf_test(d, p) < 0) 254 return -1; 255 quit_workers(d, p); 256 257 return 0; 258 } 259 260 REGISTER_TEST_COMMAND(distributor_perf_autotest, test_distributor_perf); 261