1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(C) 2020 Marvell International Ltd.
3 */
4
5 #include <fnmatch.h>
6 #include <stdbool.h>
7
8 #include <rte_common.h>
9 #include <rte_errno.h>
10 #include <rte_malloc.h>
11
12 #include "graph_private.h"
13
14 /* Capture all graphs of cluster */
15 struct cluster {
16 rte_graph_t nb_graphs;
17 rte_graph_t size;
18
19 struct graph **graphs;
20 };
21
22 /* Capture same node ID across cluster */
23 struct cluster_node {
24 struct rte_graph_cluster_node_stats stat;
25 rte_node_t nb_nodes;
26
27 struct rte_node *nodes[];
28 };
29
30 struct rte_graph_cluster_stats {
31 /* Header */
32 rte_graph_cluster_stats_cb_t fn;
33 uint32_t cluster_node_size; /* Size of struct cluster_node */
34 rte_node_t max_nodes;
35 int socket_id;
36 void *cookie;
37 size_t sz;
38
39 struct cluster_node clusters[];
40 } __rte_cache_aligned;
41
/*
 * Write the horizontal rule of the stats table.
 *
 * Expands to a single fprintf() expression (no trailing semicolon) and
 * relies on a variable named "f" of type FILE * being in scope at the
 * point of use.
 */
#define boarder()                                                              \
	fprintf(f, "+-------------------------------+---------------+--------" \
		   "-------+---------------+---------------+---------------+-" \
		   "----------+\n")
46
/*
 * Write the table header to f: a rule, the row of column titles and a
 * second rule.
 */
static inline void
print_banner(FILE *f)
{
	/* Column titles, in the order the columns are printed by print_node() */
	static const char * const titles[] = {
		"|Node",      "|calls",		 "|objs",	 "|realloc_count",
		"|objs/call", "|objs/sec(10E6)", "|cycles/call|",
	};

	boarder();
	fprintf(f, "%-32s%-16s%-16s%-16s%-16s%-16s%-16s\n", titles[0],
		titles[1], titles[2], titles[3], titles[4], titles[5],
		titles[6]);
	boarder();
}
56
57 static inline void
print_node(FILE * f,const struct rte_graph_cluster_node_stats * stat)58 print_node(FILE *f, const struct rte_graph_cluster_node_stats *stat)
59 {
60 double objs_per_call, objs_per_sec, cycles_per_call, ts_per_hz;
61 const uint64_t prev_calls = stat->prev_calls;
62 const uint64_t prev_objs = stat->prev_objs;
63 const uint64_t cycles = stat->cycles;
64 const uint64_t calls = stat->calls;
65 const uint64_t objs = stat->objs;
66 uint64_t call_delta;
67
68 call_delta = calls - prev_calls;
69 objs_per_call =
70 call_delta ? (double)((objs - prev_objs) / call_delta) : 0;
71 cycles_per_call =
72 call_delta ? (double)((cycles - stat->prev_cycles) / call_delta)
73 : 0;
74 ts_per_hz = (double)((stat->ts - stat->prev_ts) / stat->hz);
75 objs_per_sec = ts_per_hz ? (objs - prev_objs) / ts_per_hz : 0;
76 objs_per_sec /= 1000000;
77
78 fprintf(f,
79 "|%-31s|%-15" PRIu64 "|%-15" PRIu64 "|%-15" PRIu64
80 "|%-15.3f|%-15.6f|%-11.4f|\n",
81 stat->name, calls, objs, stat->realloc_count, objs_per_call,
82 objs_per_sec, cycles_per_call);
83 }
84
85 static int
graph_cluster_stats_cb(bool is_first,bool is_last,void * cookie,const struct rte_graph_cluster_node_stats * stat)86 graph_cluster_stats_cb(bool is_first, bool is_last, void *cookie,
87 const struct rte_graph_cluster_node_stats *stat)
88 {
89 FILE *f = cookie;
90
91 if (unlikely(is_first))
92 print_banner(f);
93 if (stat->objs)
94 print_node(f, stat);
95 if (unlikely(is_last))
96 boarder();
97
98 return 0;
99 };
100
101 static struct rte_graph_cluster_stats *
stats_mem_init(struct cluster * cluster,const struct rte_graph_cluster_stats_param * prm)102 stats_mem_init(struct cluster *cluster,
103 const struct rte_graph_cluster_stats_param *prm)
104 {
105 size_t sz = sizeof(struct rte_graph_cluster_stats);
106 struct rte_graph_cluster_stats *stats;
107 rte_graph_cluster_stats_cb_t fn;
108 int socket_id = prm->socket_id;
109 uint32_t cluster_node_size;
110
111 /* Fix up callback */
112 fn = prm->fn;
113 if (fn == NULL)
114 fn = graph_cluster_stats_cb;
115
116 cluster_node_size = sizeof(struct cluster_node);
117 /* For a given cluster, max nodes will be the max number of graphs */
118 cluster_node_size += cluster->nb_graphs * sizeof(struct rte_node *);
119 cluster_node_size = RTE_ALIGN(cluster_node_size, RTE_CACHE_LINE_SIZE);
120
121 stats = realloc(NULL, sz);
122 if (stats) {
123 memset(stats, 0, sz);
124 stats->fn = fn;
125 stats->cluster_node_size = cluster_node_size;
126 stats->max_nodes = 0;
127 stats->socket_id = socket_id;
128 stats->cookie = prm->cookie;
129 stats->sz = sz;
130 }
131
132 return stats;
133 }
134
135 static int
stats_mem_populate(struct rte_graph_cluster_stats ** stats_in,struct rte_graph * graph,struct graph_node * graph_node)136 stats_mem_populate(struct rte_graph_cluster_stats **stats_in,
137 struct rte_graph *graph, struct graph_node *graph_node)
138 {
139 struct rte_graph_cluster_stats *stats = *stats_in;
140 rte_node_t id = graph_node->node->id;
141 struct cluster_node *cluster;
142 struct rte_node *node;
143 rte_node_t count;
144
145 cluster = stats->clusters;
146
147 /* Iterate over cluster node array to find node ID match */
148 for (count = 0; count < stats->max_nodes; count++) {
149 /* Found an existing node in the reel */
150 if (cluster->stat.id == id) {
151 node = graph_node_id_to_ptr(graph, id);
152 if (node == NULL)
153 SET_ERR_JMP(
154 ENOENT, err,
155 "Failed to find node %s in graph %s",
156 graph_node->node->name, graph->name);
157
158 cluster->nodes[cluster->nb_nodes++] = node;
159 return 0;
160 }
161 cluster = RTE_PTR_ADD(cluster, stats->cluster_node_size);
162 }
163
164 /* Hey, it is a new node, allocate space for it in the reel */
165 stats = realloc(stats, stats->sz + stats->cluster_node_size);
166 if (stats == NULL)
167 SET_ERR_JMP(ENOMEM, err, "Realloc failed");
168 *stats_in = NULL;
169
170 /* Clear the new struct cluster_node area */
171 cluster = RTE_PTR_ADD(stats, stats->sz),
172 memset(cluster, 0, stats->cluster_node_size);
173 memcpy(cluster->stat.name, graph_node->node->name, RTE_NODE_NAMESIZE);
174 cluster->stat.id = graph_node->node->id;
175 cluster->stat.hz = rte_get_timer_hz();
176 node = graph_node_id_to_ptr(graph, id);
177 if (node == NULL)
178 SET_ERR_JMP(ENOENT, free, "Failed to find node %s in graph %s",
179 graph_node->node->name, graph->name);
180 cluster->nodes[cluster->nb_nodes++] = node;
181
182 stats->sz += stats->cluster_node_size;
183 stats->max_nodes++;
184 *stats_in = stats;
185
186 return 0;
187 free:
188 free(stats);
189 err:
190 return -rte_errno;
191 }
192
/* Release the heap stats object built during creation; NULL is accepted. */
static void
stats_mem_fini(struct rte_graph_cluster_stats *stats)
{
	free(stats);
}
198
199 static void
cluster_init(struct cluster * cluster)200 cluster_init(struct cluster *cluster)
201 {
202 memset(cluster, 0, sizeof(*cluster));
203 }
204
205 static int
cluster_add(struct cluster * cluster,struct graph * graph)206 cluster_add(struct cluster *cluster, struct graph *graph)
207 {
208 rte_graph_t count;
209 size_t sz;
210
211 /* Skip the if graph is already added to cluster */
212 for (count = 0; count < cluster->nb_graphs; count++)
213 if (cluster->graphs[count] == graph)
214 return 0;
215
216 /* Expand the cluster if required to store graph objects */
217 if (cluster->nb_graphs + 1 > cluster->size) {
218 cluster->size = RTE_MAX(1, cluster->size * 2);
219 sz = sizeof(struct graph *) * cluster->size;
220 cluster->graphs = realloc(cluster->graphs, sz);
221 if (cluster->graphs == NULL)
222 SET_ERR_JMP(ENOMEM, free, "Failed to realloc");
223 }
224
225 /* Add graph to cluster */
226 cluster->graphs[cluster->nb_graphs++] = graph;
227 return 0;
228
229 free:
230 return -rte_errno;
231 }
232
233 static void
cluster_fini(struct cluster * cluster)234 cluster_fini(struct cluster *cluster)
235 {
236 free(cluster->graphs);
237 }
238
239 static int
expand_pattern_to_cluster(struct cluster * cluster,const char * pattern)240 expand_pattern_to_cluster(struct cluster *cluster, const char *pattern)
241 {
242 struct graph_head *graph_head = graph_list_head_get();
243 struct graph *graph;
244 bool found = false;
245
246 /* Check for pattern match */
247 STAILQ_FOREACH(graph, graph_head, next) {
248 if (fnmatch(pattern, graph->name, 0) == 0) {
249 if (cluster_add(cluster, graph))
250 goto fail;
251 found = true;
252 }
253 }
254 if (found == false)
255 SET_ERR_JMP(EFAULT, fail, "Pattern %s graph not found",
256 pattern);
257
258 return 0;
259 fail:
260 return -rte_errno;
261 }
262
263 struct rte_graph_cluster_stats *
rte_graph_cluster_stats_create(const struct rte_graph_cluster_stats_param * prm)264 rte_graph_cluster_stats_create(const struct rte_graph_cluster_stats_param *prm)
265 {
266 struct rte_graph_cluster_stats *stats, *rc = NULL;
267 struct graph_node *graph_node;
268 struct cluster cluster;
269 struct graph *graph;
270 const char *pattern;
271 rte_graph_t i;
272
273 /* Sanity checks */
274 if (!rte_graph_has_stats_feature())
275 SET_ERR_JMP(EINVAL, fail, "Stats feature is not enabled");
276
277 if (prm == NULL)
278 SET_ERR_JMP(EINVAL, fail, "Invalid param");
279
280 if (prm->graph_patterns == NULL || prm->nb_graph_patterns == 0)
281 SET_ERR_JMP(EINVAL, fail, "Invalid graph param");
282
283 cluster_init(&cluster);
284
285 graph_spinlock_lock();
286 /* Expand graph pattern and add the graph to the cluster */
287 for (i = 0; i < prm->nb_graph_patterns; i++) {
288 pattern = prm->graph_patterns[i];
289 if (expand_pattern_to_cluster(&cluster, pattern))
290 goto bad_pattern;
291 }
292
293 /* Alloc the stats memory */
294 stats = stats_mem_init(&cluster, prm);
295 if (stats == NULL)
296 SET_ERR_JMP(ENOMEM, bad_pattern, "Failed alloc stats memory");
297
298 /* Iterate over M(Graph) x N (Nodes in graph) */
299 for (i = 0; i < cluster.nb_graphs; i++) {
300 graph = cluster.graphs[i];
301 STAILQ_FOREACH(graph_node, &graph->node_list, next) {
302 struct rte_graph *graph_fp = graph->graph;
303 if (stats_mem_populate(&stats, graph_fp, graph_node))
304 goto realloc_fail;
305 }
306 }
307
308 /* Finally copy to hugepage memory to avoid pressure on rte_realloc */
309 rc = rte_malloc_socket(NULL, stats->sz, 0, stats->socket_id);
310 if (rc)
311 rte_memcpy(rc, stats, stats->sz);
312 else
313 SET_ERR_JMP(ENOMEM, realloc_fail, "rte_malloc failed");
314
315 realloc_fail:
316 stats_mem_fini(stats);
317 bad_pattern:
318 graph_spinlock_unlock();
319 cluster_fini(&cluster);
320 fail:
321 return rc;
322 }
323
/*
 * Release a stats object obtained from rte_graph_cluster_stats_create().
 * The object is handed to rte_free().
 */
void
rte_graph_cluster_stats_destroy(struct rte_graph_cluster_stats *stat)
{
	/* A void function must not "return" an expression, even a void one */
	rte_free(stat);
}
329
330 static inline void
cluster_node_arregate_stats(struct cluster_node * cluster)331 cluster_node_arregate_stats(struct cluster_node *cluster)
332 {
333 uint64_t calls = 0, cycles = 0, objs = 0, realloc_count = 0;
334 struct rte_graph_cluster_node_stats *stat = &cluster->stat;
335 struct rte_node *node;
336 rte_node_t count;
337
338 for (count = 0; count < cluster->nb_nodes; count++) {
339 node = cluster->nodes[count];
340
341 calls += node->total_calls;
342 objs += node->total_objs;
343 cycles += node->total_cycles;
344 realloc_count += node->realloc_count;
345 }
346
347 stat->calls = calls;
348 stat->objs = objs;
349 stat->cycles = cycles;
350 stat->ts = rte_get_timer_cycles();
351 stat->realloc_count = realloc_count;
352 }
353
354 static inline void
cluster_node_store_prev_stats(struct cluster_node * cluster)355 cluster_node_store_prev_stats(struct cluster_node *cluster)
356 {
357 struct rte_graph_cluster_node_stats *stat = &cluster->stat;
358
359 stat->prev_ts = stat->ts;
360 stat->prev_calls = stat->calls;
361 stat->prev_objs = stat->objs;
362 stat->prev_cycles = stat->cycles;
363 }
364
365 void
rte_graph_cluster_stats_get(struct rte_graph_cluster_stats * stat,bool skip_cb)366 rte_graph_cluster_stats_get(struct rte_graph_cluster_stats *stat, bool skip_cb)
367 {
368 struct cluster_node *cluster;
369 rte_node_t count;
370 int rc = 0;
371
372 cluster = stat->clusters;
373
374 for (count = 0; count < stat->max_nodes; count++) {
375 cluster_node_arregate_stats(cluster);
376 if (!skip_cb)
377 rc = stat->fn(!count, (count == stat->max_nodes - 1),
378 stat->cookie, &cluster->stat);
379 cluster_node_store_prev_stats(cluster);
380 if (rc)
381 break;
382 cluster = RTE_PTR_ADD(cluster, stat->cluster_node_size);
383 }
384 }
385
386 void
rte_graph_cluster_stats_reset(struct rte_graph_cluster_stats * stat)387 rte_graph_cluster_stats_reset(struct rte_graph_cluster_stats *stat)
388 {
389 struct cluster_node *cluster;
390 rte_node_t count;
391
392 cluster = stat->clusters;
393
394 for (count = 0; count < stat->max_nodes; count++) {
395 struct rte_graph_cluster_node_stats *node = &cluster->stat;
396
397 node->ts = 0;
398 node->calls = 0;
399 node->objs = 0;
400 node->cycles = 0;
401 node->prev_ts = 0;
402 node->prev_calls = 0;
403 node->prev_objs = 0;
404 node->prev_cycles = 0;
405 node->realloc_count = 0;
406 cluster = RTE_PTR_ADD(cluster, stat->cluster_node_size);
407 }
408 }
409