1 /*-
2 * Copyright (c) 2014-2015 Netflix, Inc.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer,
9 * in this position and unchanged.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote products
14 * derived from this software without specific prior written permission
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 #include <sys/types.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 #include <string.h>
32 #include <strings.h>
33 #include <sys/errno.h>
34 #include <signal.h>
35 #include <sys/wait.h>
36 #include <getopt.h>
37 #include "eval_expr.h"
38 __FBSDID("$FreeBSD$");
39
/* Fixed sizing limits used by the counter bookkeeping in this file. */
#define MAX_COUNTER_SLOTS	1024
#define MAX_NLEN		64
#define MAX_CPU			64

/* Initial allocation size associated with the valid_pmcs table. */
#define PMC_INITIAL_ALLOC	512

/* File-local settings and their start-up defaults. */
static int max_pmc_counters = 1;
static int run_all = 0;
static int verbose = 0;

/* Process environment, provided by the C runtime. */
extern char **environ;

/*
 * Each global below is declared extern immediately before it is defined,
 * so a declaration is always visible ahead of the definition.
 */
extern struct expression *master_exp;
struct expression *master_exp = NULL;

extern char **valid_pmcs;
char **valid_pmcs = NULL;

extern int valid_pmc_cnt;
int valid_pmc_cnt = 0;

extern int pmc_allocated_cnt;
int pmc_allocated_cnt = 0;
59
/*
 * The following two functions are variants of popen(3) and pclose(3),
 * with the caveat that they give you the PID of the child so that
 * you can supply it to my_pclose(), which sends a SIGTERM to the
 * process.
 */

/*
 * my_popen --
 *	Run "/bin/sh -c command" in a child process.  The child's stdin is
 *	connected to one pipe and its stdout and stderr are both connected
 *	to a second pipe.
 *
 *	dir must be "r" or "w"; any other value fails with errno set to
 *	EINVAL.  With "r" the returned stream reads what the child writes
 *	to stdout/stderr; with "w" the returned stream writes to the
 *	child's stdin.  The parent closes every pipe end it does not hand
 *	back.  Once the fork has succeeded the child's PID is stored in
 *	*p_pid.
 *
 *	Returns the stream, or NULL when a pipe, the fork or the stream
 *	could not be created.  If the stream cannot be created after the
 *	fork, the child is sent SIGTERM and reaped before returning.
 */
static FILE *
my_popen(const char *command, const char *dir, pid_t *p_pid)
{
	FILE *stream;
	int pdesin[2], pdesout[2];
	int reading, keep_fd, saved_errno, pstat;
	pid_t pid;

	reading = (strcmp(dir, "r") == 0);
	if (!reading && strcmp(dir, "w") != 0) {
		errno = EINVAL;
		return (NULL);
	}
	if (pipe(pdesin) < 0)
		return (NULL);

	if (pipe(pdesout) < 0) {
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		return (NULL);
	}

	switch (pid = fork()) {
	case -1:			/* Error. */
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		return (NULL);
		/* NOTREACHED */
	case 0:				/* Child. */
		/* Close out un-used sides */
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		/* Wire up stdin, stdout and (just in case) stderr. */
		if (dup2(pdesin[0], STDIN_FILENO) == -1 ||
		    dup2(pdesout[1], STDOUT_FILENO) == -1 ||
		    dup2(pdesout[1], STDERR_FILENO) == -1)
			_exit(127);
		/* Drop the originals unless they already are a std fd. */
		if (pdesin[0] > STDERR_FILENO)
			(void)close(pdesin[0]);
		if (pdesout[1] > STDERR_FILENO)
			(void)close(pdesout[1]);
		/*
		 * Now run it.  The command string is passed straight
		 * through, so its length is not limited by a local buffer.
		 */
		execl("/bin/sh", "sh", "-c", command, (char *)NULL);
		/*
		 * _exit() rather than exit(): the child must not flush
		 * stdio buffers it inherited from the parent.
		 */
		_exit(127);
		/* NOTREACHED */
	}

	/* Parent. Store the pid */
	*p_pid = pid;
	if (reading) {
		keep_fd = pdesout[0];
		(void)close(pdesin[1]);
	} else {
		keep_fd = pdesin[1];
		(void)close(pdesout[0]);
	}
	(void)close(pdesin[0]);
	(void)close(pdesout[1]);

	stream = fdopen(keep_fd, reading ? "r" : "w");
	if (stream == NULL) {
		/* Do not leak the descriptor or leave the child behind. */
		saved_errno = errno;
		(void)close(keep_fd);
		(void)kill(pid, SIGTERM);
		while (waitpid(pid, &pstat, 0) == -1 && errno == EINTR)
			continue;
		errno = saved_errno;
	}
	return (stream);
}
144
/*
 * my_pclose --
 *	Counterpart of my_popen(): close the stream, send SIGTERM to
 *	the_pid and then wait for that process, retrying the wait whenever
 *	it is interrupted (EINTR).
 *
 *	Nothing is returned; the results of fclose(), kill() and the wait
 *	are not reported to the caller.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int status;

	(void)fclose(io);
	/* Die if you are not dead! */
	kill(the_pid, SIGTERM);
	for (;;) {
		/* Same as wait4() with a null rusage pointer. */
		if (waitpid(the_pid, &status, 0) != -1)
			break;
		if (errno != EINTR)
			break;
	}
}
166
167 struct counters {
168 struct counters *next_cpu;
169 char counter_name[MAX_NLEN]; /* Name of counter */
170 int cpu; /* CPU we are on */
171 int pos; /* Index we are filling to. */
172 uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */
173 uint64_t sum; /* Summary of entries */
174 };
175
176 extern struct counters *glob_cpu[MAX_CPU];
177 struct counters *glob_cpu[MAX_CPU];
178
179 extern struct counters *cnts;
180 struct counters *cnts=NULL;
181
182 extern int ncnts;
183 int ncnts=0;
184
185 extern int (*expression)(struct counters *, int);
186 int (*expression)(struct counters *, int);
187
188 static const char *threshold=NULL;
189 static const char *command;
190
191 struct cpu_entry {
192 const char *name;
193 const char *thresh;
194 const char *command;
195 int (*func)(struct counters *, int);
196 int counters_required;
197 };
198
199 struct cpu_type {
200 char cputype[32];
201 int number;
202 struct cpu_entry *ents;
203 void (*explain)(const char *name);
204 };
205 extern struct cpu_type the_cpu;
206 struct cpu_type the_cpu;
207
/*
 * explain_name_sb --
 *	Print the formula text for the named test, followed by a line
 *	stating the threshold at which the value is worth a look.
 *
 *	name is compared with strcmp() against the table below.  A name
 *	that is not in the table prints "Unknown name:<name>" and uses
 *	"unknown entry" as the threshold text.
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *key;	/* test name */
		const char *formula;	/* text printed for the test */
		const char *thresh;	/* threshold hint */
	} notes[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		/* The opening parenthesis was missing from this formula. */
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < nnotes) {
		printf("%s", notes[i].formula);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
286
/*
 * explain_name_ib --
 *	Print the formula text for the named test, followed by a line
 *	stating the threshold at which the value is worth a look.
 *
 *	name is compared with strcmp() against the table below.  A name
 *	that is not in the table prints "Unknown name:<name>" and uses
 *	"unknown entry" as the threshold text.
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *key;	/* test name */
		const char *formula;	/* text printed for the test */
		const char *thresh;	/* threshold hint */
	} notes[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  " MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  " LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < nnotes) {
		printf("%s", notes[i].formula);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
368
369
/*
 * explain_name_has --
 *	Print the formula text for the named test, followed by a line
 *	stating the threshold at which the value is worth a look.
 *
 *	name is compared with strcmp() against the table below.  A name
 *	that is not in the table prints "Unknown name:<name>" and uses
 *	"unknown entry" as the threshold text.
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *key;	/* test name */
		const char *formula;	/* text printed for the test */
		const char *thresh;	/* threshold hint */
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  " / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < nnotes) {
		printf("%s", notes[i].formula);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
447
448
449
450 static struct counters *
find_counter(struct counters * base,const char * name)451 find_counter(struct counters *base, const char *name)
452 {
453 struct counters *at;
454 int len;
455
456 at = base;
457 len = strlen(name);
458 while(at) {
459 if (strncmp(at->counter_name, name, len) == 0) {
460 return(at);
461 }
462 at = at->next_cpu;
463 }
464 printf("Can't find counter %s\n", name);
465 printf("We have:\n");
466 at = base;
467 while(at) {
468 printf("- %s\n", at->counter_name);
469 at = at->next_cpu;
470 }
471 exit(-1);
472 }
473
474 static int
allocstall1(struct counters * cpu,int pos)475 allocstall1(struct counters *cpu, int pos)
476 {
477 /* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
478 int ret;
479 struct counters *partial;
480 struct counters *unhalt;
481 double un, par, res;
482 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
483 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
484 if (pos != -1) {
485 par = partial->vals[pos] * 1.0;
486 un = unhalt->vals[pos] * 1.0;
487 } else {
488 par = partial->sum * 1.0;
489 un = unhalt->sum * 1.0;
490 }
491 res = par/un;
492 ret = printf("%1.3f", res);
493 return(ret);
494 }
495
496 static int
allocstall2(struct counters * cpu,int pos)497 allocstall2(struct counters *cpu, int pos)
498 {
499 /* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
500 int ret;
501 struct counters *partial;
502 struct counters *unhalt;
503 double un, par, res;
504 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
505 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
506 if (pos != -1) {
507 par = partial->vals[pos] * 1.0;
508 un = unhalt->vals[pos] * 1.0;
509 } else {
510 par = partial->sum * 1.0;
511 un = unhalt->sum * 1.0;
512 }
513 res = par/un;
514 ret = printf("%1.3f", res);
515 return(ret);
516 }
517
518 static int
br_mispredict(struct counters * cpu,int pos)519 br_mispredict(struct counters *cpu, int pos)
520 {
521 struct counters *brctr;
522 struct counters *unhalt;
523 int ret;
524 /* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
525 double br, un, con, res;
526 con = 20.0;
527
528 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
529 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
530 if (pos != -1) {
531 br = brctr->vals[pos] * 1.0;
532 un = unhalt->vals[pos] * 1.0;
533 } else {
534 br = brctr->sum * 1.0;
535 un = unhalt->sum * 1.0;
536 }
537 res = (con * br)/un;
538 ret = printf("%1.3f", res);
539 return(ret);
540 }
541
542 static int
br_mispredictib(struct counters * cpu,int pos)543 br_mispredictib(struct counters *cpu, int pos)
544 {
545 struct counters *brctr;
546 struct counters *unhalt;
547 struct counters *clear, *clear2, *clear3;
548 struct counters *uops;
549 struct counters *recv;
550 struct counters *iss;
551 /* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
552 int ret;
553 /*
554 * (BR_MISP_RETIRED.ALL_BRANCHES /
555 * (BR_MISP_RETIRED.ALL_BRANCHES +
556 * MACHINE_CLEAR.COUNT) *
557 * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
558 *
559 */
560 double br, cl, cl2, cl3, uo, re, un, con, res, is;
561 con = 4.0;
562
563 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
564 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
565 clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
566 clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
567 clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
568 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
569 iss = find_counter(cpu, "UOPS_ISSUED.ANY");
570 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
571 if (pos != -1) {
572 br = brctr->vals[pos] * 1.0;
573 cl = clear->vals[pos] * 1.0;
574 cl2 = clear2->vals[pos] * 1.0;
575 cl3 = clear3->vals[pos] * 1.0;
576 uo = uops->vals[pos] * 1.0;
577 re = recv->vals[pos] * 1.0;
578 is = iss->vals[pos] * 1.0;
579 un = unhalt->vals[pos] * 1.0;
580 } else {
581 br = brctr->sum * 1.0;
582 cl = clear->sum * 1.0;
583 cl2 = clear2->sum * 1.0;
584 cl3 = clear3->sum * 1.0;
585 uo = uops->sum * 1.0;
586 re = recv->sum * 1.0;
587 is = iss->sum * 1.0;
588 un = unhalt->sum * 1.0;
589 }
590 res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
591 ret = printf("%1.3f", res);
592 return(ret);
593 }
594
595
596 static int
br_mispredict_broad(struct counters * cpu,int pos)597 br_mispredict_broad(struct counters *cpu, int pos)
598 {
599 struct counters *brctr;
600 struct counters *unhalt;
601 struct counters *clear;
602 struct counters *uops;
603 struct counters *uops_ret;
604 struct counters *recv;
605 int ret;
606 double br, cl, uo, uo_r, re, con, un, res;
607
608 con = 4.0;
609
610 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
611 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
612 clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
613 uops = find_counter(cpu, "UOPS_ISSUED.ANY");
614 uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
615 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
616
617 if (pos != -1) {
618 un = unhalt->vals[pos] * 1.0;
619 br = brctr->vals[pos] * 1.0;
620 cl = clear->vals[pos] * 1.0;
621 uo = uops->vals[pos] * 1.0;
622 uo_r = uops_ret->vals[pos] * 1.0;
623 re = recv->vals[pos] * 1.0;
624 } else {
625 un = unhalt->sum * 1.0;
626 br = brctr->sum * 1.0;
627 cl = clear->sum * 1.0;
628 uo = uops->sum * 1.0;
629 uo_r = uops_ret->sum * 1.0;
630 re = recv->sum * 1.0;
631 }
632 res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
633 ret = printf("%1.3f", res);
634 return(ret);
635 }
636
637 static int
splitloadib(struct counters * cpu,int pos)638 splitloadib(struct counters *cpu, int pos)
639 {
640 int ret;
641 struct counters *mem;
642 struct counters *l1d, *ldblock;
643 struct counters *unhalt;
644 double un, memd, res, l1, ldb;
645 /*
646 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
647 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
648 */
649
650 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
651 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
652 l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
653 ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
654 if (pos != -1) {
655 memd = mem->vals[pos] * 1.0;
656 l1 = l1d->vals[pos] * 1.0;
657 ldb = ldblock->vals[pos] * 1.0;
658 un = unhalt->vals[pos] * 1.0;
659 } else {
660 memd = mem->sum * 1.0;
661 l1 = l1d->sum * 1.0;
662 ldb = ldblock->sum * 1.0;
663 un = unhalt->sum * 1.0;
664 }
665 res = ((l1 / memd) * ldb)/un;
666 ret = printf("%1.3f", res);
667 return(ret);
668 }
669
670
671 static int
splitload(struct counters * cpu,int pos)672 splitload(struct counters *cpu, int pos)
673 {
674 int ret;
675 struct counters *mem;
676 struct counters *unhalt;
677 double con, un, memd, res;
678 /* 4 - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
679
680 con = 5.0;
681 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
682 mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
683 if (pos != -1) {
684 memd = mem->vals[pos] * 1.0;
685 un = unhalt->vals[pos] * 1.0;
686 } else {
687 memd = mem->sum * 1.0;
688 un = unhalt->sum * 1.0;
689 }
690 res = (memd * con)/un;
691 ret = printf("%1.3f", res);
692 return(ret);
693 }
694
695
696 static int
splitload_sb(struct counters * cpu,int pos)697 splitload_sb(struct counters *cpu, int pos)
698 {
699 int ret;
700 struct counters *mem;
701 struct counters *unhalt;
702 double con, un, memd, res;
703 /* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
704
705 con = 5.0;
706 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
707 mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
708 if (pos != -1) {
709 memd = mem->vals[pos] * 1.0;
710 un = unhalt->vals[pos] * 1.0;
711 } else {
712 memd = mem->sum * 1.0;
713 un = unhalt->sum * 1.0;
714 }
715 res = (memd * con)/un;
716 ret = printf("%1.3f", res);
717 return(ret);
718 }
719
720
721 static int
splitstore_sb(struct counters * cpu,int pos)722 splitstore_sb(struct counters *cpu, int pos)
723 {
724 /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
725 int ret;
726 struct counters *mem_split;
727 struct counters *mem_stores;
728 double memsplit, memstore, res;
729 mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
730 mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
731 if (pos != -1) {
732 memsplit = mem_split->vals[pos] * 1.0;
733 memstore = mem_stores->vals[pos] * 1.0;
734 } else {
735 memsplit = mem_split->sum * 1.0;
736 memstore = mem_stores->sum * 1.0;
737 }
738 res = memsplit/memstore;
739 ret = printf("%1.3f", res);
740 return(ret);
741 }
742
743
744
745 static int
splitstore(struct counters * cpu,int pos)746 splitstore(struct counters *cpu, int pos)
747 {
748 /* 5 - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
749 int ret;
750 struct counters *mem_split;
751 struct counters *mem_stores;
752 double memsplit, memstore, res;
753 mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
754 mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
755 if (pos != -1) {
756 memsplit = mem_split->vals[pos] * 1.0;
757 memstore = mem_stores->vals[pos] * 1.0;
758 } else {
759 memsplit = mem_split->sum * 1.0;
760 memstore = mem_stores->sum * 1.0;
761 }
762 res = memsplit/memstore;
763 ret = printf("%1.3f", res);
764 return(ret);
765 }
766
767
768 static int
contested(struct counters * cpu,int pos)769 contested(struct counters *cpu, int pos)
770 {
771 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
772 int ret;
773 struct counters *mem;
774 struct counters *unhalt;
775 double con, un, memd, res;
776
777 con = 60.0;
778 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
779 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
780 if (pos != -1) {
781 memd = mem->vals[pos] * 1.0;
782 un = unhalt->vals[pos] * 1.0;
783 } else {
784 memd = mem->sum * 1.0;
785 un = unhalt->sum * 1.0;
786 }
787 res = (memd * con)/un;
788 ret = printf("%1.3f", res);
789 return(ret);
790 }
791
792 static int
contested_has(struct counters * cpu,int pos)793 contested_has(struct counters *cpu, int pos)
794 {
795 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
796 int ret;
797 struct counters *mem;
798 struct counters *unhalt;
799 double con, un, memd, res;
800
801 con = 84.0;
802 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
803 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
804 if (pos != -1) {
805 memd = mem->vals[pos] * 1.0;
806 un = unhalt->vals[pos] * 1.0;
807 } else {
808 memd = mem->sum * 1.0;
809 un = unhalt->sum * 1.0;
810 }
811 res = (memd * con)/un;
812 ret = printf("%1.3f", res);
813 return(ret);
814 }
815
816 static int
contestedbroad(struct counters * cpu,int pos)817 contestedbroad(struct counters *cpu, int pos)
818 {
819 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
820 int ret;
821 struct counters *mem;
822 struct counters *mem2;
823 struct counters *unhalt;
824 double con, un, memd, memtoo, res;
825
826 con = 84.0;
827 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
828 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
829 mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
830
831 if (pos != -1) {
832 memd = mem->vals[pos] * 1.0;
833 memtoo = mem2->vals[pos] * 1.0;
834 un = unhalt->vals[pos] * 1.0;
835 } else {
836 memd = mem->sum * 1.0;
837 memtoo = mem2->sum * 1.0;
838 un = unhalt->sum * 1.0;
839 }
840 res = ((memd * con) + memtoo)/un;
841 ret = printf("%1.3f", res);
842 return(ret);
843 }
844
845
846 static int
blockstoreforward(struct counters * cpu,int pos)847 blockstoreforward(struct counters *cpu, int pos)
848 {
849 /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
850 int ret;
851 struct counters *ldb;
852 struct counters *unhalt;
853 double con, un, ld, res;
854
855 con = 13.0;
856 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
857 ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
858 if (pos != -1) {
859 ld = ldb->vals[pos] * 1.0;
860 un = unhalt->vals[pos] * 1.0;
861 } else {
862 ld = ldb->sum * 1.0;
863 un = unhalt->sum * 1.0;
864 }
865 res = (ld * con)/un;
866 ret = printf("%1.3f", res);
867 return(ret);
868 }
869
870 static int
cache2(struct counters * cpu,int pos)871 cache2(struct counters *cpu, int pos)
872 {
873 /* ** Suspect ***
874 * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
875 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
876 */
877 int ret;
878 struct counters *mem1, *mem2, *mem3;
879 struct counters *unhalt;
880 double con1, con2, con3, un, me_1, me_2, me_3, res;
881
882 con1 = 26.0;
883 con2 = 43.0;
884 con3 = 60.0;
885 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
886 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
887 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
888 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
889 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
890 if (pos != -1) {
891 me_1 = mem1->vals[pos] * 1.0;
892 me_2 = mem2->vals[pos] * 1.0;
893 me_3 = mem3->vals[pos] * 1.0;
894 un = unhalt->vals[pos] * 1.0;
895 } else {
896 me_1 = mem1->sum * 1.0;
897 me_2 = mem2->sum * 1.0;
898 me_3 = mem3->sum * 1.0;
899 un = unhalt->sum * 1.0;
900 }
901 res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
902 ret = printf("%1.3f", res);
903 return(ret);
904 }
905
906 static int
datasharing(struct counters * cpu,int pos)907 datasharing(struct counters *cpu, int pos)
908 {
909 /*
910 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
911 */
912 int ret;
913 struct counters *mem;
914 struct counters *unhalt;
915 double con, res, me, un;
916
917 con = 43.0;
918 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
919 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
920 if (pos != -1) {
921 me = mem->vals[pos] * 1.0;
922 un = unhalt->vals[pos] * 1.0;
923 } else {
924 me = mem->sum * 1.0;
925 un = unhalt->sum * 1.0;
926 }
927 res = (me * con)/un;
928 ret = printf("%1.3f", res);
929 return(ret);
930
931 }
932
933
934 static int
datasharing_has(struct counters * cpu,int pos)935 datasharing_has(struct counters *cpu, int pos)
936 {
937 /*
938 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
939 */
940 int ret;
941 struct counters *mem;
942 struct counters *unhalt;
943 double con, res, me, un;
944
945 con = 72.0;
946 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
947 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
948 if (pos != -1) {
949 me = mem->vals[pos] * 1.0;
950 un = unhalt->vals[pos] * 1.0;
951 } else {
952 me = mem->sum * 1.0;
953 un = unhalt->sum * 1.0;
954 }
955 res = (me * con)/un;
956 ret = printf("%1.3f", res);
957 return(ret);
958
959 }
960
961
962 static int
cache2ib(struct counters * cpu,int pos)963 cache2ib(struct counters *cpu, int pos)
964 {
965 /*
966 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
967 */
968 int ret;
969 struct counters *mem;
970 struct counters *unhalt;
971 double con, un, me, res;
972
973 con = 29.0;
974 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
975 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
976 if (pos != -1) {
977 me = mem->vals[pos] * 1.0;
978 un = unhalt->vals[pos] * 1.0;
979 } else {
980 me = mem->sum * 1.0;
981 un = unhalt->sum * 1.0;
982 }
983 res = (con * me)/un;
984 ret = printf("%1.3f", res);
985 return(ret);
986 }
987
988 static int
cache2has(struct counters * cpu,int pos)989 cache2has(struct counters *cpu, int pos)
990 {
991 /*
992 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
993 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
994 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
995 * / CPU_CLK_UNHALTED.THREAD_P
996 */
997 int ret;
998 struct counters *mem1, *mem2, *mem3;
999 struct counters *unhalt;
1000 double con1, con2, con3, un, me1, me2, me3, res;
1001
1002 con1 = 36.0;
1003 con2 = 72.0;
1004 con3 = 84.0;
1005 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1006 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
1007 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
1008 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
1009 if (pos != -1) {
1010 me1 = mem1->vals[pos] * 1.0;
1011 me2 = mem2->vals[pos] * 1.0;
1012 me3 = mem3->vals[pos] * 1.0;
1013 un = unhalt->vals[pos] * 1.0;
1014 } else {
1015 me1 = mem1->sum * 1.0;
1016 me2 = mem2->sum * 1.0;
1017 me3 = mem3->sum * 1.0;
1018 un = unhalt->sum * 1.0;
1019 }
1020 res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
1021 ret = printf("%1.3f", res);
1022 return(ret);
1023 }
1024
1025
1026 static int
cache2broad(struct counters * cpu,int pos)1027 cache2broad(struct counters *cpu, int pos)
1028 {
1029 /*
1030 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
1031 */
1032 int ret;
1033 struct counters *mem;
1034 struct counters *unhalt;
1035 double con, un, me, res;
1036
1037 con = 36.0;
1038 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1039 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
1040 if (pos != -1) {
1041 me = mem->vals[pos] * 1.0;
1042 un = unhalt->vals[pos] * 1.0;
1043 } else {
1044 me = mem->sum * 1.0;
1045 un = unhalt->sum * 1.0;
1046 }
1047 res = (con * me)/un;
1048 ret = printf("%1.3f", res);
1049 return(ret);
1050 }
1051
1052
1053 static int
cache1(struct counters * cpu,int pos)1054 cache1(struct counters *cpu, int pos)
1055 {
1056 /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1057 int ret;
1058 struct counters *mem;
1059 struct counters *unhalt;
1060 double con, un, me, res;
1061
1062 con = 180.0;
1063 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1064 mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1065 if (pos != -1) {
1066 me = mem->vals[pos] * 1.0;
1067 un = unhalt->vals[pos] * 1.0;
1068 } else {
1069 me = mem->sum * 1.0;
1070 un = unhalt->sum * 1.0;
1071 }
1072 res = (me * con)/un;
1073 ret = printf("%1.3f", res);
1074 return(ret);
1075 }
1076
1077 static int
cache1ib(struct counters * cpu,int pos)1078 cache1ib(struct counters *cpu, int pos)
1079 {
1080 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1081 int ret;
1082 struct counters *mem;
1083 struct counters *unhalt;
1084 double con, un, me, res;
1085
1086 con = 180.0;
1087 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1088 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1089 if (pos != -1) {
1090 me = mem->vals[pos] * 1.0;
1091 un = unhalt->vals[pos] * 1.0;
1092 } else {
1093 me = mem->sum * 1.0;
1094 un = unhalt->sum * 1.0;
1095 }
1096 res = (me * con)/un;
1097 ret = printf("%1.3f", res);
1098 return(ret);
1099 }
1100
1101
1102 static int
cache1broad(struct counters * cpu,int pos)1103 cache1broad(struct counters *cpu, int pos)
1104 {
1105 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1106 int ret;
1107 struct counters *mem;
1108 struct counters *unhalt;
1109 double con, un, me, res;
1110
1111 con = 180.0;
1112 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1113 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1114 if (pos != -1) {
1115 me = mem->vals[pos] * 1.0;
1116 un = unhalt->vals[pos] * 1.0;
1117 } else {
1118 me = mem->sum * 1.0;
1119 un = unhalt->sum * 1.0;
1120 }
1121 res = (me * con)/un;
1122 ret = printf("%1.3f", res);
1123 return(ret);
1124 }
1125
1126
1127 static int
dtlb_missload(struct counters * cpu,int pos)1128 dtlb_missload(struct counters *cpu, int pos)
1129 {
1130 /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1131 int ret;
1132 struct counters *dtlb_m, *dtlb_d;
1133 struct counters *unhalt;
1134 double con, un, d1, d2, res;
1135
1136 con = 7.0;
1137 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1138 dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1139 dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
1140 if (pos != -1) {
1141 d1 = dtlb_m->vals[pos] * 1.0;
1142 d2 = dtlb_d->vals[pos] * 1.0;
1143 un = unhalt->vals[pos] * 1.0;
1144 } else {
1145 d1 = dtlb_m->sum * 1.0;
1146 d2 = dtlb_d->sum * 1.0;
1147 un = unhalt->sum * 1.0;
1148 }
1149 res = ((d1 * con) + d2)/un;
1150 ret = printf("%1.3f", res);
1151 return(ret);
1152 }
1153
1154 static int
dtlb_missstore(struct counters * cpu,int pos)1155 dtlb_missstore(struct counters *cpu, int pos)
1156 {
1157 /*
1158 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
1159 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
1160 */
1161 int ret;
1162 struct counters *dtsb_m, *dtsb_d;
1163 struct counters *unhalt;
1164 double con, un, d1, d2, res;
1165
1166 con = 7.0;
1167 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1168 dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1169 dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
1170 if (pos != -1) {
1171 d1 = dtsb_m->vals[pos] * 1.0;
1172 d2 = dtsb_d->vals[pos] * 1.0;
1173 un = unhalt->vals[pos] * 1.0;
1174 } else {
1175 d1 = dtsb_m->sum * 1.0;
1176 d2 = dtsb_d->sum * 1.0;
1177 un = unhalt->sum * 1.0;
1178 }
1179 res = ((d1 * con) + d2)/un;
1180 ret = printf("%1.3f", res);
1181 return(ret);
1182 }
1183
1184 static int
itlb_miss(struct counters * cpu,int pos)1185 itlb_miss(struct counters *cpu, int pos)
1186 {
1187 /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */
1188 int ret;
1189 struct counters *itlb;
1190 struct counters *unhalt;
1191 double un, d1, res;
1192
1193 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1194 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1195 if (pos != -1) {
1196 d1 = itlb->vals[pos] * 1.0;
1197 un = unhalt->vals[pos] * 1.0;
1198 } else {
1199 d1 = itlb->sum * 1.0;
1200 un = unhalt->sum * 1.0;
1201 }
1202 res = d1/un;
1203 ret = printf("%1.3f", res);
1204 return(ret);
1205 }
1206
1207
1208 static int
itlb_miss_broad(struct counters * cpu,int pos)1209 itlb_miss_broad(struct counters *cpu, int pos)
1210 {
1211 /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */
1212 int ret;
1213 struct counters *itlb;
1214 struct counters *unhalt;
1215 struct counters *four_k;
1216 double un, d1, res, k;
1217
1218 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1219 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1220 four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
1221 if (pos != -1) {
1222 d1 = itlb->vals[pos] * 1.0;
1223 un = unhalt->vals[pos] * 1.0;
1224 k = four_k->vals[pos] * 1.0;
1225 } else {
1226 d1 = itlb->sum * 1.0;
1227 un = unhalt->sum * 1.0;
1228 k = four_k->sum * 1.0;
1229 }
1230 res = (7.0 * k + d1)/un;
1231 ret = printf("%1.3f", res);
1232 return(ret);
1233 }
1234
1235
1236 static int
icache_miss(struct counters * cpu,int pos)1237 icache_miss(struct counters *cpu, int pos)
1238 {
1239 /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1240
1241 int ret;
1242 struct counters *itlb, *icache;
1243 struct counters *unhalt;
1244 double un, d1, ic, res;
1245
1246 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1247 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1248 icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1249 if (pos != -1) {
1250 d1 = itlb->vals[pos] * 1.0;
1251 ic = icache->vals[pos] * 1.0;
1252 un = unhalt->vals[pos] * 1.0;
1253 } else {
1254 d1 = itlb->sum * 1.0;
1255 ic = icache->sum * 1.0;
1256 un = unhalt->sum * 1.0;
1257 }
1258 res = (ic-d1)/un;
1259 ret = printf("%1.3f", res);
1260 return(ret);
1261
1262 }
1263
1264 static int
icache_miss_has(struct counters * cpu,int pos)1265 icache_miss_has(struct counters *cpu, int pos)
1266 {
1267 /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1268
1269 int ret;
1270 struct counters *icache;
1271 struct counters *unhalt;
1272 double un, con, ic, res;
1273
1274 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1275 icache = find_counter(cpu, "ICACHE.MISSES");
1276 con = 36.0;
1277 if (pos != -1) {
1278 ic = icache->vals[pos] * 1.0;
1279 un = unhalt->vals[pos] * 1.0;
1280 } else {
1281 ic = icache->sum * 1.0;
1282 un = unhalt->sum * 1.0;
1283 }
1284 res = (con * ic)/un;
1285 ret = printf("%1.3f", res);
1286 return(ret);
1287
1288 }
1289
1290 static int
lcp_stall(struct counters * cpu,int pos)1291 lcp_stall(struct counters *cpu, int pos)
1292 {
1293 /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1294 int ret;
1295 struct counters *ild;
1296 struct counters *unhalt;
1297 double un, d1, res;
1298
1299 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1300 ild = find_counter(cpu, "ILD_STALL.LCP");
1301 if (pos != -1) {
1302 d1 = ild->vals[pos] * 1.0;
1303 un = unhalt->vals[pos] * 1.0;
1304 } else {
1305 d1 = ild->sum * 1.0;
1306 un = unhalt->sum * 1.0;
1307 }
1308 res = d1/un;
1309 ret = printf("%1.3f", res);
1310 return(ret);
1311
1312 }
1313
1314
1315 static int
frontendstall(struct counters * cpu,int pos)1316 frontendstall(struct counters *cpu, int pos)
1317 {
1318 /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1319 int ret;
1320 struct counters *idq;
1321 struct counters *unhalt;
1322 double con, un, id, res;
1323
1324 con = 4.0;
1325 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1326 idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1327 if (pos != -1) {
1328 id = idq->vals[pos] * 1.0;
1329 un = unhalt->vals[pos] * 1.0;
1330 } else {
1331 id = idq->sum * 1.0;
1332 un = unhalt->sum * 1.0;
1333 }
1334 res = id/(un * con);
1335 ret = printf("%1.3f", res);
1336 return(ret);
1337 }
1338
1339 static int
clears(struct counters * cpu,int pos)1340 clears(struct counters *cpu, int pos)
1341 {
1342 /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1343 * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/
1344
1345 int ret;
1346 struct counters *clr1, *clr2, *clr3;
1347 struct counters *unhalt;
1348 double con, un, cl1, cl2, cl3, res;
1349
1350 con = 100.0;
1351 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1352 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1353 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1354 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1355
1356 if (pos != -1) {
1357 cl1 = clr1->vals[pos] * 1.0;
1358 cl2 = clr2->vals[pos] * 1.0;
1359 cl3 = clr3->vals[pos] * 1.0;
1360 un = unhalt->vals[pos] * 1.0;
1361 } else {
1362 cl1 = clr1->sum * 1.0;
1363 cl2 = clr2->sum * 1.0;
1364 cl3 = clr3->sum * 1.0;
1365 un = unhalt->sum * 1.0;
1366 }
1367 res = ((cl1 + cl2 + cl3) * con)/un;
1368 ret = printf("%1.3f", res);
1369 return(ret);
1370 }
1371
1372
1373
1374 static int
clears_broad(struct counters * cpu,int pos)1375 clears_broad(struct counters *cpu, int pos)
1376 {
1377 int ret;
1378 struct counters *clr1, *clr2, *clr3, *cyc;
1379 struct counters *unhalt;
1380 double con, un, cl1, cl2, cl3, cy, res;
1381
1382 con = 100.0;
1383 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1384 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1385 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1386 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1387 cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
1388 if (pos != -1) {
1389 cl1 = clr1->vals[pos] * 1.0;
1390 cl2 = clr2->vals[pos] * 1.0;
1391 cl3 = clr3->vals[pos] * 1.0;
1392 cy = cyc->vals[pos] * 1.0;
1393 un = unhalt->vals[pos] * 1.0;
1394 } else {
1395 cl1 = clr1->sum * 1.0;
1396 cl2 = clr2->sum * 1.0;
1397 cl3 = clr3->sum * 1.0;
1398 cy = cyc->sum * 1.0;
1399 un = unhalt->sum * 1.0;
1400 }
1401 /* Formula not listed but extrapulated to add the cy ?? */
1402 res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1403 ret = printf("%1.3f", res);
1404 return(ret);
1405 }
1406
1407
1408
1409
1410
1411 static int
microassist(struct counters * cpu,int pos)1412 microassist(struct counters *cpu, int pos)
1413 {
1414 /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1415 int ret;
1416 struct counters *idq;
1417 struct counters *unhalt;
1418 double un, id, res, con;
1419
1420 con = 4.0;
1421 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1422 idq = find_counter(cpu, "IDQ.MS_UOPS");
1423 if (pos != -1) {
1424 id = idq->vals[pos] * 1.0;
1425 un = unhalt->vals[pos] * 1.0;
1426 } else {
1427 id = idq->sum * 1.0;
1428 un = unhalt->sum * 1.0;
1429 }
1430 res = id/(un * con);
1431 ret = printf("%1.3f", res);
1432 return(ret);
1433 }
1434
1435
1436 static int
microassist_broad(struct counters * cpu,int pos)1437 microassist_broad(struct counters *cpu, int pos)
1438 {
1439 int ret;
1440 struct counters *idq;
1441 struct counters *unhalt;
1442 struct counters *uopiss;
1443 struct counters *uopret;
1444 double un, id, res, con, uoi, uor;
1445
1446 con = 4.0;
1447 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1448 idq = find_counter(cpu, "IDQ.MS_UOPS");
1449 uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1450 uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1451 if (pos != -1) {
1452 id = idq->vals[pos] * 1.0;
1453 un = unhalt->vals[pos] * 1.0;
1454 uoi = uopiss->vals[pos] * 1.0;
1455 uor = uopret->vals[pos] * 1.0;
1456 } else {
1457 id = idq->sum * 1.0;
1458 un = unhalt->sum * 1.0;
1459 uoi = uopiss->sum * 1.0;
1460 uor = uopret->sum * 1.0;
1461 }
1462 res = (uor/uoi) * (id/(un * con));
1463 ret = printf("%1.3f", res);
1464 return(ret);
1465 }
1466
1467
1468 static int
aliasing(struct counters * cpu,int pos)1469 aliasing(struct counters *cpu, int pos)
1470 {
1471 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1472 int ret;
1473 struct counters *ld;
1474 struct counters *unhalt;
1475 double un, lds, con, res;
1476
1477 con = 5.0;
1478 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1479 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1480 if (pos != -1) {
1481 lds = ld->vals[pos] * 1.0;
1482 un = unhalt->vals[pos] * 1.0;
1483 } else {
1484 lds = ld->sum * 1.0;
1485 un = unhalt->sum * 1.0;
1486 }
1487 res = (lds * con)/un;
1488 ret = printf("%1.3f", res);
1489 return(ret);
1490 }
1491
1492 static int
aliasing_broad(struct counters * cpu,int pos)1493 aliasing_broad(struct counters *cpu, int pos)
1494 {
1495 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1496 int ret;
1497 struct counters *ld;
1498 struct counters *unhalt;
1499 double un, lds, con, res;
1500
1501 con = 7.0;
1502 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1503 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1504 if (pos != -1) {
1505 lds = ld->vals[pos] * 1.0;
1506 un = unhalt->vals[pos] * 1.0;
1507 } else {
1508 lds = ld->sum * 1.0;
1509 un = unhalt->sum * 1.0;
1510 }
1511 res = (lds * con)/un;
1512 ret = printf("%1.3f", res);
1513 return(ret);
1514 }
1515
1516
1517 static int
fpassists(struct counters * cpu,int pos)1518 fpassists(struct counters *cpu, int pos)
1519 {
1520 /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1521 int ret;
1522 struct counters *fp;
1523 struct counters *inst;
1524 double un, fpd, res;
1525
1526 inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1527 fp = find_counter(cpu, "FP_ASSIST.ANY");
1528 if (pos != -1) {
1529 fpd = fp->vals[pos] * 1.0;
1530 un = inst->vals[pos] * 1.0;
1531 } else {
1532 fpd = fp->sum * 1.0;
1533 un = inst->sum * 1.0;
1534 }
1535 res = fpd/un;
1536 ret = printf("%1.3f", res);
1537 return(ret);
1538 }
1539
1540 static int
otherassistavx(struct counters * cpu,int pos)1541 otherassistavx(struct counters *cpu, int pos)
1542 {
1543 /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
1544 int ret;
1545 struct counters *oth;
1546 struct counters *unhalt;
1547 double un, ot, con, res;
1548
1549 con = 75.0;
1550 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1551 oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1552 if (pos != -1) {
1553 ot = oth->vals[pos] * 1.0;
1554 un = unhalt->vals[pos] * 1.0;
1555 } else {
1556 ot = oth->sum * 1.0;
1557 un = unhalt->sum * 1.0;
1558 }
1559 res = (ot * con)/un;
1560 ret = printf("%1.3f", res);
1561 return(ret);
1562 }
1563
1564 static int
otherassistsse(struct counters * cpu,int pos)1565 otherassistsse(struct counters *cpu, int pos)
1566 {
1567
1568 int ret;
1569 struct counters *oth;
1570 struct counters *unhalt;
1571 double un, ot, con, res;
1572
1573 /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
1574 con = 75.0;
1575 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1576 oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1577 if (pos != -1) {
1578 ot = oth->vals[pos] * 1.0;
1579 un = unhalt->vals[pos] * 1.0;
1580 } else {
1581 ot = oth->sum * 1.0;
1582 un = unhalt->sum * 1.0;
1583 }
1584 res = (ot * con)/un;
1585 ret = printf("%1.3f", res);
1586 return(ret);
1587 }
1588
1589 static int
efficiency1(struct counters * cpu,int pos)1590 efficiency1(struct counters *cpu, int pos)
1591 {
1592
1593 int ret;
1594 struct counters *uops;
1595 struct counters *unhalt;
1596 double un, ot, con, res;
1597
1598 /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1599 con = 4.0;
1600 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1601 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1602 if (pos != -1) {
1603 ot = uops->vals[pos] * 1.0;
1604 un = unhalt->vals[pos] * 1.0;
1605 } else {
1606 ot = uops->sum * 1.0;
1607 un = unhalt->sum * 1.0;
1608 }
1609 res = ot/(con * un);
1610 ret = printf("%1.3f", res);
1611 return(ret);
1612 }
1613
1614 static int
efficiency2(struct counters * cpu,int pos)1615 efficiency2(struct counters *cpu, int pos)
1616 {
1617
1618 int ret;
1619 struct counters *uops;
1620 struct counters *unhalt;
1621 double un, ot, res;
1622
1623 /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1624 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1625 uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1626 if (pos != -1) {
1627 ot = uops->vals[pos] * 1.0;
1628 un = unhalt->vals[pos] * 1.0;
1629 } else {
1630 ot = uops->sum * 1.0;
1631 un = unhalt->sum * 1.0;
1632 }
1633 res = un/ot;
1634 ret = printf("%1.3f", res);
1635 return(ret);
1636 }
1637
1638 #define SANDY_BRIDGE_COUNT 20
1639 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1640 /*01*/ { "allocstall1", "thresh > .05",
1641 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1642 allocstall1, 2 },
1643 /* -- not defined for SB right (partial-rat_stalls) 02*/
1644 { "allocstall2", "thresh > .05",
1645 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
1646 allocstall2, 2 },
1647 /*03*/ { "br_miss", "thresh >= .2",
1648 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1649 br_mispredict, 2 },
1650 /*04*/ { "splitload", "thresh >= .1",
1651 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1652 splitload_sb, 2 },
1653 /* 05*/ { "splitstore", "thresh >= .01",
1654 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1655 splitstore_sb, 2 },
1656 /*06*/ { "contested", "thresh >= .05",
1657 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1658 contested, 2 },
1659 /*07*/ { "blockstorefwd", "thresh >= .05",
1660 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1661 blockstoreforward, 2 },
1662 /*08*/ { "cache2", "thresh >= .2",
1663 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1664 cache2, 4 },
1665 /*09*/ { "cache1", "thresh >= .2",
1666 "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1667 cache1, 2 },
1668 /*10*/ { "dtlbmissload", "thresh >= .1",
1669 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1670 dtlb_missload, 3 },
1671 /*11*/ { "dtlbmissstore", "thresh >= .05",
1672 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1673 dtlb_missstore, 3 },
1674 /*12*/ { "frontendstall", "thresh >= .15",
1675 "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1676 frontendstall, 2 },
1677 /*13*/ { "clears", "thresh >= .02",
1678 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1679 clears, 4 },
1680 /*14*/ { "microassist", "thresh >= .05",
1681 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1682 microassist, 2 },
1683 /*15*/ { "aliasing_4k", "thresh >= .1",
1684 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1685 aliasing, 2 },
1686 /*16*/ { "fpassist", "look for a excessive value",
1687 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1688 fpassists, 2 },
1689 /*17*/ { "otherassistavx", "look for a excessive value",
1690 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1691 otherassistavx, 2},
1692 /*18*/ { "otherassistsse", "look for a excessive value",
1693 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1694 otherassistsse, 2 },
1695 /*19*/ { "eff1", "thresh < .9",
1696 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1697 efficiency1, 2 },
1698 /*20*/ { "eff2", "thresh > 1.0",
1699 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1700 efficiency2, 2 },
1701 };
1702
1703
1704 #define IVY_BRIDGE_COUNT 21
1705 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1706 /*1*/ { "eff1", "thresh < .75",
1707 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1708 efficiency1, 2 },
1709 /*2*/ { "eff2", "thresh > 1.0",
1710 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1711 efficiency2, 2 },
1712 /*3*/ { "itlbmiss", "thresh > .05",
1713 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1714 itlb_miss, 2 },
1715 /*4*/ { "icachemiss", "thresh > .05",
1716 "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1717 icache_miss, 3 },
1718 /*5*/ { "lcpstall", "thresh > .05",
1719 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1720 lcp_stall, 2 },
1721 /*6*/ { "cache1", "thresh >= .2",
1722 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1723 cache1ib, 2 },
1724 /*7*/ { "cache2", "thresh >= .2",
1725 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1726 cache2ib, 2 },
1727 /*8*/ { "contested", "thresh >= .05",
1728 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1729 contested, 2 },
1730 /*9*/ { "datashare", "thresh >= .05",
1731 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1732 datasharing, 2 },
1733 /*10*/ { "blockstorefwd", "thresh >= .05",
1734 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1735 blockstoreforward, 2 },
1736 /*11*/ { "splitload", "thresh >= .1",
1737 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1738 splitloadib, 4 },
1739 /*12*/ { "splitstore", "thresh >= .01",
1740 "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1741 splitstore, 2 },
1742 /*13*/ { "aliasing_4k", "thresh >= .1",
1743 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1744 aliasing, 2 },
1745 /*14*/ { "dtlbmissload", "thresh >= .1",
1746 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1747 dtlb_missload , 3},
1748 /*15*/ { "dtlbmissstore", "thresh >= .05",
1749 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1750 dtlb_missstore, 3 },
1751 /*16*/ { "br_miss", "thresh >= .2",
1752 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1753 br_mispredictib, 8 },
1754 /*17*/ { "clears", "thresh >= .02",
1755 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1756 clears, 4 },
1757 /*18*/ { "microassist", "thresh >= .05",
1758 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1759 microassist, 2 },
1760 /*19*/ { "fpassist", "look for a excessive value",
1761 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1762 fpassists, 2 },
1763 /*20*/ { "otherassistavx", "look for a excessive value",
1764 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1765 otherassistavx , 2},
1766 /*21*/ { "otherassistsse", "look for a excessive value",
1767 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1768 otherassistsse, 2 },
1769 };
1770
1771 #define HASWELL_COUNT 20
1772 static struct cpu_entry haswell[HASWELL_COUNT] = {
1773 /*1*/ { "eff1", "thresh < .75",
1774 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1775 efficiency1, 2 },
1776 /*2*/ { "eff2", "thresh > 1.0",
1777 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1778 efficiency2, 2 },
1779 /*3*/ { "itlbmiss", "thresh > .05",
1780 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1781 itlb_miss, 2 },
1782 /*4*/ { "icachemiss", "thresh > .05",
1783 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1784 icache_miss_has, 2 },
1785 /*5*/ { "lcpstall", "thresh > .05",
1786 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1787 lcp_stall, 2 },
1788 /*6*/ { "cache1", "thresh >= .2",
1789 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1790 cache1ib, 2 },
1791 /*7*/ { "cache2", "thresh >= .2",
1792 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1793 cache2has, 4 },
1794 /*8*/ { "contested", "thresh >= .05",
1795 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1796 contested_has, 2 },
1797 /*9*/ { "datashare", "thresh >= .05",
1798 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1799 datasharing_has, 2 },
1800 /*10*/ { "blockstorefwd", "thresh >= .05",
1801 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1802 blockstoreforward, 2 },
1803 /*11*/ { "splitload", "thresh >= .1",
1804 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
1805 splitload , 2},
1806 /*12*/ { "splitstore", "thresh >= .01",
1807 "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1808 splitstore, 2 },
1809 /*13*/ { "aliasing_4k", "thresh >= .1",
1810 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1811 aliasing, 2 },
1812 /*14*/ { "dtlbmissload", "thresh >= .1",
1813 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1814 dtlb_missload, 3 },
1815 /*15*/ { "br_miss", "thresh >= .2",
1816 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1817 br_mispredict, 2 },
1818 /*16*/ { "clears", "thresh >= .02",
1819 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1820 clears, 4 },
1821 /*17*/ { "microassist", "thresh >= .05",
1822 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1823 microassist, 2 },
1824 /*18*/ { "fpassist", "look for a excessive value",
1825 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1826 fpassists, 2 },
1827 /*19*/ { "otherassistavx", "look for a excessive value",
1828 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1829 otherassistavx, 2 },
1830 /*20*/ { "otherassistsse", "look for a excessive value",
1831 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1832 otherassistsse, 2 },
1833 };
1834
1835
/*
 * Print the formula behind the canned Broadwell expression "name",
 * followed by the threshold at which the value hints at a possible
 * performance improvement.
 *
 * name: canned expression name (e.g. "eff1", "br_miss").
 *
 * An unrecognized name is reported on stdout with the threshold text
 * "unknown entry".  Nothing is returned.
 *
 * NOTE(review): the thresholds printed here for "datashare" and
 * "microassist" differ from the ones stored in the broadwell[] table
 * -- confirm which set is the intended one.
 */
static void
explain_name_broad(const char *name)
{
	const char *mythresh;

	if (strcmp(name, "eff1") == 0) {
		printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh < .75";
	} else if (strcmp(name, "eff2") == 0) {
		printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
		mythresh = "thresh > 1.0";
	} else if (strcmp(name, "itlbmiss") == 0) {
		/* Event name spelled with '.' to match the pmcstat event. */
		printf("Examine (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "icachemiss") == 0) {
		printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "lcpstall") == 0) {
		printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "cache1") == 0) {
		printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "cache2") == 0) {
		printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "contested") == 0) {
		printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "datashare") == 0) {
		printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "blockstorefwd") == 0) {
		printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "aliasing_4k") == 0) {
		printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "dtlbmissload") == 0) {
		printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
		printf(" / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "br_miss") == 0) {
		/* Event names corrected: RETIRED, ALL_BRANCHES, UOPS_ISSUED. */
		printf("Examine BR_MISP_RETIRED.ALL_BRANCHES_PS / (BR_MISP_RETIRED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n");
		printf(" (UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n");
		printf("CPU_CLK_UNHALTED.THREAD * 4)\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "clears") == 0) {
		printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
		printf(" MACHINE_CLEARS.SMC + \n");
		printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .02";
	} else if (strcmp(name, "fpassist") == 0) {
		printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistavx") == 0) {
		printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "microassist") == 0) {
		printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n");
		printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
		mythresh = "thresh >= .05";
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
1904
1905
1906 #define BROADWELL_COUNT 17
1907 static struct cpu_entry broadwell[BROADWELL_COUNT] = {
1908 /*1*/ { "eff1", "thresh < .75",
1909 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1910 efficiency1, 2 },
1911 /*2*/ { "eff2", "thresh > 1.0",
1912 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1913 efficiency2, 2 },
1914 /*3*/ { "itlbmiss", "thresh > .05",
1915 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
1916 itlb_miss_broad, 3 },
1917 /*4*/ { "icachemiss", "thresh > .05",
1918 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1919 icache_miss_has, 2 },
1920 /*5*/ { "lcpstall", "thresh > .05",
1921 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1922 lcp_stall, 2 },
1923 /*6*/ { "cache1", "thresh >= .1",
1924 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1925 cache1broad, 2 },
1926 /*7*/ { "cache2", "thresh >= .2",
1927 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1928 cache2broad, 2 },
1929 /*8*/ { "contested", "thresh >= .05",
1930 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
1931 contestedbroad, 2 },
1932 /*9*/ { "datashare", "thresh >= .05",
1933 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1934 datasharing_has, 2 },
1935 /*10*/ { "blockstorefwd", "thresh >= .05",
1936 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1937 blockstoreforward, 2 },
1938 /*11*/ { "aliasing_4k", "thresh >= .1",
1939 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1940 aliasing_broad, 2 },
1941 /*12*/ { "dtlbmissload", "thresh >= .1",
1942 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1943 dtlb_missload, 3 },
1944 /*13*/ { "br_miss", "thresh >= .2",
1945 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1946 br_mispredict_broad, 7 },
1947 /*14*/ { "clears", "thresh >= .02",
1948 "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1949 clears_broad, 5 },
1950 /*15*/ { "fpassist", "look for a excessive value",
1951 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1952 fpassists, 2 },
1953 /*16*/ { "otherassistavx", "look for a excessive value",
1954 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1955 otherassistavx, 2 },
1956 /*17*/ { "microassist", "thresh >= .2",
1957 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1",
1958 microassist_broad, 4 },
1959 };
1960
1961
1962 static void
set_sandybridge(void)1963 set_sandybridge(void)
1964 {
1965 strcpy(the_cpu.cputype, "SandyBridge PMC");
1966 the_cpu.number = SANDY_BRIDGE_COUNT;
1967 the_cpu.ents = sandy_bridge;
1968 the_cpu.explain = explain_name_sb;
1969 }
1970
1971 static void
set_ivybridge(void)1972 set_ivybridge(void)
1973 {
1974 strcpy(the_cpu.cputype, "IvyBridge PMC");
1975 the_cpu.number = IVY_BRIDGE_COUNT;
1976 the_cpu.ents = ivy_bridge;
1977 the_cpu.explain = explain_name_ib;
1978 }
1979
1980
1981 static void
set_haswell(void)1982 set_haswell(void)
1983 {
1984 strcpy(the_cpu.cputype, "HASWELL PMC");
1985 the_cpu.number = HASWELL_COUNT;
1986 the_cpu.ents = haswell;
1987 the_cpu.explain = explain_name_has;
1988 }
1989
1990
1991 static void
set_broadwell(void)1992 set_broadwell(void)
1993 {
1994 strcpy(the_cpu.cputype, "HASWELL PMC");
1995 the_cpu.number = BROADWELL_COUNT;
1996 the_cpu.ents = broadwell;
1997 the_cpu.explain = explain_name_broad;
1998 }
1999
2000
2001 static int
set_expression(const char * name)2002 set_expression(const char *name)
2003 {
2004 int found = 0, i;
2005 for(i=0 ; i< the_cpu.number; i++) {
2006 if (strcmp(name, the_cpu.ents[i].name) == 0) {
2007 found = 1;
2008 expression = the_cpu.ents[i].func;
2009 command = the_cpu.ents[i].command;
2010 threshold = the_cpu.ents[i].thresh;
2011 if (the_cpu.ents[i].counters_required > max_pmc_counters) {
2012 printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
2013 the_cpu.ents[i].name,
2014 the_cpu.ents[i].counters_required, max_pmc_counters);
2015 printf("Sorry this test can not be run\n");
2016 if (run_all == 0) {
2017 exit(-1);
2018 } else {
2019 return(-1);
2020 }
2021 }
2022 break;
2023 }
2024 }
2025 if (!found) {
2026 printf("For CPU type %s we have no expression:%s\n",
2027 the_cpu.cputype, name);
2028 exit(-1);
2029 }
2030 return(0);
2031 }
2032
2033
2034
2035
2036
2037 static int
validate_expression(char * name)2038 validate_expression(char *name)
2039 {
2040 int i, found;
2041
2042 found = 0;
2043 for(i=0 ; i< the_cpu.number; i++) {
2044 if (strcmp(name, the_cpu.ents[i].name) == 0) {
2045 found = 1;
2046 break;
2047 }
2048 }
2049 if (!found) {
2050 return(-1);
2051 }
2052 return (0);
2053 }
2054
2055 static void
do_expression(struct counters * cpu,int pos)2056 do_expression(struct counters *cpu, int pos)
2057 {
2058 if (expression == NULL)
2059 return;
2060 (*expression)(cpu, pos);
2061 }
2062
2063 static void
process_header(int idx,char * p)2064 process_header(int idx, char *p)
2065 {
2066 struct counters *up;
2067 int i, len, nlen;
2068 /*
2069 * Given header element idx, at p in
2070 * form 's/NN/nameof'
2071 * process the entry to pull out the name and
2072 * the CPU number.
2073 */
2074 if (strncmp(p, "s/", 2)) {
2075 printf("Check -- invalid header no s/ in %s\n",
2076 p);
2077 return;
2078 }
2079 up = &cnts[idx];
2080 up->cpu = strtol(&p[2], NULL, 10);
2081 len = strlen(p);
2082 for (i=2; i<len; i++) {
2083 if (p[i] == '/') {
2084 nlen = strlen(&p[(i+1)]);
2085 if (nlen < (MAX_NLEN-1)) {
2086 strcpy(up->counter_name, &p[(i+1)]);
2087 } else {
2088 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
2089 }
2090 }
2091 }
2092 }
2093
2094 static void
build_counters_from_header(FILE * io)2095 build_counters_from_header(FILE *io)
2096 {
2097 char buffer[8192], *p;
2098 int i, len, cnt;
2099 size_t mlen;
2100
2101 /* We have a new start, lets
2102 * setup our headers and cpus.
2103 */
2104 if (fgets(buffer, sizeof(buffer), io) == NULL) {
2105 printf("First line can't be read from file err:%d\n", errno);
2106 return;
2107 }
2108 /*
2109 * Ok output is an array of counters. Once
2110 * we start to read the values in we must
2111 * put them in there slot to match there CPU and
2112 * counter being updated. We create a mass array
2113 * of the counters, filling in the CPU and
2114 * counter name.
2115 */
2116 /* How many do we get? */
2117 len = strlen(buffer);
2118 for (i=0, cnt=0; i<len; i++) {
2119 if (strncmp(&buffer[i], "s/", 2) == 0) {
2120 cnt++;
2121 for(;i<len;i++) {
2122 if (buffer[i] == ' ')
2123 break;
2124 }
2125 }
2126 }
2127 mlen = sizeof(struct counters) * cnt;
2128 cnts = malloc(mlen);
2129 ncnts = cnt;
2130 if (cnts == NULL) {
2131 printf("No memory err:%d\n", errno);
2132 return;
2133 }
2134 memset(cnts, 0, mlen);
2135 for (i=0, cnt=0; i<len; i++) {
2136 if (strncmp(&buffer[i], "s/", 2) == 0) {
2137 p = &buffer[i];
2138 for(;i<len;i++) {
2139 if (buffer[i] == ' ') {
2140 buffer[i] = 0;
2141 break;
2142 }
2143 }
2144 process_header(cnt, p);
2145 cnt++;
2146 }
2147 }
2148 if (verbose)
2149 printf("We have %d entries\n", cnt);
2150 }
2151 extern int max_to_collect;
2152 int max_to_collect = MAX_COUNTER_SLOTS;
2153
2154 static int
read_a_line(FILE * io)2155 read_a_line(FILE *io)
2156 {
2157 char buffer[8192], *p, *stop;
2158 int pos, i;
2159
2160 if (fgets(buffer, sizeof(buffer), io) == NULL) {
2161 return(0);
2162 }
2163 p = buffer;
2164 for (i=0; i<ncnts; i++) {
2165 pos = cnts[i].pos;
2166 cnts[i].vals[pos] = strtol(p, &stop, 0);
2167 cnts[i].pos++;
2168 cnts[i].sum += cnts[i].vals[pos];
2169 p = stop;
2170 }
2171 return (1);
2172 }
2173
2174 extern int cpu_count_out;
2175 int cpu_count_out=0;
2176
2177 static void
print_header(void)2178 print_header(void)
2179 {
2180 int i, cnt, printed_cnt;
2181
2182 printf("*********************************\n");
2183 for(i=0, cnt=0; i<MAX_CPU; i++) {
2184 if (glob_cpu[i]) {
2185 cnt++;
2186 }
2187 }
2188 cpu_count_out = cnt;
2189 for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2190 if (glob_cpu[i]) {
2191 printf("CPU%d", i);
2192 printed_cnt++;
2193 }
2194 if (printed_cnt == cnt) {
2195 printf("\n");
2196 break;
2197 } else {
2198 printf("\t");
2199 }
2200 }
2201 }
2202
2203 static void
lace_cpus_together(void)2204 lace_cpus_together(void)
2205 {
2206 int i, j, lace_cpu;
2207 struct counters *cpat, *at;
2208
2209 for(i=0; i<ncnts; i++) {
2210 cpat = &cnts[i];
2211 if (cpat->next_cpu) {
2212 /* Already laced in */
2213 continue;
2214 }
2215 lace_cpu = cpat->cpu;
2216 if (lace_cpu >= MAX_CPU) {
2217 printf("CPU %d to big\n", lace_cpu);
2218 continue;
2219 }
2220 if (glob_cpu[lace_cpu] == NULL) {
2221 glob_cpu[lace_cpu] = cpat;
2222 } else {
2223 /* Already processed this cpu */
2224 continue;
2225 }
2226 /* Ok look forward for cpu->cpu and link in */
2227 for(j=(i+1); j<ncnts; j++) {
2228 at = &cnts[j];
2229 if (at->next_cpu) {
2230 continue;
2231 }
2232 if (at->cpu == lace_cpu) {
2233 /* Found one */
2234 cpat->next_cpu = at;
2235 cpat = at;
2236 }
2237 }
2238 }
2239 }
2240
2241
2242 static void
process_file(char * filename)2243 process_file(char *filename)
2244 {
2245 FILE *io;
2246 int i;
2247 int line_at, not_done;
2248 pid_t pid_of_command=0;
2249
2250 if (filename == NULL) {
2251 io = my_popen(command, "r", &pid_of_command);
2252 } else {
2253 io = fopen(filename, "r");
2254 if (io == NULL) {
2255 printf("Can't process file %s err:%d\n",
2256 filename, errno);
2257 return;
2258 }
2259 }
2260 build_counters_from_header(io);
2261 if (cnts == NULL) {
2262 /* Nothing we can do */
2263 printf("Nothing to do -- no counters built\n");
2264 if (io) {
2265 fclose(io);
2266 }
2267 return;
2268 }
2269 lace_cpus_together();
2270 print_header();
2271 if (verbose) {
2272 for (i=0; i<ncnts; i++) {
2273 printf("Counter:%s cpu:%d index:%d\n",
2274 cnts[i].counter_name,
2275 cnts[i].cpu, i);
2276 }
2277 }
2278 line_at = 0;
2279 not_done = 1;
2280 while(not_done) {
2281 if (read_a_line(io)) {
2282 line_at++;
2283 } else {
2284 break;
2285 }
2286 if (line_at >= max_to_collect) {
2287 not_done = 0;
2288 }
2289 if (filename == NULL) {
2290 int cnt;
2291 /* For the ones we dynamically open we print now */
2292 for(i=0, cnt=0; i<MAX_CPU; i++) {
2293 do_expression(glob_cpu[i], (line_at-1));
2294 cnt++;
2295 if (cnt == cpu_count_out) {
2296 printf("\n");
2297 break;
2298 } else {
2299 printf("\t");
2300 }
2301 }
2302 }
2303 }
2304 if (filename) {
2305 fclose(io);
2306 } else {
2307 my_pclose(io, pid_of_command);
2308 }
2309 }
#if defined(__amd64__)
/*
 * cpuid(in, a, b, c, d): execute the CPUID instruction with EAX set to
 * "in" and store the resulting EAX, EBX, ECX and EDX in a, b, c and d.
 * The expansion already ends in ';'.
 */
#define cpuid(in,a,b,c,d)\
  asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));

/*
 * Execute CPUID with EAX = ax and ECX = cx and store the resulting
 * EAX, EBX, ECX and EDX in p[0], p[1], p[2] and p[3].
 */
static __inline void
do_cpuid(u_int ax, u_int cx, u_int *p)
{
	__asm __volatile("cpuid"
			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
			 : "0" (ax), "c" (cx) );
}

#else
/*
 * Not amd64: both helpers expand to nothing, so their output
 * arguments are left untouched.
 */
#define cpuid(in, a, b, c, d)
#define do_cpuid(ax, cx, p)
#endif
2326
2327 static void
get_cpuid_set(void)2328 get_cpuid_set(void)
2329 {
2330 unsigned long eax, ebx, ecx, edx;
2331 int model;
2332 pid_t pid_of_command=0;
2333 size_t sz, len;
2334 FILE *io;
2335 char linebuf[1024], *str;
2336 u_int reg[4];
2337
2338 eax = ebx = ecx = edx = 0;
2339
2340 cpuid(0, eax, ebx, ecx, edx);
2341 if (ebx == 0x68747541) {
2342 printf("AMD processors are not supported by this program\n");
2343 printf("Sorry\n");
2344 exit(0);
2345 } else if (ebx == 0x6972794) {
2346 printf("Cyrix processors are not supported by this program\n");
2347 printf("Sorry\n");
2348 exit(0);
2349 } else if (ebx == 0x756e6547) {
2350 printf("Genuine Intel\n");
2351 } else {
2352 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2353 exit(0);
2354 }
2355 cpuid(1, eax, ebx, ecx, edx);
2356 model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2357 printf("CPU model is 0x%x id:0x%lx\n", model, eax);
2358 switch (eax & 0xF00) {
2359 case 0x500: /* Pentium family processors */
2360 printf("Intel Pentium P5\n");
2361 goto not_supported;
2362 break;
2363 case 0x600: /* Pentium Pro, Celeron, Pentium II & III */
2364 switch (model) {
2365 case 0x1:
2366 printf("Intel Pentium P6\n");
2367 goto not_supported;
2368 break;
2369 case 0x3:
2370 case 0x5:
2371 printf("Intel PII\n");
2372 goto not_supported;
2373 break;
2374 case 0x6: case 0x16:
2375 printf("Intel CL\n");
2376 goto not_supported;
2377 break;
2378 case 0x7: case 0x8: case 0xA: case 0xB:
2379 printf("Intel PIII\n");
2380 goto not_supported;
2381 break;
2382 case 0x9: case 0xD:
2383 printf("Intel PM\n");
2384 goto not_supported;
2385 break;
2386 case 0xE:
2387 printf("Intel CORE\n");
2388 goto not_supported;
2389 break;
2390 case 0xF:
2391 printf("Intel CORE2\n");
2392 goto not_supported;
2393 break;
2394 case 0x17:
2395 printf("Intel CORE2EXTREME\n");
2396 goto not_supported;
2397 break;
2398 case 0x1C: /* Per Intel document 320047-002. */
2399 printf("Intel ATOM\n");
2400 goto not_supported;
2401 break;
2402 case 0x1A:
2403 case 0x1E: /*
2404 * Per Intel document 253669-032 9/2009,
2405 * pages A-2 and A-57
2406 */
2407 case 0x1F: /*
2408 * Per Intel document 253669-032 9/2009,
2409 * pages A-2 and A-57
2410 */
2411 printf("Intel COREI7\n");
2412 goto not_supported;
2413 break;
2414 case 0x2E:
2415 printf("Intel NEHALEM\n");
2416 goto not_supported;
2417 break;
2418 case 0x25: /* Per Intel document 253669-033US 12/2009. */
2419 case 0x2C: /* Per Intel document 253669-033US 12/2009. */
2420 printf("Intel WESTMERE\n");
2421 goto not_supported;
2422 break;
2423 case 0x2F: /* Westmere-EX, seen in wild */
2424 printf("Intel WESTMERE\n");
2425 goto not_supported;
2426 break;
2427 case 0x2A: /* Per Intel document 253669-039US 05/2011. */
2428 printf("Intel SANDYBRIDGE\n");
2429 set_sandybridge();
2430 break;
2431 case 0x2D: /* Per Intel document 253669-044US 08/2012. */
2432 printf("Intel SANDYBRIDGE_XEON\n");
2433 set_sandybridge();
2434 break;
2435 case 0x3A: /* Per Intel document 253669-043US 05/2012. */
2436 printf("Intel IVYBRIDGE\n");
2437 set_ivybridge();
2438 break;
2439 case 0x3E: /* Per Intel document 325462-045US 01/2013. */
2440 printf("Intel IVYBRIDGE_XEON\n");
2441 set_ivybridge();
2442 break;
2443 case 0x3F: /* Per Intel document 325462-045US 09/2014. */
2444 printf("Intel HASWELL (Xeon)\n");
2445 set_haswell();
2446 break;
2447 case 0x3C: /* Per Intel document 325462-045US 01/2013. */
2448 case 0x45:
2449 case 0x46:
2450 printf("Intel HASWELL\n");
2451 set_haswell();
2452 break;
2453
2454 case 0x4e:
2455 case 0x5e:
2456 printf("Intel SKY-LAKE\n");
2457 goto not_supported;
2458 break;
2459 case 0x3D:
2460 case 0x47:
2461 printf("Intel BROADWELL\n");
2462 set_broadwell();
2463 break;
2464 case 0x4f:
2465 case 0x56:
2466 printf("Intel BROADWEL (Xeon)\n");
2467 set_broadwell();
2468 break;
2469
2470 case 0x4D:
2471 /* Per Intel document 330061-001 01/2014. */
2472 printf("Intel ATOM_SILVERMONT\n");
2473 goto not_supported;
2474 break;
2475 default:
2476 printf("Intel model 0x%x is not known -- sorry\n",
2477 model);
2478 goto not_supported;
2479 break;
2480 }
2481 break;
2482 case 0xF00: /* P4 */
2483 printf("Intel unknown model %d\n", model);
2484 goto not_supported;
2485 break;
2486 }
2487 do_cpuid(0xa, 0, reg);
2488 max_pmc_counters = (reg[3] & 0x0000000f) + 1;
2489 printf("We have %d PMC counters to work with\n", max_pmc_counters);
2490 /* Ok lets load the list of all known PMC's */
2491 io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2492 if (valid_pmcs == NULL) {
2493 /* Likely */
2494 pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2495 sz = sizeof(char *) * pmc_allocated_cnt;
2496 valid_pmcs = malloc(sz);
2497 if (valid_pmcs == NULL) {
2498 printf("No memory allocation fails at startup?\n");
2499 exit(-1);
2500 }
2501 memset(valid_pmcs, 0, sz);
2502 }
2503
2504 while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2505 if (linebuf[0] != '\t') {
2506 /* sometimes headers ;-) */
2507 continue;
2508 }
2509 len = strlen(linebuf);
2510 if (linebuf[(len-1)] == '\n') {
2511 /* Likely */
2512 linebuf[(len-1)] = 0;
2513 }
2514 str = &linebuf[1];
2515 len = strlen(str) + 1;
2516 valid_pmcs[valid_pmc_cnt] = malloc(len);
2517 if (valid_pmcs[valid_pmc_cnt] == NULL) {
2518 printf("No memory2 allocation fails at startup?\n");
2519 exit(-1);
2520 }
2521 memset(valid_pmcs[valid_pmc_cnt], 0, len);
2522 strcpy(valid_pmcs[valid_pmc_cnt], str);
2523 valid_pmc_cnt++;
2524 if (valid_pmc_cnt >= pmc_allocated_cnt) {
2525 /* Got to expand -- unlikely */
2526 char **more;
2527
2528 sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2529 more = malloc(sz);
2530 if (more == NULL) {
2531 printf("No memory3 allocation fails at startup?\n");
2532 exit(-1);
2533 }
2534 memset(more, 0, sz);
2535 memcpy(more, valid_pmcs, sz);
2536 pmc_allocated_cnt *= 2;
2537 free(valid_pmcs);
2538 valid_pmcs = more;
2539 }
2540 }
2541 my_pclose(io, pid_of_command);
2542 return;
2543 not_supported:
2544 printf("Not supported\n");
2545 exit(-1);
2546 }
2547
2548 static void
explain_all(void)2549 explain_all(void)
2550 {
2551 int i;
2552 printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2553 printf("-------------------------------------------------------------\n");
2554 for(i=0; i<the_cpu.number; i++){
2555 printf("For -e %s ", the_cpu.ents[i].name);
2556 (*the_cpu.explain)(the_cpu.ents[i].name);
2557 printf("----------------------------\n");
2558 }
2559 }
2560
/*
 * Run pmcstat briefly on CPU 0 with the single event "pmc" and print
 * "Pass" plus the first sampled value, or "Failed" plus the reason.
 *
 * pmc:        PMC event name to test.
 * out_so_far: number of characters already printed on the current
 *             line; the result is padded out to column 50.
 *
 * The first line of pmcstat output is searched for "ERROR" or for the
 * PMC name; when the name is found the second line supplies the value
 * that is printed.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	int len, llen, i;

	if (out_so_far < 50) {
		len = 50 - out_so_far;
		for(i=0; i<len; i++) {
			printf(" ");
		}
	}
	/* Bounded formatting: refuse a name that does not fit the buffer. */
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* What we expect to see in the header is the PMC name itself */
	len = strlen(pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	/* "<=" so a name that ends exactly at the end of the line matches. */
	for(i=2; i<=(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], pmc, len) == 0) {
			int j, k;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			len = strlen(line);
			/*
			 * Skip leading blanks one at a time; stepping twice
			 * per blank could swallow the first digit.
			 */
			for (j=0; j<len && line[j] == ' '; j++)
				;
			printf("Pass");
			len = strlen(&line[j]);
			if (len < 20) {
				for(k=0; k<(20-len); k++) {
					printf(" ");
				}
			}
			if (len) {
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, pmc);
out:
	my_pclose(io, pid_of_command);

}
2633
/*
 * Add a private copy of "name" to the string list "vars" unless one
 * of the first cur_cnt entries already holds the same string.
 *
 * vars:    array of string pointers; slot cur_cnt must be NULL.
 * cur_cnt: number of entries currently in use.
 * name:    string to add.
 *
 * Returns 1 when the name was stored in vars[cur_cnt], 0 when it was
 * already present.  An occupied target slot or an allocation failure
 * terminates the program.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char *copy;
	size_t nbytes;
	int slot = 0;

	while (slot < cur_cnt) {
		if (strcmp(vars[slot], name) == 0) {
			/* Already have */
			return(0);
		}
		slot++;
	}
	if (vars[cur_cnt] != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		    cur_cnt, vars[cur_cnt]);
		exit(-1);
	}
	/* New name: duplicate it, terminator included */
	nbytes = strlen(name) + 1;
	copy = malloc(nbytes);
	if (copy == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	memcpy(copy, name, nbytes);
	vars[cur_cnt] = copy;
	return(1);
}
2661
2662 static char *
build_command_for_exp(struct expression * exp)2663 build_command_for_exp(struct expression *exp)
2664 {
2665 /*
2666 * Build the pmcstat command to handle
2667 * the passed in expression.
2668 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2669 * where NNN and QQQ represent the PMC's in the expression
2670 * uniquely..
2671 */
2672 char forming[1024];
2673 int cnt_pmc, alloced_pmcs, i;
2674 struct expression *at;
2675 char **vars, *cmd;
2676 size_t mal;
2677
2678 alloced_pmcs = cnt_pmc = 0;
2679 /* first how many do we have */
2680 at = exp;
2681 while (at) {
2682 if (at->type == TYPE_VALUE_PMC) {
2683 cnt_pmc++;
2684 }
2685 at = at->next;
2686 }
2687 if (cnt_pmc == 0) {
2688 printf("No PMC's in your expression -- nothing to do!!\n");
2689 exit(0);
2690 }
2691 mal = cnt_pmc * sizeof(char *);
2692 vars = malloc(mal);
2693 if (vars == NULL) {
2694 printf("No memory\n");
2695 exit(-1);
2696 }
2697 memset(vars, 0, mal);
2698 at = exp;
2699 while (at) {
2700 if (at->type == TYPE_VALUE_PMC) {
2701 if(add_it_to(vars, alloced_pmcs, at->name)) {
2702 alloced_pmcs++;
2703 }
2704 }
2705 at = at->next;
2706 }
2707 /* Now we have a unique list in vars so create our command */
2708 mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */
2709 for(i=0; i<alloced_pmcs; i++) {
2710 mal += strlen(vars[i]) + 4; /* var + " -s " */
2711 }
2712 cmd = malloc((mal+2));
2713 if (cmd == NULL) {
2714 printf("%s out of mem\n", __FUNCTION__);
2715 exit(-1);
2716 }
2717 memset(cmd, 0, (mal+2));
2718 strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2719 at = exp;
2720 for(i=0; i<alloced_pmcs; i++) {
2721 sprintf(forming, " -s %s", vars[i]);
2722 strcat(cmd, forming);
2723 free(vars[i]);
2724 vars[i] = NULL;
2725 }
2726 free(vars);
2727 return(cmd);
2728 }
2729
2730 static int
user_expr(struct counters * cpu,int pos)2731 user_expr(struct counters *cpu, int pos)
2732 {
2733 int ret;
2734 double res;
2735 struct counters *var;
2736 struct expression *at;
2737
2738 at = master_exp;
2739 while (at) {
2740 if (at->type == TYPE_VALUE_PMC) {
2741 var = find_counter(cpu, at->name);
2742 if (var == NULL) {
2743 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2744 exit(-1);
2745 }
2746 if (pos != -1) {
2747 at->value = var->vals[pos] * 1.0;
2748 } else {
2749 at->value = var->sum * 1.0;
2750 }
2751 }
2752 at = at->next;
2753 }
2754 res = run_expr(master_exp, 1, NULL);
2755 ret = printf("%1.3f", res);
2756 return(ret);
2757 }
2758
2759
2760 static void
set_manual_exp(struct expression * exp)2761 set_manual_exp(struct expression *exp)
2762 {
2763 expression = user_expr;
2764 command = build_command_for_exp(exp);
2765 threshold = "User defined threshold";
2766 }
2767
2768 static void
run_tests(void)2769 run_tests(void)
2770 {
2771 int i, lenout;
2772 printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2773 printf("------------------------------------------------------------------------\n");
2774 for(i=0; i<valid_pmc_cnt; i++) {
2775 lenout = printf("%s", valid_pmcs[i]);
2776 fflush(stdout);
2777 test_for_a_pmc(valid_pmcs[i], lenout);
2778 }
2779 }
2780 static void
list_all(void)2781 list_all(void)
2782 {
2783 int i, cnt, j;
2784 printf("PMC Abbreviation\n");
2785 printf("--------------------------------------------------------------\n");
2786 for(i=0; i<valid_pmc_cnt; i++) {
2787 cnt = printf("%s", valid_pmcs[i]);
2788 for(j=cnt; j<52; j++) {
2789 printf(" ");
2790 }
2791 printf("%%%d\n", i);
2792 }
2793 }
2794
2795
2796 int
main(int argc,char ** argv)2797 main(int argc, char **argv)
2798 {
2799 int i, j, cnt;
2800 char *filename=NULL;
2801 const char *name=NULL;
2802 int help_only = 0;
2803 int test_mode = 0;
2804 int test_at = 0;
2805
2806 get_cpuid_set();
2807 memset(glob_cpu, 0, sizeof(glob_cpu));
2808 while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
2809 switch (i) {
2810 case 'A':
2811 run_all = 1;
2812 break;
2813 case 'L':
2814 list_all();
2815 return(0);
2816 case 'H':
2817 printf("**********************************\n");
2818 explain_all();
2819 printf("**********************************\n");
2820 return(0);
2821 break;
2822 case 'T':
2823 test_mode = 1;
2824 break;
2825 case 'E':
2826 master_exp = parse_expression(optarg);
2827 if (master_exp) {
2828 set_manual_exp(master_exp);
2829 }
2830 break;
2831 case 'e':
2832 if (validate_expression(optarg)) {
2833 printf("Unknown expression %s\n", optarg);
2834 return(0);
2835 }
2836 name = optarg;
2837 set_expression(optarg);
2838 break;
2839 case 'm':
2840 max_to_collect = strtol(optarg, NULL, 0);
2841 if (max_to_collect > MAX_COUNTER_SLOTS) {
2842 /* You can't collect more than max in array */
2843 max_to_collect = MAX_COUNTER_SLOTS;
2844 }
2845 break;
2846 case 'v':
2847 verbose++;
2848 break;
2849 case 'h':
2850 help_only = 1;
2851 break;
2852 case 'i':
2853 filename = optarg;
2854 break;
2855 case '?':
2856 default:
2857 use:
2858 printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2859 argv[0]);
2860 printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2861 printf("-v -- verbose dump debug type things -- you don't want this\n");
2862 printf("-m N -- maximum to collect is N measurements\n");
2863 printf("-e expr-name -- Do expression expr-name\n");
2864 printf("-E 'your expression' -- Do your expression\n");
2865 printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2866 printf("-H -- Don't run anything, just explain all canned expressions\n");
2867 printf("-T -- Test all PMC's defined by this processor\n");
2868 printf("-A -- Run all canned tests\n");
2869 return(0);
2870 break;
2871 }
2872 }
2873 if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
2874 (test_mode == 0) && (master_exp == NULL)) {
2875 printf("Without setting an expression we cannot dynamically gather information\n");
2876 printf("you must supply a filename (and you probably want verbosity)\n");
2877 goto use;
2878 }
2879 if (run_all && max_to_collect > 10) {
2880 max_to_collect = 3;
2881 }
2882 if (test_mode) {
2883 run_tests();
2884 return(0);
2885 }
2886 printf("*********************************\n");
2887 if ((master_exp == NULL) && name) {
2888 (*the_cpu.explain)(name);
2889 } else if (master_exp) {
2890 printf("Examine your expression ");
2891 print_exp(master_exp);
2892 printf("User defined threshold\n");
2893 }
2894 if (help_only) {
2895 return(0);
2896 }
2897 if (run_all) {
2898 more:
2899 name = the_cpu.ents[test_at].name;
2900 printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
2901 test_at++;
2902 if (set_expression(name) == -1) {
2903 if (test_at >= the_cpu.number) {
2904 goto done;
2905 } else
2906 goto more;
2907 }
2908
2909 }
2910 process_file(filename);
2911 if (verbose >= 2) {
2912 for (i=0; i<ncnts; i++) {
2913 printf("Counter:%s cpu:%d index:%d\n",
2914 cnts[i].counter_name,
2915 cnts[i].cpu, i);
2916 for(j=0; j<cnts[i].pos; j++) {
2917 printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2918 }
2919 printf(" sum - %ld\n", (long int)cnts[i].sum);
2920 }
2921 }
2922 if (expression == NULL) {
2923 return(0);
2924 }
2925 if (max_to_collect > 1) {
2926 for(i=0, cnt=0; i<MAX_CPU; i++) {
2927 if (glob_cpu[i]) {
2928 do_expression(glob_cpu[i], -1);
2929 cnt++;
2930 if (cnt == cpu_count_out) {
2931 printf("\n");
2932 break;
2933 } else {
2934 printf("\t");
2935 }
2936 }
2937 }
2938 }
2939 if (run_all && (test_at < the_cpu.number)) {
2940 memset(glob_cpu, 0, sizeof(glob_cpu));
2941 ncnts = 0;
2942 printf("*********************************\n");
2943 goto more;
2944 } else if (run_all) {
2945 done:
2946 printf("*********************************\n");
2947 }
2948 return(0);
2949 }
2950