12025cf9eSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2fa7d9493SBabu Moger /*
3fa7d9493SBabu Moger * Resource Director Technology(RDT)
4fa7d9493SBabu Moger * - Monitoring code
5fa7d9493SBabu Moger *
6fa7d9493SBabu Moger * Copyright (C) 2017 Intel Corporation
7fa7d9493SBabu Moger *
8fa7d9493SBabu Moger * Author:
9fa7d9493SBabu Moger * Vikas Shivappa <[email protected]>
10fa7d9493SBabu Moger *
11fa7d9493SBabu Moger * This replaces the cqm.c based on perf but we reuse a lot of
12fa7d9493SBabu Moger * code and datastructures originally from Peter Zijlstra and Matt Fleming.
13fa7d9493SBabu Moger *
14fa7d9493SBabu Moger * More information about RDT be found in the Intel (R) x86 Architecture
15fa7d9493SBabu Moger * Software Developer Manual June 2016, volume 3, section 17.17.
16fa7d9493SBabu Moger */
17fa7d9493SBabu Moger
1813488150STony Luck #define pr_fmt(fmt) "resctrl: " fmt
1913488150STony Luck
20fb700810SJames Morse #include <linux/cpu.h>
21fa7d9493SBabu Moger #include <linux/module.h>
22f7b1843eSJames Morse #include <linux/sizes.h>
23fa7d9493SBabu Moger #include <linux/slab.h>
24ae2328b5SJames Morse
25fa7d9493SBabu Moger #include <asm/cpu_device_id.h>
26ae2328b5SJames Morse #include <asm/resctrl.h>
27ae2328b5SJames Morse
28fa7d9493SBabu Moger #include "internal.h"
29931be446SHaifeng Xu #include "trace.h"
30fa7d9493SBabu Moger
3140fc735bSJames Morse /**
3240fc735bSJames Morse * struct rmid_entry - dirty tracking for all RMID.
3340fc735bSJames Morse * @closid: The CLOSID for this entry.
3440fc735bSJames Morse * @rmid: The RMID for this entry.
3540fc735bSJames Morse * @busy: The number of domains with cached data using this RMID.
3640fc735bSJames Morse * @list: Member of the rmid_free_lru list when busy == 0.
3740fc735bSJames Morse *
3840fc735bSJames Morse * Depending on the architecture the correct monitor is accessed using
3940fc735bSJames Morse * both @closid and @rmid, or @rmid only.
4040fc735bSJames Morse *
4140fc735bSJames Morse * Take the rdtgroup_mutex when accessing.
4240fc735bSJames Morse */
43fa7d9493SBabu Moger struct rmid_entry {
4440fc735bSJames Morse u32 closid;
45fa7d9493SBabu Moger u32 rmid;
46fa7d9493SBabu Moger int busy;
47fa7d9493SBabu Moger struct list_head list;
48fa7d9493SBabu Moger };
49fa7d9493SBabu Moger
50025d5ac9SRandy Dunlap /*
51025d5ac9SRandy Dunlap * @rmid_free_lru - A least recently used list of free RMIDs
52fa7d9493SBabu Moger * These RMIDs are guaranteed to have an occupancy less than the
53fa7d9493SBabu Moger * threshold occupancy
54fa7d9493SBabu Moger */
55fa7d9493SBabu Moger static LIST_HEAD(rmid_free_lru);
56fa7d9493SBabu Moger
57025d5ac9SRandy Dunlap /*
58b30a55dfSJames Morse * @closid_num_dirty_rmid The number of dirty RMID each CLOSID has.
59b30a55dfSJames Morse * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
60b30a55dfSJames Morse * Indexed by CLOSID. Protected by rdtgroup_mutex.
61b30a55dfSJames Morse */
62b30a55dfSJames Morse static u32 *closid_num_dirty_rmid;
63b30a55dfSJames Morse
64b30a55dfSJames Morse /*
65025d5ac9SRandy Dunlap * @rmid_limbo_count - count of currently unused but (potentially)
66fa7d9493SBabu Moger * dirty RMIDs.
67fa7d9493SBabu Moger * This counts RMIDs that no one is currently using but that
68ae2328b5SJames Morse * may have a occupancy value > resctrl_rmid_realloc_threshold. User can
69ae2328b5SJames Morse * change the threshold occupancy value.
70fa7d9493SBabu Moger */
71fa7d9493SBabu Moger static unsigned int rmid_limbo_count;
72fa7d9493SBabu Moger
73025d5ac9SRandy Dunlap /*
74fa7d9493SBabu Moger * @rmid_entry - The entry in the limbo and free lists.
75fa7d9493SBabu Moger */
76fa7d9493SBabu Moger static struct rmid_entry *rmid_ptrs;
77fa7d9493SBabu Moger
78fa7d9493SBabu Moger /*
79fa7d9493SBabu Moger * Global boolean for rdt_monitor which is true if any
80fa7d9493SBabu Moger * resource monitoring is enabled.
81fa7d9493SBabu Moger */
82fa7d9493SBabu Moger bool rdt_mon_capable;
83fa7d9493SBabu Moger
84fa7d9493SBabu Moger /*
85fa7d9493SBabu Moger * Global to indicate which monitoring events are enabled.
86fa7d9493SBabu Moger */
87fa7d9493SBabu Moger unsigned int rdt_mon_features;
88fa7d9493SBabu Moger
89fa7d9493SBabu Moger /*
90ae2328b5SJames Morse * This is the threshold cache occupancy in bytes at which we will consider an
91fa7d9493SBabu Moger * RMID available for re-allocation.
92fa7d9493SBabu Moger */
93ae2328b5SJames Morse unsigned int resctrl_rmid_realloc_threshold;
94fa7d9493SBabu Moger
95d80975e2SJames Morse /*
96d80975e2SJames Morse * This is the maximum value for the reallocation threshold, in bytes.
97d80975e2SJames Morse */
98d80975e2SJames Morse unsigned int resctrl_rmid_realloc_limit;
99d80975e2SJames Morse
1004868a61dSFenghua Yu #define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5))
1014868a61dSFenghua Yu
102e13db55bSTony Luck static int snc_nodes_per_l3_cache = 1;
103e13db55bSTony Luck
1044868a61dSFenghua Yu /*
105ff61f079SJonathan Corbet * The correction factor table is documented in Documentation/arch/x86/resctrl.rst.
1064868a61dSFenghua Yu * If rmid > rmid threshold, MBM total and local values should be multiplied
1074868a61dSFenghua Yu * by the correction factor.
1084868a61dSFenghua Yu *
1094868a61dSFenghua Yu * The original table is modified for better code:
1104868a61dSFenghua Yu *
1114868a61dSFenghua Yu * 1. The threshold 0 is changed to rmid count - 1 so don't do correction
1124868a61dSFenghua Yu * for the case.
1134868a61dSFenghua Yu * 2. MBM total and local correction table indexed by core counter which is
1144868a61dSFenghua Yu * equal to (x86_cache_max_rmid + 1) / 8 - 1 and is from 0 up to 27.
1154868a61dSFenghua Yu * 3. The correction factor is normalized to 2^20 (1048576) so it's faster
1164868a61dSFenghua Yu * to calculate corrected value by shifting:
1174868a61dSFenghua Yu * corrected_value = (original_value * correction_factor) >> 20
1184868a61dSFenghua Yu */
1194868a61dSFenghua Yu static const struct mbm_correction_factor_table {
1204868a61dSFenghua Yu u32 rmidthreshold;
1214868a61dSFenghua Yu u64 cf;
1224029b970SAndi Kleen } mbm_cf_table[] __initconst = {
1234868a61dSFenghua Yu {7, CF(1.000000)},
1244868a61dSFenghua Yu {15, CF(1.000000)},
1254868a61dSFenghua Yu {15, CF(0.969650)},
1264868a61dSFenghua Yu {31, CF(1.000000)},
1274868a61dSFenghua Yu {31, CF(1.066667)},
1284868a61dSFenghua Yu {31, CF(0.969650)},
1294868a61dSFenghua Yu {47, CF(1.142857)},
1304868a61dSFenghua Yu {63, CF(1.000000)},
1314868a61dSFenghua Yu {63, CF(1.185115)},
1324868a61dSFenghua Yu {63, CF(1.066553)},
1334868a61dSFenghua Yu {79, CF(1.454545)},
1344868a61dSFenghua Yu {95, CF(1.000000)},
1354868a61dSFenghua Yu {95, CF(1.230769)},
1364868a61dSFenghua Yu {95, CF(1.142857)},
1374868a61dSFenghua Yu {95, CF(1.066667)},
1384868a61dSFenghua Yu {127, CF(1.000000)},
1394868a61dSFenghua Yu {127, CF(1.254863)},
1404868a61dSFenghua Yu {127, CF(1.185255)},
1414868a61dSFenghua Yu {151, CF(1.000000)},
1424868a61dSFenghua Yu {127, CF(1.066667)},
1434868a61dSFenghua Yu {167, CF(1.000000)},
1444868a61dSFenghua Yu {159, CF(1.454334)},
1454868a61dSFenghua Yu {183, CF(1.000000)},
1464868a61dSFenghua Yu {127, CF(0.969744)},
1474868a61dSFenghua Yu {191, CF(1.280246)},
1484868a61dSFenghua Yu {191, CF(1.230921)},
1494868a61dSFenghua Yu {215, CF(1.000000)},
1504868a61dSFenghua Yu {191, CF(1.143118)},
1514868a61dSFenghua Yu };
1524868a61dSFenghua Yu
1534868a61dSFenghua Yu static u32 mbm_cf_rmidthreshold __read_mostly = UINT_MAX;
1544868a61dSFenghua Yu static u64 mbm_cf __read_mostly;
1554868a61dSFenghua Yu
get_corrected_mbm_count(u32 rmid,unsigned long val)1564868a61dSFenghua Yu static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
1574868a61dSFenghua Yu {
1584868a61dSFenghua Yu /* Correct MBM value. */
1594868a61dSFenghua Yu if (rmid > mbm_cf_rmidthreshold)
1604868a61dSFenghua Yu val = (val * mbm_cf) >> 20;
1614868a61dSFenghua Yu
1624868a61dSFenghua Yu return val;
1634868a61dSFenghua Yu }
1644868a61dSFenghua Yu
1656791e0eaSJames Morse /*
1666791e0eaSJames Morse * x86 and arm64 differ in their handling of monitoring.
1676791e0eaSJames Morse * x86's RMID are independent numbers, there is only one source of traffic
1686791e0eaSJames Morse * with an RMID value of '1'.
1696791e0eaSJames Morse * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of
1706791e0eaSJames Morse * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
1716791e0eaSJames Morse * value is no longer unique.
1726791e0eaSJames Morse * To account for this, resctrl uses an index. On x86 this is just the RMID,
1736791e0eaSJames Morse * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
1746791e0eaSJames Morse *
1756791e0eaSJames Morse * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code
1766791e0eaSJames Morse * must accept an attempt to read every index.
1776791e0eaSJames Morse */
__rmid_entry(u32 idx)1786791e0eaSJames Morse static inline struct rmid_entry *__rmid_entry(u32 idx)
179fa7d9493SBabu Moger {
180fa7d9493SBabu Moger struct rmid_entry *entry;
1816791e0eaSJames Morse u32 closid, rmid;
182fa7d9493SBabu Moger
1836791e0eaSJames Morse entry = &rmid_ptrs[idx];
1846791e0eaSJames Morse resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
1856791e0eaSJames Morse
1866791e0eaSJames Morse WARN_ON_ONCE(entry->closid != closid);
1876791e0eaSJames Morse WARN_ON_ONCE(entry->rmid != rmid);
188fa7d9493SBabu Moger
189fa7d9493SBabu Moger return entry;
190fa7d9493SBabu Moger }
191fa7d9493SBabu Moger
192e13db55bSTony Luck /*
193e13db55bSTony Luck * When Sub-NUMA Cluster (SNC) mode is not enabled (as indicated by
194e13db55bSTony Luck * "snc_nodes_per_l3_cache == 1") no translation of the RMID value is
195e13db55bSTony Luck * needed. The physical RMID is the same as the logical RMID.
196e13db55bSTony Luck *
197e13db55bSTony Luck * On a platform with SNC mode enabled, Linux enables RMID sharing mode
198e13db55bSTony Luck * via MSR 0xCA0 (see the "RMID Sharing Mode" section in the "Intel
199e13db55bSTony Luck * Resource Director Technology Architecture Specification" for a full
200e13db55bSTony Luck * description of RMID sharing mode).
201e13db55bSTony Luck *
202e13db55bSTony Luck * In RMID sharing mode there are fewer "logical RMID" values available
203e13db55bSTony Luck * to accumulate data ("physical RMIDs" are divided evenly between SNC
204e13db55bSTony Luck * nodes that share an L3 cache). Linux creates an rdt_mon_domain for
205e13db55bSTony Luck * each SNC node.
206e13db55bSTony Luck *
207e13db55bSTony Luck * The value loaded into IA32_PQR_ASSOC is the "logical RMID".
208e13db55bSTony Luck *
209e13db55bSTony Luck * Data is collected independently on each SNC node and can be retrieved
210e13db55bSTony Luck * using the "physical RMID" value computed by this function and loaded
211e13db55bSTony Luck * into IA32_QM_EVTSEL. @cpu can be any CPU in the SNC node.
212e13db55bSTony Luck *
213e13db55bSTony Luck * The scope of the IA32_QM_EVTSEL and IA32_QM_CTR MSRs is at the L3
214e13db55bSTony Luck * cache. So a "physical RMID" may be read from any CPU that shares
215e13db55bSTony Luck * the L3 cache with the desired SNC node, not just from a CPU in
216e13db55bSTony Luck * the specific SNC node.
217e13db55bSTony Luck */
logical_rmid_to_physical_rmid(int cpu,int lrmid)218e13db55bSTony Luck static int logical_rmid_to_physical_rmid(int cpu, int lrmid)
219e13db55bSTony Luck {
220e13db55bSTony Luck struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
221e13db55bSTony Luck
222e13db55bSTony Luck if (snc_nodes_per_l3_cache == 1)
223e13db55bSTony Luck return lrmid;
224e13db55bSTony Luck
225e13db55bSTony Luck return lrmid + (cpu_to_node(cpu) % snc_nodes_per_l3_cache) * r->num_rmid;
226e13db55bSTony Luck }
227e13db55bSTony Luck
__rmid_read_phys(u32 prmid,enum resctrl_event_id eventid,u64 * val)228e13db55bSTony Luck static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val)
2292a81160dSPeter Newman {
2302a81160dSPeter Newman u64 msr_val;
2312a81160dSPeter Newman
2322a81160dSPeter Newman /*
2332a81160dSPeter Newman * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
2342a81160dSPeter Newman * with a valid event code for supported resource type and the bits
2352a81160dSPeter Newman * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
2362a81160dSPeter Newman * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
2372a81160dSPeter Newman * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
2382a81160dSPeter Newman * are error bits.
2392a81160dSPeter Newman */
240e13db55bSTony Luck wrmsr(MSR_IA32_QM_EVTSEL, eventid, prmid);
2412a81160dSPeter Newman rdmsrl(MSR_IA32_QM_CTR, msr_val);
2422a81160dSPeter Newman
2432a81160dSPeter Newman if (msr_val & RMID_VAL_ERROR)
2442a81160dSPeter Newman return -EIO;
2452a81160dSPeter Newman if (msr_val & RMID_VAL_UNAVAIL)
2462a81160dSPeter Newman return -EINVAL;
2472a81160dSPeter Newman
2482a81160dSPeter Newman *val = msr_val;
2492a81160dSPeter Newman return 0;
2502a81160dSPeter Newman }
2512a81160dSPeter Newman
get_arch_mbm_state(struct rdt_hw_mon_domain * hw_dom,u32 rmid,enum resctrl_event_id eventid)252cae2bcb6STony Luck static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom,
253fea62d37SJames Morse u32 rmid,
254fea62d37SJames Morse enum resctrl_event_id eventid)
255fea62d37SJames Morse {
256fea62d37SJames Morse switch (eventid) {
257fea62d37SJames Morse case QOS_L3_OCCUP_EVENT_ID:
258fea62d37SJames Morse return NULL;
259fea62d37SJames Morse case QOS_L3_MBM_TOTAL_EVENT_ID:
260fea62d37SJames Morse return &hw_dom->arch_mbm_total[rmid];
261fea62d37SJames Morse case QOS_L3_MBM_LOCAL_EVENT_ID:
262fea62d37SJames Morse return &hw_dom->arch_mbm_local[rmid];
263fea62d37SJames Morse }
264fea62d37SJames Morse
265fea62d37SJames Morse /* Never expect to get here */
266fea62d37SJames Morse WARN_ON_ONCE(1);
267fea62d37SJames Morse
268fea62d37SJames Morse return NULL;
269fea62d37SJames Morse }
270fea62d37SJames Morse
resctrl_arch_reset_rmid(struct rdt_resource * r,struct rdt_mon_domain * d,u32 unused,u32 rmid,enum resctrl_event_id eventid)271cae2bcb6STony Luck void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
27240fc735bSJames Morse u32 unused, u32 rmid,
27340fc735bSJames Morse enum resctrl_event_id eventid)
274fea62d37SJames Morse {
275cae2bcb6STony Luck struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
276e13db55bSTony Luck int cpu = cpumask_any(&d->hdr.cpu_mask);
277fea62d37SJames Morse struct arch_mbm_state *am;
278e13db55bSTony Luck u32 prmid;
279fea62d37SJames Morse
280fea62d37SJames Morse am = get_arch_mbm_state(hw_dom, rmid, eventid);
2812a81160dSPeter Newman if (am) {
282fea62d37SJames Morse memset(am, 0, sizeof(*am));
2832a81160dSPeter Newman
284e13db55bSTony Luck prmid = logical_rmid_to_physical_rmid(cpu, rmid);
2852a81160dSPeter Newman /* Record any initial, non-zero count value. */
286e13db55bSTony Luck __rmid_read_phys(prmid, eventid, &am->prev_msr);
2872a81160dSPeter Newman }
288fea62d37SJames Morse }
289fea62d37SJames Morse
29092bd5a13SBabu Moger /*
29192bd5a13SBabu Moger * Assumes that hardware counters are also reset and thus that there is
29292bd5a13SBabu Moger * no need to record initial non-zero counts.
29392bd5a13SBabu Moger */
resctrl_arch_reset_rmid_all(struct rdt_resource * r,struct rdt_mon_domain * d)294cae2bcb6STony Luck void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d)
29592bd5a13SBabu Moger {
296cae2bcb6STony Luck struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
29792bd5a13SBabu Moger
298d012b66aSJames Morse if (resctrl_arch_is_mbm_total_enabled())
29992bd5a13SBabu Moger memset(hw_dom->arch_mbm_total, 0,
30092bd5a13SBabu Moger sizeof(*hw_dom->arch_mbm_total) * r->num_rmid);
30192bd5a13SBabu Moger
302d012b66aSJames Morse if (resctrl_arch_is_mbm_local_enabled())
30392bd5a13SBabu Moger memset(hw_dom->arch_mbm_local, 0,
30492bd5a13SBabu Moger sizeof(*hw_dom->arch_mbm_local) * r->num_rmid);
30592bd5a13SBabu Moger }
30692bd5a13SBabu Moger
mbm_overflow_count(u64 prev_msr,u64 cur_msr,unsigned int width)3071d81d15dSJames Morse static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
3081d81d15dSJames Morse {
3091d81d15dSJames Morse u64 shift = 64 - width, chunks;
3101d81d15dSJames Morse
3111d81d15dSJames Morse chunks = (cur_msr << shift) - (prev_msr << shift);
3121d81d15dSJames Morse return chunks >> shift;
3131d81d15dSJames Morse }
3141d81d15dSJames Morse
resctrl_arch_rmid_read(struct rdt_resource * r,struct rdt_mon_domain * d,u32 unused,u32 rmid,enum resctrl_event_id eventid,u64 * val,void * ignored)315cae2bcb6STony Luck int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
31640fc735bSJames Morse u32 unused, u32 rmid, enum resctrl_event_id eventid,
317e557999fSJames Morse u64 *val, void *ignored)
318fa7d9493SBabu Moger {
319cae2bcb6STony Luck struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
3201d81d15dSJames Morse struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
321e13db55bSTony Luck int cpu = cpumask_any(&d->hdr.cpu_mask);
3221d81d15dSJames Morse struct arch_mbm_state *am;
323f7b1843eSJames Morse u64 msr_val, chunks;
324e13db55bSTony Luck u32 prmid;
3252a81160dSPeter Newman int ret;
326fa7d9493SBabu Moger
3276fde1424SJames Morse resctrl_arch_rmid_read_context_check();
3286fde1424SJames Morse
329e13db55bSTony Luck prmid = logical_rmid_to_physical_rmid(cpu, rmid);
330e13db55bSTony Luck ret = __rmid_read_phys(prmid, eventid, &msr_val);
3312a81160dSPeter Newman if (ret)
3322a81160dSPeter Newman return ret;
3334d044c52SJames Morse
3341d81d15dSJames Morse am = get_arch_mbm_state(hw_dom, rmid, eventid);
3351d81d15dSJames Morse if (am) {
33638f72f50SJames Morse am->chunks += mbm_overflow_count(am->prev_msr, msr_val,
33738f72f50SJames Morse hw_res->mbm_width);
338f7b1843eSJames Morse chunks = get_corrected_mbm_count(rmid, am->chunks);
3391d81d15dSJames Morse am->prev_msr = msr_val;
3401d81d15dSJames Morse } else {
341f7b1843eSJames Morse chunks = msr_val;
3421d81d15dSJames Morse }
3434d044c52SJames Morse
344f7b1843eSJames Morse *val = chunks * hw_res->mon_scale;
345f7b1843eSJames Morse
3464d044c52SJames Morse return 0;
347fa7d9493SBabu Moger }
348fa7d9493SBabu Moger
limbo_release_entry(struct rmid_entry * entry)349b30a55dfSJames Morse static void limbo_release_entry(struct rmid_entry *entry)
350b30a55dfSJames Morse {
351b30a55dfSJames Morse lockdep_assert_held(&rdtgroup_mutex);
352b30a55dfSJames Morse
353b30a55dfSJames Morse rmid_limbo_count--;
354b30a55dfSJames Morse list_add_tail(&entry->list, &rmid_free_lru);
355b30a55dfSJames Morse
356b30a55dfSJames Morse if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
357b30a55dfSJames Morse closid_num_dirty_rmid[entry->closid]--;
358b30a55dfSJames Morse }
359b30a55dfSJames Morse
360fa7d9493SBabu Moger /*
361fa7d9493SBabu Moger * Check the RMIDs that are marked as busy for this domain. If the
362fa7d9493SBabu Moger * reported LLC occupancy is below the threshold clear the busy bit and
363fa7d9493SBabu Moger * decrement the count. If the busy count gets to zero on an RMID, we
364fa7d9493SBabu Moger * free the RMID
365fa7d9493SBabu Moger */
__check_limbo(struct rdt_mon_domain * d,bool force_free)366cae2bcb6STony Luck void __check_limbo(struct rdt_mon_domain *d, bool force_free)
367fa7d9493SBabu Moger {
3683c021531SJames Morse struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
3696791e0eaSJames Morse u32 idx_limit = resctrl_arch_system_num_rmid_idx();
370fa7d9493SBabu Moger struct rmid_entry *entry;
3716791e0eaSJames Morse u32 idx, cur_idx = 1;
372e557999fSJames Morse void *arch_mon_ctx;
3738286618aSJames Morse bool rmid_dirty;
3748286618aSJames Morse u64 val = 0;
375fa7d9493SBabu Moger
376e557999fSJames Morse arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
377e557999fSJames Morse if (IS_ERR(arch_mon_ctx)) {
378e557999fSJames Morse pr_warn_ratelimited("Failed to allocate monitor context: %ld",
379e557999fSJames Morse PTR_ERR(arch_mon_ctx));
380e557999fSJames Morse return;
381e557999fSJames Morse }
382e557999fSJames Morse
383fa7d9493SBabu Moger /*
384fa7d9493SBabu Moger * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
385fa7d9493SBabu Moger * are marked as busy for occupancy < threshold. If the occupancy
386fa7d9493SBabu Moger * is less than the threshold decrement the busy counter of the
387fa7d9493SBabu Moger * RMID and move it to the free list when the counter reaches 0.
388fa7d9493SBabu Moger */
389fa7d9493SBabu Moger for (;;) {
3906791e0eaSJames Morse idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
3916791e0eaSJames Morse if (idx >= idx_limit)
392fa7d9493SBabu Moger break;
393fa7d9493SBabu Moger
3946791e0eaSJames Morse entry = __rmid_entry(idx);
39540fc735bSJames Morse if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
396e557999fSJames Morse QOS_L3_OCCUP_EVENT_ID, &val,
397e557999fSJames Morse arch_mon_ctx)) {
3988286618aSJames Morse rmid_dirty = true;
399ae2328b5SJames Morse } else {
400ae2328b5SJames Morse rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
401931be446SHaifeng Xu
402931be446SHaifeng Xu /*
403931be446SHaifeng Xu * x86's CLOSID and RMID are independent numbers, so the entry's
404931be446SHaifeng Xu * CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the
405931be446SHaifeng Xu * RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't
406931be446SHaifeng Xu * used to select the configuration. It is thus necessary to track both
407931be446SHaifeng Xu * CLOSID and RMID because there may be dependencies between them
408931be446SHaifeng Xu * on some architectures.
409931be446SHaifeng Xu */
410c103d4d4STony Luck trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->hdr.id, val);
411ae2328b5SJames Morse }
4128286618aSJames Morse
4138286618aSJames Morse if (force_free || !rmid_dirty) {
4146791e0eaSJames Morse clear_bit(idx, d->rmid_busy_llc);
415b30a55dfSJames Morse if (!--entry->busy)
416b30a55dfSJames Morse limbo_release_entry(entry);
417fa7d9493SBabu Moger }
4186791e0eaSJames Morse cur_idx = idx + 1;
419fa7d9493SBabu Moger }
420e557999fSJames Morse
421e557999fSJames Morse resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
422fa7d9493SBabu Moger }
423fa7d9493SBabu Moger
has_busy_rmid(struct rdt_mon_domain * d)424cae2bcb6STony Luck bool has_busy_rmid(struct rdt_mon_domain *d)
425fa7d9493SBabu Moger {
4266791e0eaSJames Morse u32 idx_limit = resctrl_arch_system_num_rmid_idx();
4276791e0eaSJames Morse
4286791e0eaSJames Morse return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
429fa7d9493SBabu Moger }
430fa7d9493SBabu Moger
resctrl_find_free_rmid(u32 closid)431c4c0376eSJames Morse static struct rmid_entry *resctrl_find_free_rmid(u32 closid)
432c4c0376eSJames Morse {
433c4c0376eSJames Morse struct rmid_entry *itr;
434c4c0376eSJames Morse u32 itr_idx, cmp_idx;
435c4c0376eSJames Morse
436c4c0376eSJames Morse if (list_empty(&rmid_free_lru))
437c4c0376eSJames Morse return rmid_limbo_count ? ERR_PTR(-EBUSY) : ERR_PTR(-ENOSPC);
438c4c0376eSJames Morse
439c4c0376eSJames Morse list_for_each_entry(itr, &rmid_free_lru, list) {
440fa7d9493SBabu Moger /*
441c4c0376eSJames Morse * Get the index of this free RMID, and the index it would need
442c4c0376eSJames Morse * to be if it were used with this CLOSID.
443c4c0376eSJames Morse * If the CLOSID is irrelevant on this architecture, the two
444c4c0376eSJames Morse * index values are always the same on every entry and thus the
445c4c0376eSJames Morse * very first entry will be returned.
446fa7d9493SBabu Moger */
447c4c0376eSJames Morse itr_idx = resctrl_arch_rmid_idx_encode(itr->closid, itr->rmid);
448c4c0376eSJames Morse cmp_idx = resctrl_arch_rmid_idx_encode(closid, itr->rmid);
449c4c0376eSJames Morse
450c4c0376eSJames Morse if (itr_idx == cmp_idx)
451c4c0376eSJames Morse return itr;
452c4c0376eSJames Morse }
453c4c0376eSJames Morse
454c4c0376eSJames Morse return ERR_PTR(-ENOSPC);
455c4c0376eSJames Morse }
456c4c0376eSJames Morse
4576eac36bbSJames Morse /**
4586eac36bbSJames Morse * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated
4596eac36bbSJames Morse * RMID are clean, or the CLOSID that has
4606eac36bbSJames Morse * the most clean RMID.
4616eac36bbSJames Morse *
4626eac36bbSJames Morse * MPAM's equivalent of RMID are per-CLOSID, meaning a freshly allocated CLOSID
4636eac36bbSJames Morse * may not be able to allocate clean RMID. To avoid this the allocator will
4646eac36bbSJames Morse * choose the CLOSID with the most clean RMID.
4656eac36bbSJames Morse *
4666eac36bbSJames Morse * When the CLOSID and RMID are independent numbers, the first free CLOSID will
4676eac36bbSJames Morse * be returned.
4686eac36bbSJames Morse */
resctrl_find_cleanest_closid(void)4696eac36bbSJames Morse int resctrl_find_cleanest_closid(void)
4706eac36bbSJames Morse {
4716eac36bbSJames Morse u32 cleanest_closid = ~0;
4726eac36bbSJames Morse int i = 0;
4736eac36bbSJames Morse
4746eac36bbSJames Morse lockdep_assert_held(&rdtgroup_mutex);
4756eac36bbSJames Morse
4766eac36bbSJames Morse if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
4776eac36bbSJames Morse return -EIO;
4786eac36bbSJames Morse
4796eac36bbSJames Morse for (i = 0; i < closids_supported(); i++) {
4806eac36bbSJames Morse int num_dirty;
4816eac36bbSJames Morse
4826eac36bbSJames Morse if (closid_allocated(i))
4836eac36bbSJames Morse continue;
4846eac36bbSJames Morse
4856eac36bbSJames Morse num_dirty = closid_num_dirty_rmid[i];
4866eac36bbSJames Morse if (num_dirty == 0)
4876eac36bbSJames Morse return i;
4886eac36bbSJames Morse
4896eac36bbSJames Morse if (cleanest_closid == ~0)
4906eac36bbSJames Morse cleanest_closid = i;
4916eac36bbSJames Morse
4926eac36bbSJames Morse if (num_dirty < closid_num_dirty_rmid[cleanest_closid])
4936eac36bbSJames Morse cleanest_closid = i;
4946eac36bbSJames Morse }
4956eac36bbSJames Morse
4966eac36bbSJames Morse if (cleanest_closid == ~0)
4976eac36bbSJames Morse return -ENOSPC;
4986eac36bbSJames Morse
4996eac36bbSJames Morse return cleanest_closid;
5006eac36bbSJames Morse }
5016eac36bbSJames Morse
502c4c0376eSJames Morse /*
503c4c0376eSJames Morse * For MPAM the RMID value is not unique, and has to be considered with
504c4c0376eSJames Morse * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which
505c4c0376eSJames Morse * allows all domains to be managed by a single free list.
506c4c0376eSJames Morse * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler.
507c4c0376eSJames Morse */
alloc_rmid(u32 closid)508c4c0376eSJames Morse int alloc_rmid(u32 closid)
509fa7d9493SBabu Moger {
510fa7d9493SBabu Moger struct rmid_entry *entry;
511fa7d9493SBabu Moger
512fa7d9493SBabu Moger lockdep_assert_held(&rdtgroup_mutex);
513fa7d9493SBabu Moger
514c4c0376eSJames Morse entry = resctrl_find_free_rmid(closid);
515c4c0376eSJames Morse if (IS_ERR(entry))
516c4c0376eSJames Morse return PTR_ERR(entry);
517fa7d9493SBabu Moger
518fa7d9493SBabu Moger list_del(&entry->list);
519fa7d9493SBabu Moger return entry->rmid;
520fa7d9493SBabu Moger }
521fa7d9493SBabu Moger
add_rmid_to_limbo(struct rmid_entry * entry)522fa7d9493SBabu Moger static void add_rmid_to_limbo(struct rmid_entry *entry)
523fa7d9493SBabu Moger {
5243c021531SJames Morse struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
525cae2bcb6STony Luck struct rdt_mon_domain *d;
5266791e0eaSJames Morse u32 idx;
5276791e0eaSJames Morse
528b30a55dfSJames Morse lockdep_assert_held(&rdtgroup_mutex);
529b30a55dfSJames Morse
530fb700810SJames Morse /* Walking r->domains, ensure it can't race with cpuhp */
531fb700810SJames Morse lockdep_assert_cpus_held();
532fb700810SJames Morse
5336791e0eaSJames Morse idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
534fa7d9493SBabu Moger
535fa7d9493SBabu Moger entry->busy = 0;
536cd84f72bSTony Luck list_for_each_entry(d, &r->mon_domains, hdr.list) {
537fa7d9493SBabu Moger /*
538fa7d9493SBabu Moger * For the first limbo RMID in the domain,
539fa7d9493SBabu Moger * setup up the limbo worker.
540fa7d9493SBabu Moger */
5416791e0eaSJames Morse if (!has_busy_rmid(d))
542978fcca9SJames Morse cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
543978fcca9SJames Morse RESCTRL_PICK_ANY_CPU);
5446791e0eaSJames Morse set_bit(idx, d->rmid_busy_llc);
545fa7d9493SBabu Moger entry->busy++;
546fa7d9493SBabu Moger }
547fa7d9493SBabu Moger
548fa7d9493SBabu Moger rmid_limbo_count++;
549b30a55dfSJames Morse if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
550b30a55dfSJames Morse closid_num_dirty_rmid[entry->closid]++;
551b30a55dfSJames Morse }
552fa7d9493SBabu Moger
free_rmid(u32 closid,u32 rmid)55340fc735bSJames Morse void free_rmid(u32 closid, u32 rmid)
554fa7d9493SBabu Moger {
5556791e0eaSJames Morse u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
556fa7d9493SBabu Moger struct rmid_entry *entry;
557fa7d9493SBabu Moger
558fa7d9493SBabu Moger lockdep_assert_held(&rdtgroup_mutex);
559fa7d9493SBabu Moger
5606791e0eaSJames Morse /*
5616791e0eaSJames Morse * Do not allow the default rmid to be free'd. Comparing by index
5626791e0eaSJames Morse * allows architectures that ignore the closid parameter to avoid an
5636791e0eaSJames Morse * unnecessary check.
5646791e0eaSJames Morse */
565739c9765SDave Martin if (!resctrl_arch_mon_capable() ||
566739c9765SDave Martin idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
5676791e0eaSJames Morse RESCTRL_RESERVED_RMID))
5686791e0eaSJames Morse return;
5696791e0eaSJames Morse
5706791e0eaSJames Morse entry = __rmid_entry(idx);
571fa7d9493SBabu Moger
572d012b66aSJames Morse if (resctrl_arch_is_llc_occupancy_enabled())
573fa7d9493SBabu Moger add_rmid_to_limbo(entry);
574fa7d9493SBabu Moger else
575fa7d9493SBabu Moger list_add_tail(&entry->list, &rmid_free_lru);
576fa7d9493SBabu Moger }
577fa7d9493SBabu Moger
get_mbm_state(struct rdt_mon_domain * d,u32 closid,u32 rmid,enum resctrl_event_id evtid)578cae2bcb6STony Luck static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
57940fc735bSJames Morse u32 rmid, enum resctrl_event_id evtid)
580322b72e0SPeter Newman {
5816791e0eaSJames Morse u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
5826791e0eaSJames Morse
583322b72e0SPeter Newman switch (evtid) {
584322b72e0SPeter Newman case QOS_L3_MBM_TOTAL_EVENT_ID:
5856791e0eaSJames Morse return &d->mbm_total[idx];
586322b72e0SPeter Newman case QOS_L3_MBM_LOCAL_EVENT_ID:
5876791e0eaSJames Morse return &d->mbm_local[idx];
588322b72e0SPeter Newman default:
589322b72e0SPeter Newman return NULL;
590322b72e0SPeter Newman }
591322b72e0SPeter Newman }
592322b72e0SPeter Newman
__mon_event_count(u32 closid,u32 rmid,struct rmid_read * rr)59340fc735bSJames Morse static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
594fa7d9493SBabu Moger {
5959fbb303eSTony Luck int cpu = smp_processor_id();
5969fbb303eSTony Luck struct rdt_mon_domain *d;
597fa7d9493SBabu Moger struct mbm_state *m;
5989fbb303eSTony Luck int err, ret;
5991d81d15dSJames Morse u64 tval = 0;
600fa7d9493SBabu Moger
601322b72e0SPeter Newman if (rr->first) {
60240fc735bSJames Morse resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
60340fc735bSJames Morse m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
604322b72e0SPeter Newman if (m)
605322b72e0SPeter Newman memset(m, 0, sizeof(struct mbm_state));
606322b72e0SPeter Newman return 0;
607322b72e0SPeter Newman }
608fea62d37SJames Morse
6099fbb303eSTony Luck if (rr->d) {
6109fbb303eSTony Luck /* Reading a single domain, must be on a CPU in that domain. */
6119fbb303eSTony Luck if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
6129fbb303eSTony Luck return -EINVAL;
6139fbb303eSTony Luck rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
6149fbb303eSTony Luck rr->evtid, &tval, rr->arch_mon_ctx);
6154d044c52SJames Morse if (rr->err)
6164d044c52SJames Morse return rr->err;
6174d044c52SJames Morse
61838f72f50SJames Morse rr->val += tval;
6194868a61dSFenghua Yu
620fa7d9493SBabu Moger return 0;
621fa7d9493SBabu Moger }
622fa7d9493SBabu Moger
6239fbb303eSTony Luck /* Summing domains that share a cache, must be on a CPU for that cache. */
6249fbb303eSTony Luck if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
6259fbb303eSTony Luck return -EINVAL;
6269fbb303eSTony Luck
6279fbb303eSTony Luck /*
6289fbb303eSTony Luck * Legacy files must report the sum of an event across all
6299fbb303eSTony Luck * domains that share the same L3 cache instance.
6309fbb303eSTony Luck * Report success if a read from any domain succeeds, -EINVAL
6319fbb303eSTony Luck * (translated to "Unavailable" for user space) if reading from
6329fbb303eSTony Luck * all domains fail for any reason.
6339fbb303eSTony Luck */
6349fbb303eSTony Luck ret = -EINVAL;
6359fbb303eSTony Luck list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
6369fbb303eSTony Luck if (d->ci->id != rr->ci->id)
6379fbb303eSTony Luck continue;
6389fbb303eSTony Luck err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
6399fbb303eSTony Luck rr->evtid, &tval, rr->arch_mon_ctx);
6409fbb303eSTony Luck if (!err) {
6419fbb303eSTony Luck rr->val += tval;
6429fbb303eSTony Luck ret = 0;
6439fbb303eSTony Luck }
6449fbb303eSTony Luck }
6459fbb303eSTony Luck
6469fbb303eSTony Luck if (ret)
6479fbb303eSTony Luck rr->err = ret;
6489fbb303eSTony Luck
6499fbb303eSTony Luck return ret;
6509fbb303eSTony Luck }
6519fbb303eSTony Luck
652fa7d9493SBabu Moger /*
65330442571SJames Morse * mbm_bw_count() - Update bw count from values previously read by
65430442571SJames Morse * __mon_event_count().
65540fc735bSJames Morse * @closid: The closid used to identify the cached mbm_state.
65630442571SJames Morse * @rmid: The rmid used to identify the cached mbm_state.
65730442571SJames Morse * @rr: The struct rmid_read populated by __mon_event_count().
65830442571SJames Morse *
659fa7d9493SBabu Moger * Supporting function to calculate the memory bandwidth
66030442571SJames Morse * and delta bandwidth in MBps. The chunks value previously read by
66130442571SJames Morse * __mon_event_count() is compared with the chunks value from the previous
66230442571SJames Morse * invocation. This must be called once per second to maintain values in MBps.
663fa7d9493SBabu Moger */
mbm_bw_count(u32 closid,u32 rmid,struct rmid_read * rr)66440fc735bSJames Morse static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr)
665fa7d9493SBabu Moger {
666f7b1843eSJames Morse u64 cur_bw, bytes, cur_bytes;
6672c272fadSTony Luck struct mbm_state *m;
6682c272fadSTony Luck
6692c272fadSTony Luck m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
6702c272fadSTony Luck if (WARN_ON_ONCE(!m))
6712c272fadSTony Luck return;
672fa7d9493SBabu Moger
673f7b1843eSJames Morse cur_bytes = rr->val;
674f7b1843eSJames Morse bytes = cur_bytes - m->prev_bw_bytes;
675f7b1843eSJames Morse m->prev_bw_bytes = cur_bytes;
676fa7d9493SBabu Moger
677f7b1843eSJames Morse cur_bw = bytes / SZ_1M;
678fa7d9493SBabu Moger
679fa7d9493SBabu Moger m->prev_bw = cur_bw;
680fa7d9493SBabu Moger }
681fa7d9493SBabu Moger
682fa7d9493SBabu Moger /*
68309909e09SJames Morse * This is scheduled by mon_event_read() to read the CQM/MBM counters
684fa7d9493SBabu Moger * on a domain.
685fa7d9493SBabu Moger */
mon_event_count(void * info)686fa7d9493SBabu Moger void mon_event_count(void *info)
687fa7d9493SBabu Moger {
688fa7d9493SBabu Moger struct rdtgroup *rdtgrp, *entry;
689fa7d9493SBabu Moger struct rmid_read *rr = info;
690fa7d9493SBabu Moger struct list_head *head;
6914d044c52SJames Morse int ret;
692fa7d9493SBabu Moger
693fa7d9493SBabu Moger rdtgrp = rr->rgrp;
694fa7d9493SBabu Moger
69540fc735bSJames Morse ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr);
696fa7d9493SBabu Moger
697fa7d9493SBabu Moger /*
698064855a6SBabu Moger * For Ctrl groups read data from child monitor groups and
699064855a6SBabu Moger * add them together. Count events which are read successfully.
700064855a6SBabu Moger * Discard the rmid_read's reporting errors.
701fa7d9493SBabu Moger */
702fa7d9493SBabu Moger head = &rdtgrp->mon.crdtgrp_list;
703fa7d9493SBabu Moger
704fa7d9493SBabu Moger if (rdtgrp->type == RDTCTRL_GROUP) {
705fa7d9493SBabu Moger list_for_each_entry(entry, head, mon.crdtgrp_list) {
70640fc735bSJames Morse if (__mon_event_count(entry->closid, entry->mon.rmid,
70740fc735bSJames Morse rr) == 0)
7084d044c52SJames Morse ret = 0;
709fa7d9493SBabu Moger }
710fa7d9493SBabu Moger }
711064855a6SBabu Moger
7124d044c52SJames Morse /*
7134d044c52SJames Morse * __mon_event_count() calls for newly created monitor groups may
7144d044c52SJames Morse * report -EINVAL/Unavailable if the monitor hasn't seen any traffic.
7154d044c52SJames Morse * Discard error if any of the monitor event reads succeeded.
7164d044c52SJames Morse */
7174d044c52SJames Morse if (ret == 0)
7184d044c52SJames Morse rr->err = 0;
719fa7d9493SBabu Moger }
720fa7d9493SBabu Moger
get_ctrl_domain_from_cpu(int cpu,struct rdt_resource * r)721*823beb31SJames Morse static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu,
722*823beb31SJames Morse struct rdt_resource *r)
723*823beb31SJames Morse {
724*823beb31SJames Morse struct rdt_ctrl_domain *d;
725*823beb31SJames Morse
726*823beb31SJames Morse lockdep_assert_cpus_held();
727*823beb31SJames Morse
728*823beb31SJames Morse list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
729*823beb31SJames Morse /* Find the domain that contains this CPU */
730*823beb31SJames Morse if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
731*823beb31SJames Morse return d;
732*823beb31SJames Morse }
733*823beb31SJames Morse
734*823beb31SJames Morse return NULL;
735*823beb31SJames Morse }
736*823beb31SJames Morse
/*
 * Feedback loop for MBA software controller (mba_sc)
 *
 * mba_sc is a feedback loop where we periodically read MBM counters and
 * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
 * that:
 *
 *   current bandwidth(cur_bw) < user specified bandwidth(user_bw)
 *
 * This uses the MBM counters to measure the bandwidth and MBA throttle
 * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
 * fact that resctrl rdtgroups have both monitoring and control.
 *
 * The frequency of the checks is 1s and we just tag along the MBM overflow
 * timer. Having 1s interval makes the calculation of bandwidth simpler.
 *
 * Although MBA's goal is to restrict the bandwidth to a maximum, there may
 * be a need to increase the bandwidth to avoid unnecessarily restricting
 * the L2 <-> L3 traffic.
 *
 * Since MBA controls the L2 external bandwidth where as MBM measures the
 * L3 external bandwidth the following sequence could lead to such a
 * situation.
 *
 * Consider an rdtgroup which had high L3 <-> memory traffic in initial
 * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but
 * after some time rdtgroup has mostly L2 <-> L3 traffic.
 *
 * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
 * throttle MSRs already have low percentage values. To avoid
 * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
 */
static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm)
{
	u32 closid, rmid, cur_msr_val, new_msr_val;
	struct mbm_state *pmbm_data, *cmbm_data;
	struct rdt_ctrl_domain *dom_mba;
	enum resctrl_event_id evt_id;
	struct rdt_resource *r_mba;
	struct list_head *head;
	struct rdtgroup *entry;
	u32 cur_bw, user_bw;

	r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
	/* Which MBM event (total or local) drives the feedback loop. */
	evt_id = rgrp->mba_mbps_event;

	closid = rgrp->closid;
	rmid = rgrp->mon.rmid;
	pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id);
	if (WARN_ON_ONCE(!pmbm_data))
		return;

	/* Runs from the overflow worker, so the current CPU picks the domain. */
	dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
	if (!dom_mba) {
		pr_warn_once("Failure to get domain for MBA update\n");
		return;
	}

	/* prev_bw was computed by mbm_bw_count() on the previous tick (MBps). */
	cur_bw = pmbm_data->prev_bw;
	user_bw = dom_mba->mbps_val[closid];

	/* MBA resource doesn't support CDP */
	cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);

	/*
	 * For Ctrl groups read data from child monitor groups.
	 */
	head = &rgrp->mon.crdtgrp_list;
	list_for_each_entry(entry, head, mon.crdtgrp_list) {
		cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id);
		if (WARN_ON_ONCE(!cmbm_data))
			return;
		cur_bw += cmbm_data->prev_bw;
	}

	/*
	 * Scale up/down the bandwidth linearly for the ctrl group. The
	 * bandwidth step is the bandwidth granularity specified by the
	 * hardware.
	 * Always increase throttling if current bandwidth is above the
	 * target set by user.
	 * But avoid thrashing up and down on every poll by checking
	 * whether a decrease in throttling is likely to push the group
	 * back over target. E.g. if currently throttling to 30% of bandwidth
	 * on a system with 10% granularity steps, check whether moving to
	 * 40% would go past the limit by multiplying current bandwidth by
	 * "(30 + 10) / 30".
	 */
	if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
		new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
	} else if (cur_msr_val < MAX_MBA_BW &&
		   (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
		new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
	} else {
		/* Within the hysteresis band: leave the throttle value alone. */
		return;
	}

	resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
}
836fa7d9493SBabu Moger
/*
 * Read one MBM event for (@closid, @rmid) in domain @d and, when the MBA
 * software controller is enabled, update the cached bandwidth state.
 */
static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d,
				 u32 closid, u32 rmid, enum resctrl_event_id evtid)
{
	struct rmid_read rr = {0};

	rr.r = r;
	rr.d = d;
	rr.evtid = evtid;
	rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
	if (IS_ERR(rr.arch_mon_ctx)) {
		/* Kernel log messages must be newline terminated. */
		pr_warn_ratelimited("Failed to allocate monitor context: %ld\n",
				    PTR_ERR(rr.arch_mon_ctx));
		return;
	}

	__mon_event_count(closid, rmid, &rr);

	/*
	 * If the software controller is enabled, compute the
	 * bandwidth for this event id.
	 */
	if (is_mba_sc(NULL))
		mbm_bw_count(closid, rmid, &rr);

	resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
}
8632c272fadSTony Luck
/*
 * Refresh every enabled MBM event for (@closid, @rmid) in domain @d.
 *
 * Safe against concurrent reads from user space because both the user
 * and the overflow handler hold the global rdtgroup mutex.
 */
static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d,
		       u32 closid, u32 rmid)
{
	if (resctrl_arch_is_mbm_total_enabled())
		mbm_update_one_event(r, d, closid, rmid,
				     QOS_L3_MBM_TOTAL_EVENT_ID);

	if (resctrl_arch_is_mbm_local_enabled())
		mbm_update_one_event(r, d, closid, rmid,
				     QOS_L3_MBM_LOCAL_EVENT_ID);
}
877fa7d9493SBabu Moger
878fa7d9493SBabu Moger /*
879fa7d9493SBabu Moger * Handler to scan the limbo list and move the RMIDs
880fa7d9493SBabu Moger * to free list whose occupancy < threshold_occupancy.
881fa7d9493SBabu Moger */
cqm_handle_limbo(struct work_struct * work)882fa7d9493SBabu Moger void cqm_handle_limbo(struct work_struct *work)
883fa7d9493SBabu Moger {
884fa7d9493SBabu Moger unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
885cae2bcb6STony Luck struct rdt_mon_domain *d;
886fa7d9493SBabu Moger
887fb700810SJames Morse cpus_read_lock();
888fa7d9493SBabu Moger mutex_lock(&rdtgroup_mutex);
889fa7d9493SBabu Moger
890cae2bcb6STony Luck d = container_of(work, struct rdt_mon_domain, cqm_limbo.work);
891fa7d9493SBabu Moger
892fa7d9493SBabu Moger __check_limbo(d, false);
893fa7d9493SBabu Moger
894a4846aafSJames Morse if (has_busy_rmid(d)) {
895c103d4d4STony Luck d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
896978fcca9SJames Morse RESCTRL_PICK_ANY_CPU);
897a4846aafSJames Morse schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
898a4846aafSJames Morse delay);
899a4846aafSJames Morse }
900fa7d9493SBabu Moger
901fa7d9493SBabu Moger mutex_unlock(&rdtgroup_mutex);
902fb700810SJames Morse cpus_read_unlock();
903fa7d9493SBabu Moger }
904fa7d9493SBabu Moger
905978fcca9SJames Morse /**
906978fcca9SJames Morse * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
907978fcca9SJames Morse * domain.
908978fcca9SJames Morse * @dom: The domain the limbo handler should run for.
909978fcca9SJames Morse * @delay_ms: How far in the future the handler should run.
910978fcca9SJames Morse * @exclude_cpu: Which CPU the handler should not run on,
911978fcca9SJames Morse * RESCTRL_PICK_ANY_CPU to pick any CPU.
912978fcca9SJames Morse */
cqm_setup_limbo_handler(struct rdt_mon_domain * dom,unsigned long delay_ms,int exclude_cpu)913cae2bcb6STony Luck void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
914978fcca9SJames Morse int exclude_cpu)
915fa7d9493SBabu Moger {
916fa7d9493SBabu Moger unsigned long delay = msecs_to_jiffies(delay_ms);
917fa7d9493SBabu Moger int cpu;
918fa7d9493SBabu Moger
919c103d4d4STony Luck cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
920fa7d9493SBabu Moger dom->cqm_work_cpu = cpu;
921fa7d9493SBabu Moger
922978fcca9SJames Morse if (cpu < nr_cpu_ids)
923fa7d9493SBabu Moger schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
924fa7d9493SBabu Moger }
925fa7d9493SBabu Moger
/*
 * Work handler that periodically reads all MBM counters for a domain so
 * the (overflow-prone) hardware counters are sampled often enough, and
 * that drives the MBA software controller when it is enabled.
 */
void mbm_handle_overflow(struct work_struct *work)
{
	unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
	struct rdtgroup *prgrp, *crgrp;
	struct rdt_mon_domain *d;
	struct list_head *head;
	struct rdt_resource *r;

	/* Lock order: cpus_read_lock() before rdtgroup_mutex. */
	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	/*
	 * If the filesystem has been unmounted this work no longer needs to
	 * run.
	 */
	if (!resctrl_mounted || !resctrl_arch_mon_capable())
		goto out_unlock;

	r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
	d = container_of(work, struct rdt_mon_domain, mbm_over.work);

	/* Update every group: each control group and its monitor groups. */
	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
		mbm_update(r, d, prgrp->closid, prgrp->mon.rmid);

		head = &prgrp->mon.crdtgrp_list;
		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
			mbm_update(r, d, crgrp->closid, crgrp->mon.rmid);

		if (is_mba_sc(NULL))
			update_mba_bw(prgrp, d);
	}

	/*
	 * Re-check for housekeeping CPUs. This allows the overflow handler to
	 * move off a nohz_full CPU quickly.
	 */
	d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
						   RESCTRL_PICK_ANY_CPU);
	schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();
}
970fa7d9493SBabu Moger
971978fcca9SJames Morse /**
972978fcca9SJames Morse * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
973978fcca9SJames Morse * domain.
974978fcca9SJames Morse * @dom: The domain the overflow handler should run for.
975978fcca9SJames Morse * @delay_ms: How far in the future the handler should run.
976978fcca9SJames Morse * @exclude_cpu: Which CPU the handler should not run on,
977978fcca9SJames Morse * RESCTRL_PICK_ANY_CPU to pick any CPU.
978978fcca9SJames Morse */
mbm_setup_overflow_handler(struct rdt_mon_domain * dom,unsigned long delay_ms,int exclude_cpu)979cae2bcb6STony Luck void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms,
980978fcca9SJames Morse int exclude_cpu)
981fa7d9493SBabu Moger {
982fa7d9493SBabu Moger unsigned long delay = msecs_to_jiffies(delay_ms);
983fa7d9493SBabu Moger int cpu;
984fa7d9493SBabu Moger
98513e5769dSJames Morse /*
98613e5769dSJames Morse * When a domain comes online there is no guarantee the filesystem is
98713e5769dSJames Morse * mounted. If not, there is no need to catch counter overflow.
98813e5769dSJames Morse */
98930017b60SJames Morse if (!resctrl_mounted || !resctrl_arch_mon_capable())
990fa7d9493SBabu Moger return;
991c103d4d4STony Luck cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
992fa7d9493SBabu Moger dom->mbm_work_cpu = cpu;
993978fcca9SJames Morse
994978fcca9SJames Morse if (cpu < nr_cpu_ids)
995fa7d9493SBabu Moger schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
996fa7d9493SBabu Moger }
997fa7d9493SBabu Moger
/*
 * dom_data_init() - Allocate and populate the global rmid_ptrs[] array and
 * the free-RMID list, plus closid_num_dirty_rmid[] when RMIDs depend on
 * the CLOSID.
 *
 * Returns 0 on success or -ENOMEM. On failure nothing is left allocated.
 */
static int dom_data_init(struct rdt_resource *r)
{
	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
	u32 num_closid = resctrl_arch_get_num_closid(r);
	struct rmid_entry *entry = NULL;
	int err = 0, i;
	u32 idx;

	mutex_lock(&rdtgroup_mutex);
	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
		u32 *tmp;

		/*
		 * If the architecture hasn't provided a sanitised value here,
		 * this may result in larger arrays than necessary. Resctrl will
		 * use a smaller system wide value based on the resources in
		 * use.
		 */
		tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
		if (!tmp) {
			err = -ENOMEM;
			goto out_unlock;
		}

		closid_num_dirty_rmid = tmp;
	}

	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
	if (!rmid_ptrs) {
		/* Unwind the closid_num_dirty_rmid allocation above. */
		if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
			kfree(closid_num_dirty_rmid);
			closid_num_dirty_rmid = NULL;
		}
		err = -ENOMEM;
		goto out_unlock;
	}

	/* Every index starts out on the free list. */
	for (i = 0; i < idx_limit; i++) {
		entry = &rmid_ptrs[i];
		INIT_LIST_HEAD(&entry->list);

		resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
		list_add_tail(&entry->list, &rmid_free_lru);
	}

	/*
	 * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
	 * are always allocated. These are used for the rdtgroup_default
	 * control group, which will be setup later in resctrl_init().
	 */
	idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
					   RESCTRL_RESERVED_RMID);
	entry = __rmid_entry(idx);
	list_del(&entry->list);

out_unlock:
	mutex_unlock(&rdtgroup_mutex);

	return err;
}
1058fa7d9493SBabu Moger
dom_data_exit(struct rdt_resource * r)10594b6bdbf2SJames Morse static void dom_data_exit(struct rdt_resource *r)
10603f7b0738SJames Morse {
10613f7b0738SJames Morse mutex_lock(&rdtgroup_mutex);
10623f7b0738SJames Morse
106301184272SJames Morse if (!r->mon_capable)
106401184272SJames Morse goto out_unlock;
106501184272SJames Morse
1066b30a55dfSJames Morse if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
1067b30a55dfSJames Morse kfree(closid_num_dirty_rmid);
1068b30a55dfSJames Morse closid_num_dirty_rmid = NULL;
1069b30a55dfSJames Morse }
1070b30a55dfSJames Morse
10713f7b0738SJames Morse kfree(rmid_ptrs);
10723f7b0738SJames Morse rmid_ptrs = NULL;
10733f7b0738SJames Morse
107401184272SJames Morse out_unlock:
10753f7b0738SJames Morse mutex_unlock(&rdtgroup_mutex);
10763f7b0738SJames Morse }
10773f7b0738SJames Morse
/* L3 cache occupancy event, reported via the "llc_occupancy" file. */
static struct mon_evt llc_occupancy_event = {
	.name		= "llc_occupancy",
	.evtid		= QOS_L3_OCCUP_EVENT_ID,
};

/* Total memory bandwidth event, reported via "mbm_total_bytes". */
static struct mon_evt mbm_total_event = {
	.name		= "mbm_total_bytes",
	.evtid		= QOS_L3_MBM_TOTAL_EVENT_ID,
};

/* Local memory bandwidth event, reported via "mbm_local_bytes". */
static struct mon_evt mbm_local_event = {
	.name		= "mbm_local_bytes",
	.evtid		= QOS_L3_MBM_LOCAL_EVENT_ID,
};
1092fa7d9493SBabu Moger
/*
 * Initialize the event list for the resource.
 *
 * Note that MBM events are also part of RDT_RESOURCE_L3 resource
 * because as per the SDM the total and local memory bandwidth
 * are enumerated as part of L3 monitoring.
 */
static void l3_mon_evt_init(struct rdt_resource *r)
{
	INIT_LIST_HEAD(&r->evt_list);

	/* Only events the architecture reports as enabled are listed. */
	if (resctrl_arch_is_llc_occupancy_enabled())
		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
	if (resctrl_arch_is_mbm_total_enabled())
		list_add_tail(&mbm_total_event.list, &r->evt_list);
	if (resctrl_arch_is_mbm_local_enabled())
		list_add_tail(&mbm_local_event.list, &r->evt_list);
}
1111fa7d9493SBabu Moger
/*
 * The power-on reset value of MSR_RMID_SNC_CONFIG is 0x1
 * which indicates that RMIDs are configured in legacy mode.
 * This mode is incompatible with Linux resctrl semantics
 * as RMIDs are partitioned between SNC nodes, which requires
 * a user to know which RMID is allocated to a task.
 * Clearing bit 0 reconfigures the RMID counters for use
 * in RMID sharing mode. This mode is better for Linux.
 * The RMID space is divided between all SNC nodes with the
 * RMIDs renumbered to start from zero in each node when
 * counting operations from tasks. Code to read the counters
 * must adjust RMID counter numbers based on SNC node. See
 * logical_rmid_to_physical_rmid() for code that does this.
 */
void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d)
{
	/* Only needed when Sub-NUMA Cluster mode was detected at boot. */
	if (snc_nodes_per_l3_cache > 1)
		msr_clear_bit(MSR_RMID_SNC_CONFIG, 0);
}
113121b362ccSTony Luck
/* CPU models that support MSR_RMID_SNC_CONFIG */
static const struct x86_cpu_id snc_cpu_ids[] __initconst = {
	X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
	X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, 0),
	X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, 0),
	X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, 0),
	X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, 0),
	{}	/* Sentinel: terminates the match table. */
};
114113488150STony Luck
114213488150STony Luck /*
114313488150STony Luck * There isn't a simple hardware bit that indicates whether a CPU is running
114413488150STony Luck * in Sub-NUMA Cluster (SNC) mode. Infer the state by comparing the
114513488150STony Luck * number of CPUs sharing the L3 cache with CPU0 to the number of CPUs in
114613488150STony Luck * the same NUMA node as CPU0.
114713488150STony Luck * It is not possible to accurately determine SNC state if the system is
114813488150STony Luck * booted with a maxcpus=N parameter. That distorts the ratio of SNC nodes
114913488150STony Luck * to L3 caches. It will be OK if system is booted with hyperthreading
115013488150STony Luck * disabled (since this doesn't affect the ratio).
115113488150STony Luck */
snc_get_config(void)115213488150STony Luck static __init int snc_get_config(void)
115313488150STony Luck {
115413488150STony Luck struct cacheinfo *ci = get_cpu_cacheinfo_level(0, RESCTRL_L3_CACHE);
115513488150STony Luck const cpumask_t *node0_cpumask;
115613488150STony Luck int cpus_per_node, cpus_per_l3;
115713488150STony Luck int ret;
115813488150STony Luck
115913488150STony Luck if (!x86_match_cpu(snc_cpu_ids) || !ci)
116013488150STony Luck return 1;
116113488150STony Luck
116213488150STony Luck cpus_read_lock();
116313488150STony Luck if (num_online_cpus() != num_present_cpus())
116413488150STony Luck pr_warn("Some CPUs offline, SNC detection may be incorrect\n");
116513488150STony Luck cpus_read_unlock();
116613488150STony Luck
116713488150STony Luck node0_cpumask = cpumask_of_node(cpu_to_node(0));
116813488150STony Luck
116913488150STony Luck cpus_per_node = cpumask_weight(node0_cpumask);
117013488150STony Luck cpus_per_l3 = cpumask_weight(&ci->shared_cpu_map);
117113488150STony Luck
117213488150STony Luck if (!cpus_per_node || !cpus_per_l3)
117313488150STony Luck return 1;
117413488150STony Luck
117513488150STony Luck ret = cpus_per_l3 / cpus_per_node;
117613488150STony Luck
11779bce6e94STony Luck /* sanity check: Only valid results are 1, 2, 3, 4, 6 */
117813488150STony Luck switch (ret) {
117913488150STony Luck case 1:
118013488150STony Luck break;
118113488150STony Luck case 2 ... 4:
11829bce6e94STony Luck case 6:
118313488150STony Luck pr_info("Sub-NUMA Cluster mode detected with %d nodes per L3 cache\n", ret);
118413488150STony Luck rdt_resources_all[RDT_RESOURCE_L3].r_resctrl.mon_scope = RESCTRL_L3_NODE;
118513488150STony Luck break;
118613488150STony Luck default:
118713488150STony Luck pr_warn("Ignore improbable SNC node count %d\n", ret);
118813488150STony Luck ret = 1;
118913488150STony Luck break;
119013488150STony Luck }
119113488150STony Luck
119213488150STony Luck return ret;
119313488150STony Luck }
119413488150STony Luck
11954b6bdbf2SJames Morse /**
11964b6bdbf2SJames Morse * resctrl_mon_resource_init() - Initialise global monitoring structures.
11974b6bdbf2SJames Morse *
11984b6bdbf2SJames Morse * Allocate and initialise global monitor resources that do not belong to a
11994b6bdbf2SJames Morse * specific domain. i.e. the rmid_ptrs[] used for the limbo and free lists.
12004b6bdbf2SJames Morse * Called once during boot after the struct rdt_resource's have been configured
12014b6bdbf2SJames Morse * but before the filesystem is mounted.
12024b6bdbf2SJames Morse * Resctrl's cpuhp callbacks may be called before this point to bring a domain
12034b6bdbf2SJames Morse * online.
12044b6bdbf2SJames Morse *
12054b6bdbf2SJames Morse * Returns 0 for success, or -ENOMEM.
12064b6bdbf2SJames Morse */
resctrl_mon_resource_init(void)12074b6bdbf2SJames Morse int __init resctrl_mon_resource_init(void)
12084b6bdbf2SJames Morse {
12094b6bdbf2SJames Morse struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
12104b6bdbf2SJames Morse int ret;
12114b6bdbf2SJames Morse
12124b6bdbf2SJames Morse if (!r->mon_capable)
12134b6bdbf2SJames Morse return 0;
12144b6bdbf2SJames Morse
12154b6bdbf2SJames Morse ret = dom_data_init(r);
12164b6bdbf2SJames Morse if (ret)
12174b6bdbf2SJames Morse return ret;
12184b6bdbf2SJames Morse
12194b6bdbf2SJames Morse l3_mon_evt_init(r);
12204b6bdbf2SJames Morse
1221d81826f8SJames Morse if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
1222d81826f8SJames Morse mbm_total_event.configurable = true;
1223d81826f8SJames Morse resctrl_file_fflags_init("mbm_total_bytes_config",
1224d81826f8SJames Morse RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1225d81826f8SJames Morse }
1226d81826f8SJames Morse if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
1227d81826f8SJames Morse mbm_local_event.configurable = true;
1228d81826f8SJames Morse resctrl_file_fflags_init("mbm_local_bytes_config",
1229d81826f8SJames Morse RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
1230d81826f8SJames Morse }
1231d81826f8SJames Morse
123237bae175SJames Morse if (resctrl_arch_is_mbm_local_enabled())
123337bae175SJames Morse mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
123437bae175SJames Morse else if (resctrl_arch_is_mbm_total_enabled())
123537bae175SJames Morse mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;
123637bae175SJames Morse
12374b6bdbf2SJames Morse return 0;
12384b6bdbf2SJames Morse }
12394b6bdbf2SJames Morse
rdt_get_mon_l3_config(struct rdt_resource * r)1240bd334c86SBabu Moger int __init rdt_get_mon_l3_config(struct rdt_resource *r)
1241fa7d9493SBabu Moger {
12420c4d5ba1SReinette Chatre unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset;
124363c8b123SJames Morse struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
1244ae2328b5SJames Morse unsigned int threshold;
1245fa7d9493SBabu Moger
124613488150STony Luck snc_nodes_per_l3_cache = snc_get_config();
124713488150STony Luck
1248d80975e2SJames Morse resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024;
1249e13db55bSTony Luck hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale / snc_nodes_per_l3_cache;
1250e13db55bSTony Luck r->num_rmid = (boot_cpu_data.x86_cache_max_rmid + 1) / snc_nodes_per_l3_cache;
125163c8b123SJames Morse hw_res->mbm_width = MBM_CNTR_WIDTH_BASE;
12520c4d5ba1SReinette Chatre
12530c4d5ba1SReinette Chatre if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX)
125463c8b123SJames Morse hw_res->mbm_width += mbm_offset;
12550c4d5ba1SReinette Chatre else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX)
12560c4d5ba1SReinette Chatre pr_warn("Ignoring impossible MBM counter offset\n");
1257fa7d9493SBabu Moger
1258fa7d9493SBabu Moger /*
1259fa7d9493SBabu Moger * A reasonable upper limit on the max threshold is the number
1260fa7d9493SBabu Moger * of lines tagged per RMID if all RMIDs have the same number of
1261fa7d9493SBabu Moger * lines tagged in the LLC.
1262fa7d9493SBabu Moger *
1263fa7d9493SBabu Moger * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
1264fa7d9493SBabu Moger */
1265d80975e2SJames Morse threshold = resctrl_rmid_realloc_limit / r->num_rmid;
1266fa7d9493SBabu Moger
1267ae2328b5SJames Morse /*
1268ae2328b5SJames Morse * Because num_rmid may not be a power of two, round the value
1269ae2328b5SJames Morse * to the nearest multiple of hw_res->mon_scale so it matches a
1270ae2328b5SJames Morse * value the hardware will measure. mon_scale may not be a power of 2.
1271ae2328b5SJames Morse */
1272ae2328b5SJames Morse resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold);
1273fa7d9493SBabu Moger
1274d507f83cSBabu Moger if (rdt_cpu_has(X86_FEATURE_BMEC)) {
127554e35eb8SBabu Moger u32 eax, ebx, ecx, edx;
127654e35eb8SBabu Moger
127754e35eb8SBabu Moger /* Detect list of bandwidth sources that can be tracked */
127854e35eb8SBabu Moger cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
1279c32a7d77SJames Morse r->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS;
1280d507f83cSBabu Moger }
1281d507f83cSBabu Moger
1282fa7d9493SBabu Moger r->mon_capable = true;
1283fa7d9493SBabu Moger
1284fa7d9493SBabu Moger return 0;
1285fa7d9493SBabu Moger }
12864868a61dSFenghua Yu
resctrl_mon_resource_exit(void)12874b6bdbf2SJames Morse void resctrl_mon_resource_exit(void)
12883f7b0738SJames Morse {
128901184272SJames Morse struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
129001184272SJames Morse
129101184272SJames Morse dom_data_exit(r);
12923f7b0738SJames Morse }
12933f7b0738SJames Morse
intel_rdt_mbm_apply_quirk(void)12944868a61dSFenghua Yu void __init intel_rdt_mbm_apply_quirk(void)
12954868a61dSFenghua Yu {
12964868a61dSFenghua Yu int cf_index;
12974868a61dSFenghua Yu
12984868a61dSFenghua Yu cf_index = (boot_cpu_data.x86_cache_max_rmid + 1) / 8 - 1;
12994868a61dSFenghua Yu if (cf_index >= ARRAY_SIZE(mbm_cf_table)) {
13004868a61dSFenghua Yu pr_info("No MBM correction factor available\n");
13014868a61dSFenghua Yu return;
13024868a61dSFenghua Yu }
13034868a61dSFenghua Yu
13044868a61dSFenghua Yu mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold;
13054868a61dSFenghua Yu mbm_cf = mbm_cf_table[cf_index].cf;
13064868a61dSFenghua Yu }
1307