1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2007, 2008 Rui Paulo <[email protected]>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
25 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Device driver for Intel's On Die thermal sensor via MSR.
31 * First introduced in Intel's Core line of processors.
32 */
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include <sys/param.h>
38 #include <sys/bus.h>
39 #include <sys/conf.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h> /* for curthread */
45 #include <sys/smp.h>
46 #include <sys/sysctl.h>
47 #include <sys/systm.h>
48
49 #include <machine/specialreg.h>
50 #include <machine/cpufunc.h>
51 #include <machine/cputypes.h>
52 #include <machine/md_var.h>
53
/*
 * 0 degrees Celsius expressed in tenths of a Kelvin; added to Celsius
 * values scaled by 10 before they are exported with the "IK" sysctl format.
 */
#define	TZ_ZEROC			2731

/*
 * Fields of MSR_THERM_STATUS as decoded by this driver.  The two flag
 * values are tested directly against the MSR; every other field is
 * extracted as (msr >> *_SHIFT) & *_MASK.
 */
#define	THERM_STATUS			0x01	/* status flag, bit 0 */
#define	THERM_STATUS_LOG		0x02	/* log flag, bit 1 */
#define	THERM_STATUS_TEMP_SHIFT		16	/* digital reading */
#define	THERM_STATUS_TEMP_MASK		0x7f
#define	THERM_STATUS_RES_SHIFT		27	/* sensor resolution */
#define	THERM_STATUS_RES_MASK		0x0f
#define	THERM_STATUS_VALID_SHIFT	31	/* reading-valid flag */
#define	THERM_STATUS_VALID_MASK		0x01
64
65 struct coretemp_softc {
66 device_t sc_dev;
67 int sc_tjmax;
68 unsigned int sc_throttle_log;
69 };
70
71 /*
72 * Device methods.
73 */
74 static void coretemp_identify(driver_t *driver, device_t parent);
75 static int coretemp_probe(device_t dev);
76 static int coretemp_attach(device_t dev);
77 static int coretemp_detach(device_t dev);
78
79 static uint64_t coretemp_get_thermal_msr(int cpu);
80 static void coretemp_clear_thermal_msr(int cpu);
81 static int coretemp_get_val_sysctl(SYSCTL_HANDLER_ARGS);
82 static int coretemp_throttle_log_sysctl(SYSCTL_HANDLER_ARGS);
83
84 static device_method_t coretemp_methods[] = {
85 /* Device interface */
86 DEVMETHOD(device_identify, coretemp_identify),
87 DEVMETHOD(device_probe, coretemp_probe),
88 DEVMETHOD(device_attach, coretemp_attach),
89 DEVMETHOD(device_detach, coretemp_detach),
90
91 DEVMETHOD_END
92 };
93
94 static driver_t coretemp_driver = {
95 "coretemp",
96 coretemp_methods,
97 sizeof(struct coretemp_softc),
98 };
99
/*
 * Selector passed as arg2 to coretemp_get_val_sysctl() to choose which
 * value the handler reports.
 */
enum therm_info {
	CORETEMP_TEMP,		/* current temperature */
	CORETEMP_DELTA,		/* raw reading: distance below Tj(max) */
	CORETEMP_RESOLUTION,	/* sensor resolution field */
	CORETEMP_TJMAX,		/* Tj(max) */
};
106
107 static devclass_t coretemp_devclass;
108 DRIVER_MODULE(coretemp, cpu, coretemp_driver, coretemp_devclass, NULL,
109 NULL);
110
111 static void
coretemp_identify(driver_t * driver,device_t parent)112 coretemp_identify(driver_t *driver, device_t parent)
113 {
114 device_t child;
115 u_int regs[4];
116
117 /* Make sure we're not being doubly invoked. */
118 if (device_find_child(parent, "coretemp", -1) != NULL)
119 return;
120
121 /* Check that CPUID 0x06 is supported and the vendor is Intel.*/
122 if (cpu_high < 6 || cpu_vendor_id != CPU_VENDOR_INTEL)
123 return;
124 /*
125 * CPUID 0x06 returns 1 if the processor has on-die thermal
126 * sensors. EBX[0:3] contains the number of sensors.
127 */
128 do_cpuid(0x06, regs);
129 if ((regs[0] & 0x1) != 1)
130 return;
131
132 /*
133 * We add a child for each CPU since settings must be performed
134 * on each CPU in the SMP case.
135 */
136 child = device_add_child(parent, "coretemp", device_get_unit(parent));
137 if (child == NULL)
138 device_printf(parent, "add coretemp child failed\n");
139 }
140
141 static int
coretemp_probe(device_t dev)142 coretemp_probe(device_t dev)
143 {
144 if (resource_disabled("coretemp", 0))
145 return (ENXIO);
146
147 device_set_desc(dev, "CPU On-Die Thermal Sensors");
148
149 if (!bootverbose && device_get_unit(dev) != 0)
150 device_quiet(dev);
151
152 return (BUS_PROBE_GENERIC);
153 }
154
155 static int
coretemp_attach(device_t dev)156 coretemp_attach(device_t dev)
157 {
158 struct coretemp_softc *sc = device_get_softc(dev);
159 device_t pdev;
160 uint64_t msr;
161 int cpu_model, cpu_stepping;
162 int ret, tjtarget;
163 struct sysctl_oid *oid;
164 struct sysctl_ctx_list *ctx;
165
166 sc->sc_dev = dev;
167 pdev = device_get_parent(dev);
168 cpu_model = CPUID_TO_MODEL(cpu_id);
169 cpu_stepping = CPUID_TO_STEPPING(cpu_id);
170
171 /*
172 * Some CPUs, namely the PIII, don't have thermal sensors, but
173 * report them when the CPUID check is performed in
174 * coretemp_identify(). This leads to a later GPF when the sensor
175 * is queried via a MSR, so we stop here.
176 */
177 if (cpu_model < 0xe)
178 return (ENXIO);
179
180 #if 0 /*
181 * XXXrpaulo: I have this CPU model and when it returns from C3
182 * coretemp continues to function properly.
183 */
184
185 /*
186 * Check for errata AE18.
187 * "Processor Digital Thermal Sensor (DTS) Readout stops
188 * updating upon returning from C3/C4 state."
189 *
190 * Adapted from the Linux coretemp driver.
191 */
192 if (cpu_model == 0xe && cpu_stepping < 0xc) {
193 msr = rdmsr(MSR_BIOS_SIGN);
194 msr = msr >> 32;
195 if (msr < 0x39) {
196 device_printf(dev, "not supported (Intel errata "
197 "AE18), try updating your BIOS\n");
198 return (ENXIO);
199 }
200 }
201 #endif
202
203 /*
204 * Use 100C as the initial value.
205 */
206 sc->sc_tjmax = 100;
207
208 if ((cpu_model == 0xf && cpu_stepping >= 2) || cpu_model == 0xe) {
209 /*
210 * On some Core 2 CPUs, there's an undocumented MSR that
211 * can tell us if Tj(max) is 100 or 85.
212 *
213 * The if-clause for CPUs having the MSR_IA32_EXT_CONFIG was adapted
214 * from the Linux coretemp driver.
215 */
216 msr = rdmsr(MSR_IA32_EXT_CONFIG);
217 if (msr & (1 << 30))
218 sc->sc_tjmax = 85;
219 } else if (cpu_model == 0x17) {
220 switch (cpu_stepping) {
221 case 0x6: /* Mobile Core 2 Duo */
222 sc->sc_tjmax = 105;
223 break;
224 default: /* Unknown stepping */
225 break;
226 }
227 } else if (cpu_model == 0x1c) {
228 switch (cpu_stepping) {
229 case 0xa: /* 45nm Atom D400, N400 and D500 series */
230 sc->sc_tjmax = 100;
231 break;
232 default:
233 sc->sc_tjmax = 90;
234 break;
235 }
236 } else {
237 /*
238 * Attempt to get Tj(max) from MSR IA32_TEMPERATURE_TARGET.
239 *
240 * This method is described in Intel white paper "CPU
241 * Monitoring With DTS/PECI". (#322683)
242 */
243 ret = rdmsr_safe(MSR_IA32_TEMPERATURE_TARGET, &msr);
244 if (ret == 0) {
245 tjtarget = (msr >> 16) & 0xff;
246
247 /*
248 * On earlier generation of processors, the value
249 * obtained from IA32_TEMPERATURE_TARGET register is
250 * an offset that needs to be summed with a model
251 * specific base. It is however not clear what
252 * these numbers are, with the publicly available
253 * documents from Intel.
254 *
255 * For now, we consider [70, 110]C range, as
256 * described in #322683, as "reasonable" and accept
257 * these values whenever the MSR is available for
258 * read, regardless the CPU model.
259 */
260 if (tjtarget >= 70 && tjtarget <= 110)
261 sc->sc_tjmax = tjtarget;
262 else
263 device_printf(dev, "Tj(target) value %d "
264 "does not seem right.\n", tjtarget);
265 } else
266 device_printf(dev, "Can not get Tj(target) "
267 "from your CPU, using 100C.\n");
268 }
269
270 if (bootverbose)
271 device_printf(dev, "Setting TjMax=%d\n", sc->sc_tjmax);
272
273 ctx = device_get_sysctl_ctx(dev);
274
275 oid = SYSCTL_ADD_NODE(ctx,
276 SYSCTL_CHILDREN(device_get_sysctl_tree(pdev)), OID_AUTO,
277 "coretemp", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
278 "Per-CPU thermal information");
279
280 /*
281 * Add the MIBs to dev.cpu.N and dev.cpu.N.coretemp.
282 */
283 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(pdev)),
284 OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
285 dev, CORETEMP_TEMP, coretemp_get_val_sysctl, "IK",
286 "Current temperature");
287 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "delta",
288 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CORETEMP_DELTA,
289 coretemp_get_val_sysctl, "I",
290 "Delta between TCC activation and current temperature");
291 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "resolution",
292 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CORETEMP_RESOLUTION,
293 coretemp_get_val_sysctl, "I",
294 "Resolution of CPU thermal sensor");
295 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "tjmax",
296 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CORETEMP_TJMAX,
297 coretemp_get_val_sysctl, "IK",
298 "TCC activation temperature");
299 SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
300 "throttle_log", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, dev, 0,
301 coretemp_throttle_log_sysctl, "I",
302 "Set to 1 if the thermal sensor has tripped");
303
304 return (0);
305 }
306
307 static int
coretemp_detach(device_t dev)308 coretemp_detach(device_t dev)
309 {
310 return (0);
311 }
312
313 struct coretemp_args {
314 u_int msr;
315 uint64_t val;
316 };
317
318 /*
319 * The digital temperature reading is located at bit 16
320 * of MSR_THERM_STATUS.
321 *
322 * There is a bit on that MSR that indicates whether the
323 * temperature is valid or not.
324 *
325 * The temperature is computed by subtracting the temperature
326 * reading by Tj(max).
327 */
328 static uint64_t
coretemp_get_thermal_msr(int cpu)329 coretemp_get_thermal_msr(int cpu)
330 {
331 uint64_t res;
332
333 x86_msr_op(MSR_THERM_STATUS, MSR_OP_RENDEZVOUS_ONE | MSR_OP_READ |
334 MSR_OP_CPUID(cpu), 0, &res);
335 return (res);
336 }
337
338 static void
coretemp_clear_thermal_msr(int cpu)339 coretemp_clear_thermal_msr(int cpu)
340 {
341 x86_msr_op(MSR_THERM_STATUS, MSR_OP_RENDEZVOUS_ONE | MSR_OP_WRITE |
342 MSR_OP_CPUID(cpu), 0, NULL);
343 }
344
345 static int
coretemp_get_val_sysctl(SYSCTL_HANDLER_ARGS)346 coretemp_get_val_sysctl(SYSCTL_HANDLER_ARGS)
347 {
348 device_t dev;
349 uint64_t msr;
350 int val, tmp;
351 struct coretemp_softc *sc;
352 enum therm_info type;
353 char stemp[16];
354
355 dev = (device_t) arg1;
356 msr = coretemp_get_thermal_msr(device_get_unit(dev));
357 sc = device_get_softc(dev);
358 type = arg2;
359
360 if (((msr >> THERM_STATUS_VALID_SHIFT) & THERM_STATUS_VALID_MASK) != 1) {
361 val = -1;
362 } else {
363 switch (type) {
364 case CORETEMP_TEMP:
365 tmp = (msr >> THERM_STATUS_TEMP_SHIFT) &
366 THERM_STATUS_TEMP_MASK;
367 val = (sc->sc_tjmax - tmp) * 10 + TZ_ZEROC;
368 break;
369 case CORETEMP_DELTA:
370 val = (msr >> THERM_STATUS_TEMP_SHIFT) &
371 THERM_STATUS_TEMP_MASK;
372 break;
373 case CORETEMP_RESOLUTION:
374 val = (msr >> THERM_STATUS_RES_SHIFT) &
375 THERM_STATUS_RES_MASK;
376 break;
377 case CORETEMP_TJMAX:
378 val = sc->sc_tjmax * 10 + TZ_ZEROC;
379 break;
380 }
381 }
382
383 if (msr & THERM_STATUS_LOG) {
384 coretemp_clear_thermal_msr(device_get_unit(dev));
385 sc->sc_throttle_log = 1;
386
387 /*
388 * Check for Critical Temperature Status and Critical
389 * Temperature Log. It doesn't really matter if the
390 * current temperature is invalid because the "Critical
391 * Temperature Log" bit will tell us if the Critical
392 * Temperature has * been reached in past. It's not
393 * directly related to the current temperature.
394 *
395 * If we reach a critical level, allow devctl(4)
396 * to catch this and shutdown the system.
397 */
398 if (msr & THERM_STATUS) {
399 tmp = (msr >> THERM_STATUS_TEMP_SHIFT) &
400 THERM_STATUS_TEMP_MASK;
401 tmp = (sc->sc_tjmax - tmp) * 10 + TZ_ZEROC;
402 device_printf(dev, "critical temperature detected, "
403 "suggest system shutdown\n");
404 snprintf(stemp, sizeof(stemp), "%d", tmp);
405 devctl_notify("coretemp", "Thermal", stemp,
406 "notify=0xcc");
407 }
408 }
409
410 return (sysctl_handle_int(oidp, &val, 0, req));
411 }
412
413 static int
coretemp_throttle_log_sysctl(SYSCTL_HANDLER_ARGS)414 coretemp_throttle_log_sysctl(SYSCTL_HANDLER_ARGS)
415 {
416 device_t dev;
417 uint64_t msr;
418 int error, val;
419 struct coretemp_softc *sc;
420
421 dev = (device_t) arg1;
422 msr = coretemp_get_thermal_msr(device_get_unit(dev));
423 sc = device_get_softc(dev);
424
425 if (msr & THERM_STATUS_LOG) {
426 coretemp_clear_thermal_msr(device_get_unit(dev));
427 sc->sc_throttle_log = 1;
428 }
429
430 val = sc->sc_throttle_log;
431
432 error = sysctl_handle_int(oidp, &val, 0, req);
433
434 if (error || !req->newptr)
435 return (error);
436 else if (val != 0)
437 return (EINVAL);
438
439 coretemp_clear_thermal_msr(device_get_unit(dev));
440 sc->sc_throttle_log = 0;
441
442 return (0);
443 }
444