1 /*-
2 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions, and the following disclaimer,
10 * without modification.
11 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12 * substantially similar to the "NO WARRANTY" disclaimer below
13 * ("Disclaimer") and any redistribution must be conditioned upon
14 * including a substantially similar Disclaimer requirement for further
15 * binary redistribution.
16 *
17 * NO WARRANTY
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGES.
29 *
30 * Authors: Justin T. Gibbs (Spectra Logic Corporation)
31 */
32
33 /**
34 * \file zfsd_event.cc
35 */
36 #include <sys/cdefs.h>
37 #include <sys/byteorder.h>
38 #include <sys/time.h>
39 #include <sys/fs/zfs.h>
40 #include <sys/vdev_impl.h>
41
42 #include <syslog.h>
43
44 #include <libzfs.h>
45 #include <libzutil.h>
46 /*
47 * Undefine flush, defined by cpufunc.h on sparc64, because it conflicts with
48 * C++ flush methods
49 */
50 #undef flush
51 #undef __init
52 #include <list>
53 #include <map>
54 #include <sstream>
55 #include <string>
56
57 #include <devdctl/guid.h>
58 #include <devdctl/event.h>
59 #include <devdctl/event_factory.h>
60 #include <devdctl/exception.h>
61 #include <devdctl/consumer.h>
62
63 #include "callout.h"
64 #include "vdev_iterator.h"
65 #include "zfsd_event.h"
66 #include "case_file.h"
67 #include "vdev.h"
68 #include "zfsd.h"
69 #include "zfsd_exception.h"
70 #include "zpool_list.h"
71
72 __FBSDID("$FreeBSD$");
73 /*============================ Namespace Control =============================*/
74 using DevdCtl::Event;
75 using DevdCtl::Guid;
76 using DevdCtl::NVPairMap;
77 using std::stringstream;
78
79 /*=========================== Class Implementations ==========================*/
80
81 /*-------------------------------- GeomEvent --------------------------------*/
82
83 //- GeomEvent Static Public Methods -------------------------------------------
84 Event *
Builder(Event::Type type,NVPairMap & nvPairs,const string & eventString)85 GeomEvent::Builder(Event::Type type,
86 NVPairMap &nvPairs,
87 const string &eventString)
88 {
89 return (new GeomEvent(type, nvPairs, eventString));
90 }
91
92 //- GeomEvent Virtual Public Methods ------------------------------------------
93 Event *
DeepCopy() const94 GeomEvent::DeepCopy() const
95 {
96 return (new GeomEvent(*this));
97 }
98
99 bool
Process() const100 GeomEvent::Process() const
101 {
102 /*
103 * We only use GEOM events to repair damaged pools. So return early if
104 * there are no damaged pools
105 */
106 if (CaseFile::Empty())
107 return (false);
108
109 /*
110 * We are only concerned with arrivals and physical path changes,
111 * because those can be used to satisfy online and autoreplace
112 * operations
113 */
114 if (Value("type") != "GEOM::physpath" && Value("type") != "CREATE")
115 return (false);
116
117 /* Log the event since it is of interest. */
118 Log(LOG_INFO);
119
120 string devPath;
121 if (!DevPath(devPath))
122 return (false);
123
124 int devFd(open(devPath.c_str(), O_RDONLY));
125 if (devFd == -1)
126 return (false);
127
128 bool inUse;
129 bool degraded;
130 nvlist_t *devLabel(ReadLabel(devFd, inUse, degraded));
131
132 string physPath;
133 bool havePhysPath(PhysicalPath(physPath));
134
135 string devName;
136 DevName(devName);
137 close(devFd);
138
139 if (inUse && devLabel != NULL) {
140 OnlineByLabel(devPath, physPath, devLabel);
141 } else if (degraded) {
142 syslog(LOG_INFO, "%s is marked degraded. Ignoring "
143 "as a replace by physical path candidate.\n",
144 devName.c_str());
145 } else if (havePhysPath) {
146 /*
147 * TODO: attempt to resolve events using every casefile
148 * that matches this physpath
149 */
150 CaseFile *caseFile(CaseFile::Find(physPath));
151 if (caseFile != NULL) {
152 syslog(LOG_INFO,
153 "Found CaseFile(%s:%s:%s) - ReEvaluating\n",
154 caseFile->PoolGUIDString().c_str(),
155 caseFile->VdevGUIDString().c_str(),
156 zpool_state_to_name(caseFile->VdevState(),
157 VDEV_AUX_NONE));
158 caseFile->ReEvaluate(devPath, physPath, /*vdev*/NULL);
159 }
160 }
161 return (false);
162 }
163
164 //- GeomEvent Protected Methods -----------------------------------------------
GeomEvent(Event::Type type,NVPairMap & nvpairs,const string & eventString)165 GeomEvent::GeomEvent(Event::Type type, NVPairMap &nvpairs,
166 const string &eventString)
167 : DevdCtl::GeomEvent(type, nvpairs, eventString)
168 {
169 }
170
/**
 * Copy constructor: copies the DevdCtl::GeomEvent base sub-object.
 *
 * The base class is named as a type (DevdCtl::GeomEvent), the same way the
 * other constructors in this file name their bases, rather than through its
 * constructor name (DevdCtl::GeomEvent::GeomEvent).
 */
GeomEvent::GeomEvent(const GeomEvent &src)
	: DevdCtl::GeomEvent(src)
{
}
175
176 nvlist_t *
ReadLabel(int devFd,bool & inUse,bool & degraded)177 GeomEvent::ReadLabel(int devFd, bool &inUse, bool °raded)
178 {
179 pool_state_t poolState;
180 char *poolName;
181 boolean_t b_inuse;
182 int nlabels;
183
184 inUse = false;
185 degraded = false;
186 poolName = NULL;
187 if (zpool_in_use(g_zfsHandle, devFd, &poolState,
188 &poolName, &b_inuse) == 0) {
189 nvlist_t *devLabel = NULL;
190
191 inUse = b_inuse == B_TRUE;
192 if (poolName != NULL)
193 free(poolName);
194
195 if (zpool_read_label(devFd, &devLabel, &nlabels) != 0)
196 return (NULL);
197 /*
198 * If we find a disk with fewer than the maximum number of
199 * labels, it might be the whole disk of a partitioned disk
200 * where ZFS resides on a partition. In that case, we should do
201 * nothing and wait for the partition to appear. Or, the disk
202 * might be damaged. In that case, zfsd should do nothing and
203 * wait for the sysadmin to decide.
204 */
205 if (nlabels != VDEV_LABELS || devLabel == NULL) {
206 nvlist_free(devLabel);
207 return (NULL);
208 }
209
210 try {
211 Vdev vdev(devLabel);
212 degraded = vdev.State() != VDEV_STATE_HEALTHY;
213 return (devLabel);
214 } catch (ZfsdException &exp) {
215 string devName = fdevname(devFd);
216 string devPath = _PATH_DEV + devName;
217 string context("GeomEvent::ReadLabel: "
218 + devPath + ": ");
219
220 exp.GetString().insert(0, context);
221 exp.Log();
222 nvlist_free(devLabel);
223 }
224 }
225 return (NULL);
226 }
227
228 bool
OnlineByLabel(const string & devPath,const string & physPath,nvlist_t * devConfig)229 GeomEvent::OnlineByLabel(const string &devPath, const string& physPath,
230 nvlist_t *devConfig)
231 {
232 try {
233 /*
234 * A device with ZFS label information has been
235 * inserted. If it matches a device for which we
236 * have a case, see if we can solve that case.
237 */
238 syslog(LOG_INFO, "Interrogating VDEV label for %s\n",
239 devPath.c_str());
240 Vdev vdev(devConfig);
241 CaseFile *caseFile(CaseFile::Find(vdev.PoolGUID(),
242 vdev.GUID()));
243 if (caseFile != NULL)
244 return (caseFile->ReEvaluate(devPath, physPath, &vdev));
245
246 } catch (ZfsdException &exp) {
247 string context("GeomEvent::OnlineByLabel: " + devPath + ": ");
248
249 exp.GetString().insert(0, context);
250 exp.Log();
251 }
252 return (false);
253 }
254
255
256 /*--------------------------------- ZfsEvent ---------------------------------*/
257 //- ZfsEvent Static Public Methods ---------------------------------------------
258 DevdCtl::Event *
Builder(Event::Type type,NVPairMap & nvpairs,const string & eventString)259 ZfsEvent::Builder(Event::Type type, NVPairMap &nvpairs,
260 const string &eventString)
261 {
262 return (new ZfsEvent(type, nvpairs, eventString));
263 }
264
265 //- ZfsEvent Virtual Public Methods --------------------------------------------
266 Event *
DeepCopy() const267 ZfsEvent::DeepCopy() const
268 {
269 return (new ZfsEvent(*this));
270 }
271
272 bool
Process() const273 ZfsEvent::Process() const
274 {
275 string logstr("");
276
277 if (!Contains("class") && !Contains("type")) {
278 syslog(LOG_ERR,
279 "ZfsEvent::Process: Missing class or type data.");
280 return (false);
281 }
282
283 /* On config syncs, replay any queued events first. */
284 if (Value("type").find("misc.fs.zfs.config_sync") == 0) {
285 /*
286 * Even if saved events are unconsumed the second time
287 * around, drop them. Any events that still can't be
288 * consumed are probably referring to vdevs or pools that
289 * no longer exist.
290 */
291 ZfsDaemon::Get().ReplayUnconsumedEvents(/*discard*/true);
292 CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
293 }
294
295 if (Value("type").find("misc.fs.zfs.") == 0) {
296 /* Configuration changes, resilver events, etc. */
297 ProcessPoolEvent();
298 return (false);
299 }
300
301 if (!Contains("pool_guid") || !Contains("vdev_guid")) {
302 /* Only currently interested in Vdev related events. */
303 return (false);
304 }
305
306 CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
307 if (caseFile != NULL) {
308 Log(LOG_INFO);
309 syslog(LOG_INFO, "Evaluating existing case file\n");
310 caseFile->ReEvaluate(*this);
311 return (false);
312 }
313
314 /* Skip events that can't be handled. */
315 Guid poolGUID(PoolGUID());
316 /* If there are no replicas for a pool, then it's not manageable. */
317 if (Value("class").find("fs.zfs.vdev.no_replicas") == 0) {
318 stringstream msg;
319 msg << "No replicas available for pool " << poolGUID;
320 msg << ", ignoring";
321 Log(LOG_INFO);
322 syslog(LOG_INFO, "%s", msg.str().c_str());
323 return (false);
324 }
325
326 /*
327 * Create a case file for this vdev, and have it
328 * evaluate the event.
329 */
330 ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
331 if (zpl.empty()) {
332 stringstream msg;
333 int priority = LOG_INFO;
334 msg << "ZfsEvent::Process: Event for unknown pool ";
335 msg << poolGUID << " ";
336 msg << "queued";
337 Log(LOG_INFO);
338 syslog(priority, "%s", msg.str().c_str());
339 return (true);
340 }
341
342 nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID());
343 if (vdevConfig == NULL) {
344 stringstream msg;
345 int priority = LOG_INFO;
346 msg << "ZfsEvent::Process: Event for unknown vdev ";
347 msg << VdevGUID() << " ";
348 msg << "queued";
349 Log(LOG_INFO);
350 syslog(priority, "%s", msg.str().c_str());
351 return (true);
352 }
353
354 Vdev vdev(zpl.front(), vdevConfig);
355 caseFile = &CaseFile::Create(vdev);
356 if (caseFile->ReEvaluate(*this) == false) {
357 stringstream msg;
358 int priority = LOG_INFO;
359 msg << "ZfsEvent::Process: Unconsumed event for vdev(";
360 msg << zpool_get_name(zpl.front()) << ",";
361 msg << vdev.GUID() << ") ";
362 msg << "queued";
363 Log(LOG_INFO);
364 syslog(priority, "%s", msg.str().c_str());
365 return (true);
366 }
367 return (false);
368 }
369
370 //- ZfsEvent Protected Methods -------------------------------------------------
ZfsEvent(Event::Type type,NVPairMap & nvpairs,const string & eventString)371 ZfsEvent::ZfsEvent(Event::Type type, NVPairMap &nvpairs,
372 const string &eventString)
373 : DevdCtl::ZfsEvent(type, nvpairs, eventString)
374 {
375 }
376
/**
 * Copy constructor: copies the DevdCtl::ZfsEvent base sub-object and
 * nothing else.
 */
ZfsEvent::ZfsEvent(const ZfsEvent &src)
	: DevdCtl::ZfsEvent(src)
{
	/* Nothing beyond base-class construction. */
}
381
382 /*
383 * Sometimes the kernel won't detach a spare when it is no longer needed. This
384 * can happen for example if a drive is removed, then either the pool is
385 * exported or the machine is powered off, then the drive is reinserted, then
386 * the machine is powered on or the pool is imported. ZFSD must detach these
387 * spares itself.
388 */
389 void
CleanupSpares() const390 ZfsEvent::CleanupSpares() const
391 {
392 Guid poolGUID(PoolGUID());
393 ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
394 if (!zpl.empty()) {
395 zpool_handle_t* hdl;
396
397 hdl = zpl.front();
398 VdevIterator(hdl).Each(TryDetach, (void*)hdl);
399 }
400 }
401
402 void
ProcessPoolEvent() const403 ZfsEvent::ProcessPoolEvent() const
404 {
405 bool degradedDevice(false);
406
407 /* The pool is destroyed. Discard any open cases */
408 if (Value("type") == "misc.fs.zfs.pool_destroy") {
409 Log(LOG_INFO);
410 CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
411 return;
412 }
413
414 CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
415 if (caseFile != NULL) {
416 if (caseFile->VdevState() != VDEV_STATE_UNKNOWN
417 && caseFile->VdevState() < VDEV_STATE_HEALTHY)
418 degradedDevice = true;
419
420 Log(LOG_INFO);
421 caseFile->ReEvaluate(*this);
422 }
423 else if (Value("type") == "misc.fs.zfs.resilver_finish")
424 {
425 /*
426 * It's possible to get a resilver_finish event with no
427 * corresponding casefile. For example, if a damaged pool were
428 * exported, repaired, then reimported.
429 */
430 Log(LOG_INFO);
431 CleanupSpares();
432 }
433
434 if (Value("type") == "misc.fs.zfs.vdev_remove"
435 && degradedDevice == false) {
436
437 /* See if any other cases can make use of this device. */
438 Log(LOG_INFO);
439 ZfsDaemon::RequestSystemRescan();
440 }
441 }
442
443 bool
TryDetach(Vdev & vdev,void * cbArg)444 ZfsEvent::TryDetach(Vdev &vdev, void *cbArg)
445 {
446 /*
447 * Outline:
448 * if this device is a spare, and its parent includes one healthy,
449 * non-spare child, then detach this device.
450 */
451 zpool_handle_t *hdl(static_cast<zpool_handle_t*>(cbArg));
452
453 if (vdev.IsSpare()) {
454 std::list<Vdev> siblings;
455 std::list<Vdev>::iterator siblings_it;
456 boolean_t cleanup = B_FALSE;
457
458 Vdev parent = vdev.Parent();
459 siblings = parent.Children();
460
461 /* Determine whether the parent should be cleaned up */
462 for (siblings_it = siblings.begin();
463 siblings_it != siblings.end();
464 siblings_it++) {
465 Vdev sibling = *siblings_it;
466
467 if (!sibling.IsSpare() &&
468 sibling.State() == VDEV_STATE_HEALTHY) {
469 cleanup = B_TRUE;
470 break;
471 }
472 }
473
474 if (cleanup) {
475 syslog(LOG_INFO, "Detaching spare vdev %s from pool %s",
476 vdev.Path().c_str(), zpool_get_name(hdl));
477 zpool_vdev_detach(hdl, vdev.Path().c_str());
478 }
479
480 }
481
482 /* Always return false, because there may be other spares to detach */
483 return (false);
484 }
485