1 /* 2 * Bad block management 3 * 4 * - Heavily based on MD badblocks code from Neil Brown 5 * 6 * Copyright (c) 2015, Intel Corporation. 7 * 8 * This program is free software; you can redistribute it and/or modify it 9 * under the terms and conditions of the GNU General Public License, 10 * version 2, as published by the Free Software Foundation. 11 * 12 * This program is distributed in the hope it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 * more details. 16 */ 17 18 #include <linux/badblocks.h> 19 #include <linux/seqlock.h> 20 #include <linux/device.h> 21 #include <linux/kernel.h> 22 #include <linux/module.h> 23 #include <linux/stddef.h> 24 #include <linux/types.h> 25 #include <linux/slab.h> 26 27 /** 28 * badblocks_check() - check a given range for bad sectors 29 * @bb: the badblocks structure that holds all badblock information 30 * @s: sector (start) at which to check for badblocks 31 * @sectors: number of sectors to check for badblocks 32 * @first_bad: pointer to store location of the first badblock 33 * @bad_sectors: pointer to store number of badblocks after @first_bad 34 * 35 * We can record which blocks on each device are 'bad' and so just 36 * fail those blocks, or that stripe, rather than the whole device. 37 * Entries in the bad-block table are 64bits wide. This comprises: 38 * Length of bad-range, in sectors: 0-511 for lengths 1-512 39 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes) 40 * A 'shift' can be set so that larger blocks are tracked and 41 * consequently larger devices can be covered. 42 * 'Acknowledged' flag - 1 bit. - the most significant bit. 43 * 44 * Locking of the bad-block table uses a seqlock so badblocks_check 45 * might need to retry if it is very unlucky. 46 * We will sometimes want to check for bad blocks in a bi_end_io function, 47 * so we use the write_seqlock_irq variant. 48 * 49 * When looking for a bad block we specify a range and want to 50 * know if any block in the range is bad. So we binary-search 51 * to the last range that starts at-or-before the given endpoint, 52 * (or "before the sector after the target range") 53 * then see if it ends after the given start. 54 * 55 * Return: 56 * 0: there are no known bad blocks in the range 57 * 1: there are known bad block which are all acknowledged 58 * -1: there are bad blocks which have not yet been acknowledged in metadata. 59 * plus the start/length of the first bad section we overlap. 60 */ 61 int badblocks_check(struct badblocks *bb, sector_t s, int sectors, 62 sector_t *first_bad, int *bad_sectors) 63 { 64 int hi; 65 int lo; 66 u64 *p = bb->page; 67 int rv; 68 sector_t target = s + sectors; 69 unsigned seq; 70 71 if (bb->shift > 0) { 72 /* round the start down, and the end up */ 73 s >>= bb->shift; 74 target += (1<<bb->shift) - 1; 75 target >>= bb->shift; 76 sectors = target - s; 77 } 78 /* 'target' is now the first block after the bad range */ 79 80 retry: 81 seq = read_seqbegin(&bb->lock); 82 lo = 0; 83 rv = 0; 84 hi = bb->count; 85 86 /* Binary search between lo and hi for 'target' 87 * i.e. for the last range that starts before 'target' 88 */ 89 /* INVARIANT: ranges before 'lo' and at-or-after 'hi' 90 * are known not to be the last range before target. 91 * VARIANT: hi-lo is the number of possible 92 * ranges, and decreases until it reaches 1 93 */ 94 while (hi - lo > 1) { 95 int mid = (lo + hi) / 2; 96 sector_t a = BB_OFFSET(p[mid]); 97 98 if (a < target) 99 /* This could still be the one, earlier ranges 100 * could not. 101 */ 102 lo = mid; 103 else 104 /* This and later ranges are definitely out. */ 105 hi = mid; 106 } 107 /* 'lo' might be the last that started before target, but 'hi' isn't */ 108 if (hi > lo) { 109 /* need to check all range that end after 's' to see if 110 * any are unacknowledged. 111 */ 112 while (lo >= 0 && 113 BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) { 114 if (BB_OFFSET(p[lo]) < target) { 115 /* starts before the end, and finishes after 116 * the start, so they must overlap 117 */ 118 if (rv != -1 && BB_ACK(p[lo])) 119 rv = 1; 120 else 121 rv = -1; 122 *first_bad = BB_OFFSET(p[lo]); 123 *bad_sectors = BB_LEN(p[lo]); 124 } 125 lo--; 126 } 127 } 128 129 if (read_seqretry(&bb->lock, seq)) 130 goto retry; 131 132 return rv; 133 } 134 EXPORT_SYMBOL_GPL(badblocks_check); 135 136 /** 137 * badblocks_set() - Add a range of bad blocks to the table. 138 * @bb: the badblocks structure that holds all badblock information 139 * @s: first sector to mark as bad 140 * @sectors: number of sectors to mark as bad 141 * @acknowledged: weather to mark the bad sectors as acknowledged 142 * 143 * This might extend the table, or might contract it if two adjacent ranges 144 * can be merged. We binary-search to find the 'insertion' point, then 145 * decide how best to handle it. 146 * 147 * Return: 148 * 0: success 149 * 1: failed to set badblocks (out of space) 150 */ 151 int badblocks_set(struct badblocks *bb, sector_t s, int sectors, 152 int acknowledged) 153 { 154 u64 *p; 155 int lo, hi; 156 int rv = 0; 157 unsigned long flags; 158 159 if (bb->shift < 0) 160 /* badblocks are disabled */ 161 return 0; 162 163 if (bb->shift) { 164 /* round the start down, and the end up */ 165 sector_t next = s + sectors; 166 167 s >>= bb->shift; 168 next += (1<<bb->shift) - 1; 169 next >>= bb->shift; 170 sectors = next - s; 171 } 172 173 write_seqlock_irqsave(&bb->lock, flags); 174 175 p = bb->page; 176 lo = 0; 177 hi = bb->count; 178 /* Find the last range that starts at-or-before 's' */ 179 while (hi - lo > 1) { 180 int mid = (lo + hi) / 2; 181 sector_t a = BB_OFFSET(p[mid]); 182 183 if (a <= s) 184 lo = mid; 185 else 186 hi = mid; 187 } 188 if (hi > lo && BB_OFFSET(p[lo]) > s) 189 hi = lo; 190 191 if (hi > lo) { 192 /* we found a range that might merge with the start 193 * of our new range 194 */ 195 sector_t a = BB_OFFSET(p[lo]); 196 sector_t e = a + BB_LEN(p[lo]); 197 int ack = BB_ACK(p[lo]); 198 199 if (e >= s) { 200 /* Yes, we can merge with a previous range */ 201 if (s == a && s + sectors >= e) 202 /* new range covers old */ 203 ack = acknowledged; 204 else 205 ack = ack && acknowledged; 206 207 if (e < s + sectors) 208 e = s + sectors; 209 if (e - a <= BB_MAX_LEN) { 210 p[lo] = BB_MAKE(a, e-a, ack); 211 s = e; 212 } else { 213 /* does not all fit in one range, 214 * make p[lo] maximal 215 */ 216 if (BB_LEN(p[lo]) != BB_MAX_LEN) 217 p[lo] = BB_MAKE(a, BB_MAX_LEN, ack); 218 s = a + BB_MAX_LEN; 219 } 220 sectors = e - s; 221 } 222 } 223 if (sectors && hi < bb->count) { 224 /* 'hi' points to the first range that starts after 's'. 225 * Maybe we can merge with the start of that range 226 */ 227 sector_t a = BB_OFFSET(p[hi]); 228 sector_t e = a + BB_LEN(p[hi]); 229 int ack = BB_ACK(p[hi]); 230 231 if (a <= s + sectors) { 232 /* merging is possible */ 233 if (e <= s + sectors) { 234 /* full overlap */ 235 e = s + sectors; 236 ack = acknowledged; 237 } else 238 ack = ack && acknowledged; 239 240 a = s; 241 if (e - a <= BB_MAX_LEN) { 242 p[hi] = BB_MAKE(a, e-a, ack); 243 s = e; 244 } else { 245 p[hi] = BB_MAKE(a, BB_MAX_LEN, ack); 246 s = a + BB_MAX_LEN; 247 } 248 sectors = e - s; 249 lo = hi; 250 hi++; 251 } 252 } 253 if (sectors == 0 && hi < bb->count) { 254 /* we might be able to combine lo and hi */ 255 /* Note: 's' is at the end of 'lo' */ 256 sector_t a = BB_OFFSET(p[hi]); 257 int lolen = BB_LEN(p[lo]); 258 int hilen = BB_LEN(p[hi]); 259 int newlen = lolen + hilen - (s - a); 260 261 if (s >= a && newlen < BB_MAX_LEN) { 262 /* yes, we can combine them */ 263 int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]); 264 265 p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack); 266 memmove(p + hi, p + hi + 1, 267 (bb->count - hi - 1) * 8); 268 bb->count--; 269 } 270 } 271 while (sectors) { 272 /* didn't merge (it all). 273 * Need to add a range just before 'hi' 274 */ 275 if (bb->count >= MAX_BADBLOCKS) { 276 /* No room for more */ 277 rv = 1; 278 break; 279 } else { 280 int this_sectors = sectors; 281 282 memmove(p + hi + 1, p + hi, 283 (bb->count - hi) * 8); 284 bb->count++; 285 286 if (this_sectors > BB_MAX_LEN) 287 this_sectors = BB_MAX_LEN; 288 p[hi] = BB_MAKE(s, this_sectors, acknowledged); 289 sectors -= this_sectors; 290 s += this_sectors; 291 } 292 } 293 294 bb->changed = 1; 295 if (!acknowledged) 296 bb->unacked_exist = 1; 297 write_sequnlock_irqrestore(&bb->lock, flags); 298 299 return rv; 300 } 301 EXPORT_SYMBOL_GPL(badblocks_set); 302 303 /** 304 * badblocks_clear() - Remove a range of bad blocks to the table. 305 * @bb: the badblocks structure that holds all badblock information 306 * @s: first sector to mark as bad 307 * @sectors: number of sectors to mark as bad 308 * 309 * This may involve extending the table if we spilt a region, 310 * but it must not fail. So if the table becomes full, we just 311 * drop the remove request. 312 * 313 * Return: 314 * 0: success 315 * 1: failed to clear badblocks 316 */ 317 int badblocks_clear(struct badblocks *bb, sector_t s, int sectors) 318 { 319 u64 *p; 320 int lo, hi; 321 sector_t target = s + sectors; 322 int rv = 0; 323 324 if (bb->shift > 0) { 325 /* When clearing we round the start up and the end down. 326 * This should not matter as the shift should align with 327 * the block size and no rounding should ever be needed. 328 * However it is better the think a block is bad when it 329 * isn't than to think a block is not bad when it is. 330 */ 331 s += (1<<bb->shift) - 1; 332 s >>= bb->shift; 333 target >>= bb->shift; 334 sectors = target - s; 335 } 336 337 write_seqlock_irq(&bb->lock); 338 339 p = bb->page; 340 lo = 0; 341 hi = bb->count; 342 /* Find the last range that starts before 'target' */ 343 while (hi - lo > 1) { 344 int mid = (lo + hi) / 2; 345 sector_t a = BB_OFFSET(p[mid]); 346 347 if (a < target) 348 lo = mid; 349 else 350 hi = mid; 351 } 352 if (hi > lo) { 353 /* p[lo] is the last range that could overlap the 354 * current range. Earlier ranges could also overlap, 355 * but only this one can overlap the end of the range. 356 */ 357 if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) && 358 (BB_OFFSET(p[lo]) < target)) { 359 /* Partial overlap, leave the tail of this range */ 360 int ack = BB_ACK(p[lo]); 361 sector_t a = BB_OFFSET(p[lo]); 362 sector_t end = a + BB_LEN(p[lo]); 363 364 if (a < s) { 365 /* we need to split this range */ 366 if (bb->count >= MAX_BADBLOCKS) { 367 rv = -ENOSPC; 368 goto out; 369 } 370 memmove(p+lo+1, p+lo, (bb->count - lo) * 8); 371 bb->count++; 372 p[lo] = BB_MAKE(a, s-a, ack); 373 lo++; 374 } 375 p[lo] = BB_MAKE(target, end - target, ack); 376 /* there is no longer an overlap */ 377 hi = lo; 378 lo--; 379 } 380 while (lo >= 0 && 381 (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) && 382 (BB_OFFSET(p[lo]) < target)) { 383 /* This range does overlap */ 384 if (BB_OFFSET(p[lo]) < s) { 385 /* Keep the early parts of this range. */ 386 int ack = BB_ACK(p[lo]); 387 sector_t start = BB_OFFSET(p[lo]); 388 389 p[lo] = BB_MAKE(start, s - start, ack); 390 /* now low doesn't overlap, so.. */ 391 break; 392 } 393 lo--; 394 } 395 /* 'lo' is strictly before, 'hi' is strictly after, 396 * anything between needs to be discarded 397 */ 398 if (hi - lo > 1) { 399 memmove(p+lo+1, p+hi, (bb->count - hi) * 8); 400 bb->count -= (hi - lo - 1); 401 } 402 } 403 404 bb->changed = 1; 405 out: 406 write_sequnlock_irq(&bb->lock); 407 return rv; 408 } 409 EXPORT_SYMBOL_GPL(badblocks_clear); 410 411 /** 412 * ack_all_badblocks() - Acknowledge all bad blocks in a list. 413 * @bb: the badblocks structure that holds all badblock information 414 * 415 * This only succeeds if ->changed is clear. It is used by 416 * in-kernel metadata updates 417 */ 418 void ack_all_badblocks(struct badblocks *bb) 419 { 420 if (bb->page == NULL || bb->changed) 421 /* no point even trying */ 422 return; 423 write_seqlock_irq(&bb->lock); 424 425 if (bb->changed == 0 && bb->unacked_exist) { 426 u64 *p = bb->page; 427 int i; 428 429 for (i = 0; i < bb->count ; i++) { 430 if (!BB_ACK(p[i])) { 431 sector_t start = BB_OFFSET(p[i]); 432 int len = BB_LEN(p[i]); 433 434 p[i] = BB_MAKE(start, len, 1); 435 } 436 } 437 bb->unacked_exist = 0; 438 } 439 write_sequnlock_irq(&bb->lock); 440 } 441 EXPORT_SYMBOL_GPL(ack_all_badblocks); 442 443 /** 444 * badblocks_show() - sysfs access to bad-blocks list 445 * @bb: the badblocks structure that holds all badblock information 446 * @page: buffer received from sysfs 447 * @unack: weather to show unacknowledged badblocks 448 * 449 * Return: 450 * Length of returned data 451 */ 452 ssize_t badblocks_show(struct badblocks *bb, char *page, int unack) 453 { 454 size_t len; 455 int i; 456 u64 *p = bb->page; 457 unsigned seq; 458 459 if (bb->shift < 0) 460 return 0; 461 462 retry: 463 seq = read_seqbegin(&bb->lock); 464 465 len = 0; 466 i = 0; 467 468 while (len < PAGE_SIZE && i < bb->count) { 469 sector_t s = BB_OFFSET(p[i]); 470 unsigned int length = BB_LEN(p[i]); 471 int ack = BB_ACK(p[i]); 472 473 i++; 474 475 if (unack && ack) 476 continue; 477 478 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n", 479 (unsigned long long)s << bb->shift, 480 length << bb->shift); 481 } 482 if (unack && len == 0) 483 bb->unacked_exist = 0; 484 485 if (read_seqretry(&bb->lock, seq)) 486 goto retry; 487 488 return len; 489 } 490 EXPORT_SYMBOL_GPL(badblocks_show); 491 492 /** 493 * badblocks_store() - sysfs access to bad-blocks list 494 * @bb: the badblocks structure that holds all badblock information 495 * @page: buffer received from sysfs 496 * @len: length of data received from sysfs 497 * @unack: weather to show unacknowledged badblocks 498 * 499 * Return: 500 * Length of the buffer processed or -ve error. 501 */ 502 ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len, 503 int unack) 504 { 505 unsigned long long sector; 506 int length; 507 char newline; 508 509 switch (sscanf(page, "%llu %d%c", §or, &length, &newline)) { 510 case 3: 511 if (newline != '\n') 512 return -EINVAL; 513 case 2: 514 if (length <= 0) 515 return -EINVAL; 516 break; 517 default: 518 return -EINVAL; 519 } 520 521 if (badblocks_set(bb, sector, length, !unack)) 522 return -ENOSPC; 523 else 524 return len; 525 } 526 EXPORT_SYMBOL_GPL(badblocks_store); 527 528 static int __badblocks_init(struct device *dev, struct badblocks *bb, 529 int enable) 530 { 531 bb->dev = dev; 532 bb->count = 0; 533 if (enable) 534 bb->shift = 0; 535 else 536 bb->shift = -1; 537 if (dev) 538 bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL); 539 else 540 bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL); 541 if (!bb->page) { 542 bb->shift = -1; 543 return -ENOMEM; 544 } 545 seqlock_init(&bb->lock); 546 547 return 0; 548 } 549 550 /** 551 * badblocks_init() - initialize the badblocks structure 552 * @bb: the badblocks structure that holds all badblock information 553 * @enable: weather to enable badblocks accounting 554 * 555 * Return: 556 * 0: success 557 * -ve errno: on error 558 */ 559 int badblocks_init(struct badblocks *bb, int enable) 560 { 561 return __badblocks_init(NULL, bb, enable); 562 } 563 EXPORT_SYMBOL_GPL(badblocks_init); 564 565 int devm_init_badblocks(struct device *dev, struct badblocks *bb) 566 { 567 if (!bb) 568 return -EINVAL; 569 return __badblocks_init(dev, bb, 1); 570 } 571 EXPORT_SYMBOL_GPL(devm_init_badblocks); 572 573 /** 574 * badblocks_exit() - free the badblocks structure 575 * @bb: the badblocks structure that holds all badblock information 576 */ 577 void badblocks_exit(struct badblocks *bb) 578 { 579 if (!bb) 580 return; 581 if (bb->dev) 582 devm_kfree(bb->dev, bb->page); 583 else 584 kfree(bb->page); 585 bb->page = NULL; 586 } 587 EXPORT_SYMBOL_GPL(badblocks_exit); 588