// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */

#include <linux/init.h>
#include <linux/namei.h>
#include <linux/pid_namespace.h>
#include <linux/fs.h>
#include <linux/fdtable.h>
#include <linux/filter.h>
#include <linux/btf_ids.h>
#include "mmap_unlock_work.h"

static const char * const iter_task_type_names[] = {
	"ALL",
	"TID",
	"PID",
};

struct bpf_iter_seq_task_common {
	struct pid_namespace *ns;
	enum bpf_iter_task_type type;
	u32 pid;
	u32 pid_visiting;
};

struct bpf_iter_seq_task_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * this is assumed by {init, fini}_seq_pidns() callback functions.
	 */
	struct bpf_iter_seq_task_common common;
	u32 tid;
};

static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common,
						   u32 *tid,
						   bool skip_if_dup_files)
{
	struct task_struct *task, *next_task;
	struct pid *pid;
	u32 saved_tid;

	if (!*tid) {
		/* The first time, the iterator calls this function. */
		pid = find_pid_ns(common->pid, common->ns);
		task = get_pid_task(pid, PIDTYPE_TGID);
		if (!task)
			return NULL;

		*tid = common->pid;
		common->pid_visiting = common->pid;

		return task;
	}

	/* If the control returns to user space and comes back to the
	 * kernel again, *tid and common->pid_visiting should be the
	 * same for task_seq_start() to pick up the correct task.
	 */
	if (*tid == common->pid_visiting) {
		pid = find_pid_ns(common->pid_visiting, common->ns);
		task = get_pid_task(pid, PIDTYPE_PID);

		return task;
	}

	task = find_task_by_pid_ns(common->pid_visiting, common->ns);
	if (!task)
		return NULL;

retry:
	next_task = next_thread(task);

	saved_tid = *tid;
	*tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns);
	if (!*tid || *tid == common->pid) {
		/* Run out of tasks of a process. The tasks of a
		 * thread_group are linked as circular linked list.
		 */
		*tid = saved_tid;
		return NULL;
	}

	common->pid_visiting = *tid;

	if (skip_if_dup_files && next_task->files == next_task->group_leader->files) {
		task = next_task;
		goto retry;
	}

	get_task_struct(next_task);
	return next_task;
}
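
/* task_seq_get_next() dispatches on the iterator type recorded at attach
 * time: BPF_TASK_ITER_TID returns only the single matching task,
 * BPF_TASK_ITER_TGID walks the thread group via task_group_seq_get_next(),
 * and BPF_TASK_ITER_ALL (the default) walks every pid in the namespace
 * with find_ge_pid(). A non-NULL return carries a task_struct reference
 * that the caller must drop with put_task_struct().
 */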

static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
					     u32 *tid,
					     bool skip_if_dup_files)
{
	struct task_struct *task = NULL;
	struct pid *pid;

	if (common->type == BPF_TASK_ITER_TID) {
		if (*tid && *tid != common->pid)
			return NULL;
		rcu_read_lock();
		pid = find_pid_ns(common->pid, common->ns);
		if (pid) {
			task = get_pid_task(pid, PIDTYPE_TGID);
			*tid = common->pid;
		}
		rcu_read_unlock();

		return task;
	}

	if (common->type == BPF_TASK_ITER_TGID) {
		rcu_read_lock();
		task = task_group_seq_get_next(common, tid, skip_if_dup_files);
		rcu_read_unlock();

		return task;
	}

	rcu_read_lock();
retry:
	pid = find_ge_pid(*tid, common->ns);
	if (pid) {
		*tid = pid_nr_ns(pid, common->ns);
		task = get_pid_task(pid, PIDTYPE_PID);
		if (!task) {
			++*tid;
			goto retry;
		} else if (skip_if_dup_files && !thread_group_leader(task) &&
			   task->files == task->group_leader->files) {
			put_task_struct(task);
			task = NULL;
			++*tid;
			goto retry;
		}
	}
	rcu_read_unlock();

	return task;
}

static void *task_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_info *info = seq->private;
	struct task_struct *task;

	task = task_seq_get_next(&info->common, &info->tid, false);
	if (!task)
		return NULL;

	if (*pos == 0)
		++*pos;
	return task;
}

static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_info *info = seq->private;
	struct task_struct *task;

	++*pos;
	++info->tid;
	put_task_struct((struct task_struct *)v);
	task = task_seq_get_next(&info->common, &info->tid, false);
	if (!task)
		return NULL;

	return task;
}

struct bpf_iter__task {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
};

DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task)

static int __task_seq_show(struct seq_file *seq, struct task_struct *task,
			   bool in_stop)
{
	struct bpf_iter_meta meta;
	struct bpf_iter__task ctx;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = task;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_seq_show(struct seq_file *seq, void *v)
{
	return __task_seq_show(seq, v, false);
}

static void task_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__task_seq_show(seq, v, true);
	else
		put_task_struct((struct task_struct *)v);
}
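
/* bpf_iter_attach_task() validates the attach-time parameters: at most one
 * of tid, pid, or pid_fd may be set in the link info. tid selects a single
 * task, pid (or the pid referenced by pid_fd) selects a whole thread group,
 * and leaving all three zero iterates over every task.
 *
 * Userspace sketch (assumption: libbpf with bpf_program__attach_iter();
 * names are illustrative and this snippet is not part of this file):
 *
 *	union bpf_iter_link_info linfo = { .task.pid = getpid() };
 *	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts,
 *			    .link_info = &linfo,
 *			    .link_info_len = sizeof(linfo));
 *	struct bpf_link *link = bpf_program__attach_iter(prog, &opts);
 *	int iter_fd = bpf_iter_create(bpf_link__fd(link));
 *
 * Each read() on iter_fd then drives the task_seq_start/next/show/stop
 * callbacks above.
 */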

static int bpf_iter_attach_task(struct bpf_prog *prog,
				union bpf_iter_link_info *linfo,
				struct bpf_iter_aux_info *aux)
{
	unsigned int flags;
	struct pid *pid;
	pid_t tgid;

	if ((!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd) > 1)
		return -EINVAL;

	aux->task.type = BPF_TASK_ITER_ALL;
	if (linfo->task.tid != 0) {
		aux->task.type = BPF_TASK_ITER_TID;
		aux->task.pid = linfo->task.tid;
	}
	if (linfo->task.pid != 0) {
		aux->task.type = BPF_TASK_ITER_TGID;
		aux->task.pid = linfo->task.pid;
	}
	if (linfo->task.pid_fd != 0) {
		aux->task.type = BPF_TASK_ITER_TGID;

		pid = pidfd_get_pid(linfo->task.pid_fd, &flags);
		if (IS_ERR(pid))
			return PTR_ERR(pid);

		tgid = pid_nr_ns(pid, task_active_pid_ns(current));
		aux->task.pid = tgid;
		put_pid(pid);
	}

	return 0;
}

static const struct seq_operations task_seq_ops = {
	.start = task_seq_start,
	.next = task_seq_next,
	.stop = task_seq_stop,
	.show = task_seq_show,
};

struct bpf_iter_seq_task_file_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * this is assumed by {init, fini}_seq_pidns() callback functions.
	 */
	struct bpf_iter_seq_task_common common;
	struct task_struct *task;
	u32 tid;
	u32 fd;
};

static struct file *
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
{
	u32 saved_tid = info->tid;
	struct task_struct *curr_task;
	unsigned int curr_fd = info->fd;

	/* If this function returns a non-NULL file object,
	 * it held a reference to the task/file.
	 * Otherwise, it does not hold any reference.
	 */
again:
	if (info->task) {
		curr_task = info->task;
		curr_fd = info->fd;
	} else {
		curr_task = task_seq_get_next(&info->common, &info->tid, true);
		if (!curr_task) {
			info->task = NULL;
			return NULL;
		}

		/* set info->task */
		info->task = curr_task;
		if (saved_tid == info->tid)
			curr_fd = info->fd;
		else
			curr_fd = 0;
	}

	rcu_read_lock();
	for (;; curr_fd++) {
		struct file *f;

		f = task_lookup_next_fd_rcu(curr_task, &curr_fd);
		if (!f)
			break;
		if (!get_file_rcu(f))
			continue;

		/* set info->fd */
		info->fd = curr_fd;
		rcu_read_unlock();
		return f;
	}

	/* the current task is done, go to the next task */
	rcu_read_unlock();
	put_task_struct(curr_task);

	if (info->common.type == BPF_TASK_ITER_TID) {
		info->task = NULL;
		return NULL;
	}

	info->task = NULL;
	info->fd = 0;
	saved_tid = ++(info->tid);
	goto again;
}

static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;
	struct file *file;

	info->task = NULL;
	file = task_file_seq_get_next(info);
	if (file && *pos == 0)
		++*pos;

	return file;
}

static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;

	++*pos;
	++info->fd;
	fput((struct file *)v);
	return task_file_seq_get_next(info);
}
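
/* Context passed to iter/task_file BPF programs: the owning task, the fd
 * number within that task's file table, and the struct file it refers to.
 * task and file may be NULL in the final "stop" invocation, so programs
 * must check them before dereferencing.
 */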

struct bpf_iter__task_file {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
	u32 fd __aligned(8);
	__bpf_md_ptr(struct file *, file);
};

DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta,
		     struct task_struct *task, u32 fd,
		     struct file *file)

static int __task_file_seq_show(struct seq_file *seq, struct file *file,
				bool in_stop)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;
	struct bpf_iter__task_file ctx;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = info->task;
	ctx.fd = info->fd;
	ctx.file = file;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_file_seq_show(struct seq_file *seq, void *v)
{
	return __task_file_seq_show(seq, v, false);
}

static void task_file_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;

	if (!v) {
		(void)__task_file_seq_show(seq, v, true);
	} else {
		fput((struct file *)v);
		put_task_struct(info->task);
		info->task = NULL;
	}
}

static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_task_common *common = priv_data;

	common->ns = get_pid_ns(task_active_pid_ns(current));
	common->type = aux->task.type;
	common->pid = aux->task.pid;

	return 0;
}

static void fini_seq_pidns(void *priv_data)
{
	struct bpf_iter_seq_task_common *common = priv_data;

	put_pid_ns(common->ns);
}

static const struct seq_operations task_file_seq_ops = {
	.start = task_file_seq_start,
	.next = task_file_seq_next,
	.stop = task_file_seq_stop,
	.show = task_file_seq_show,
};

struct bpf_iter_seq_task_vma_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * this is assumed by {init, fini}_seq_pidns() callback functions.
	 */
	struct bpf_iter_seq_task_common common;
	struct task_struct *task;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	u32 tid;
	unsigned long prev_vm_start;
	unsigned long prev_vm_end;
};

enum bpf_task_vma_iter_find_op {
	task_vma_iter_first_vma,	/* use find_vma() with addr 0 */
	task_vma_iter_next_vma,		/* use vma_next() with curr_vma */
	task_vma_iter_find_vma,		/* use find_vma() to find next vma */
};

static struct vm_area_struct *
task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
{
	enum bpf_task_vma_iter_find_op op;
	struct vm_area_struct *curr_vma;
	struct task_struct *curr_task;
	struct mm_struct *curr_mm;
	u32 saved_tid = info->tid;

	/* If this function returns a non-NULL vma, it holds a reference to
	 * the task_struct, holds a refcount on mm->mm_users, and holds
	 * read lock on vma->mm->mmap_lock.
	 * If this function returns NULL, it does not hold any reference or
	 * lock.
	 */
	if (info->task) {
		curr_task = info->task;
		curr_vma = info->vma;
		curr_mm = info->mm;
		/* In case of lock contention, drop mmap_lock to unblock
		 * the writer.
		 *
		 * After relock, call find(mm, prev_vm_end - 1) to find
		 * new vma to process.
		 *
		 *   +------+------+-----------+
		 *   | VMA1 | VMA2 | VMA3      |
		 *   +------+------+-----------+
		 *   |      |      |           |
		 *  4k     8k     16k         400k
		 *
		 * For example, curr_vma == VMA2. Before unlock, we set
		 *
		 *    prev_vm_start = 8k
		 *    prev_vm_end   = 16k
		 *
		 * There are a few cases:
		 *
		 * 1) VMA2 is freed, but VMA3 exists.
		 *
		 *    find_vma() will return VMA3, just process VMA3.
		 *
		 * 2) VMA2 still exists.
		 *
		 *    find_vma() will return VMA2, process VMA2->next.
		 *
		 * 3) no more vma in this mm.
		 *
		 *    Process the next task.
		 *
		 * 4) find_vma() returns a different vma, VMA2'.
		 *
		 *    4.1) If VMA2 covers same range as VMA2', skip VMA2',
		 *         because we already covered the range;
		 *    4.2) VMA2 and VMA2' covers different ranges, process
		 *         VMA2'.
		 */
		if (mmap_lock_is_contended(curr_mm)) {
			info->prev_vm_start = curr_vma->vm_start;
			info->prev_vm_end = curr_vma->vm_end;
			op = task_vma_iter_find_vma;
			mmap_read_unlock(curr_mm);
			if (mmap_read_lock_killable(curr_mm)) {
				mmput(curr_mm);
				goto finish;
			}
		} else {
			op = task_vma_iter_next_vma;
		}
	} else {
again:
		curr_task = task_seq_get_next(&info->common, &info->tid, true);
		if (!curr_task) {
			info->tid++;
			goto finish;
		}

		if (saved_tid != info->tid) {
			/* new task, process the first vma */
			op = task_vma_iter_first_vma;
		} else {
			/* Found the same tid, which means the user space
			 * finished data in previous buffer and read more.
			 * We dropped mmap_lock before returning to user
			 * space, so it is necessary to use find_vma() to
			 * find the next vma to process.
			 */
			op = task_vma_iter_find_vma;
		}

		curr_mm = get_task_mm(curr_task);
		if (!curr_mm)
			goto next_task;

		if (mmap_read_lock_killable(curr_mm)) {
			mmput(curr_mm);
			goto finish;
		}
	}

	switch (op) {
	case task_vma_iter_first_vma:
		curr_vma = find_vma(curr_mm, 0);
		break;
	case task_vma_iter_next_vma:
		curr_vma = find_vma(curr_mm, curr_vma->vm_end);
		break;
	case task_vma_iter_find_vma:
		/* We dropped mmap_lock so it is necessary to use find_vma
		 * to find the next vma. This is similar to the mechanism
		 * in show_smaps_rollup().
		 */
		curr_vma = find_vma(curr_mm, info->prev_vm_end - 1);
		/* case 1) and 4.2) above just use curr_vma */

		/* check for case 2) or case 4.1) above */
		if (curr_vma &&
		    curr_vma->vm_start == info->prev_vm_start &&
		    curr_vma->vm_end == info->prev_vm_end)
			curr_vma = find_vma(curr_mm, curr_vma->vm_end);
		break;
	}
	if (!curr_vma) {
		/* case 3) above, or case 2) 4.1) with vma->next == NULL */
		mmap_read_unlock(curr_mm);
		mmput(curr_mm);
		goto next_task;
	}
	info->task = curr_task;
	info->vma = curr_vma;
	info->mm = curr_mm;
	return curr_vma;

next_task:
	if (info->common.type == BPF_TASK_ITER_TID)
		goto finish;

	put_task_struct(curr_task);
	info->task = NULL;
	info->mm = NULL;
	info->tid++;
	goto again;

finish:
	if (curr_task)
		put_task_struct(curr_task);
	info->task = NULL;
	info->vma = NULL;
	info->mm = NULL;
	return NULL;
}

static void *task_vma_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;
	struct vm_area_struct *vma;

	vma = task_vma_seq_get_next(info);
	if (vma && *pos == 0)
		++*pos;

	return vma;
}

static void *task_vma_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;

	++*pos;
	return task_vma_seq_get_next(info);
}

struct bpf_iter__task_vma {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
	__bpf_md_ptr(struct vm_area_struct *, vma);
};

DEFINE_BPF_ITER_FUNC(task_vma, struct bpf_iter_meta *meta,
		     struct task_struct *task, struct vm_area_struct *vma)
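
/* BPF-side sketch for this iterator (assumption: built with libbpf's
 * bpf_helpers.h/bpf_tracing.h, which provide SEC() and BPF_SEQ_PRINTF();
 * not part of this file):
 *
 *	SEC("iter/task_vma")
 *	int dump_task_vma(struct bpf_iter__task_vma *ctx)
 *	{
 *		struct vm_area_struct *vma = ctx->vma;
 *		struct task_struct *task = ctx->task;
 *
 *		if (!vma || !task)
 *			return 0;
 *		BPF_SEQ_PRINTF(ctx->meta->seq, "%d: %08lx-%08lx\n",
 *			       task->pid, vma->vm_start, vma->vm_end);
 *		return 0;
 *	}
 *
 * Each read() on the iterator fd re-enters task_vma_seq_get_next() above,
 * which re-acquires the task's mmap_lock for the duration of the batch.
 */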

static int __task_vma_seq_show(struct seq_file *seq, bool in_stop)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;
	struct bpf_iter__task_vma ctx;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = info->task;
	ctx.vma = info->vma;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_vma_seq_show(struct seq_file *seq, void *v)
{
	return __task_vma_seq_show(seq, false);
}

static void task_vma_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;

	if (!v) {
		(void)__task_vma_seq_show(seq, true);
	} else {
		/* info->vma has not been seen by the BPF program. If the
		 * user space reads more, task_vma_seq_get_next should
		 * return this vma again. Set prev_vm_start to ~0UL,
		 * so that we don't skip the vma returned by the next
		 * find_vma() (case task_vma_iter_find_vma in
		 * task_vma_seq_get_next()).
		 */
		info->prev_vm_start = ~0UL;
		info->prev_vm_end = info->vma->vm_end;
		mmap_read_unlock(info->mm);
		mmput(info->mm);
		info->mm = NULL;
		put_task_struct(info->task);
		info->task = NULL;
	}
}

static const struct seq_operations task_vma_seq_ops = {
	.start = task_vma_seq_start,
	.next = task_vma_seq_next,
	.stop = task_vma_seq_stop,
	.show = task_vma_seq_show,
};

static const struct bpf_iter_seq_info task_seq_info = {
	.seq_ops = &task_seq_ops,
	.init_seq_private = init_seq_pidns,
	.fini_seq_private = fini_seq_pidns,
	.seq_priv_size = sizeof(struct bpf_iter_seq_task_info),
};

static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info)
{
	switch (aux->task.type) {
	case BPF_TASK_ITER_TID:
		info->iter.task.tid = aux->task.pid;
		break;
	case BPF_TASK_ITER_TGID:
		info->iter.task.pid = aux->task.pid;
		break;
	default:
		break;
	}
	return 0;
}

static void bpf_iter_task_show_fdinfo(const struct bpf_iter_aux_info *aux, struct seq_file *seq)
{
	seq_printf(seq, "task_type:\t%s\n", iter_task_type_names[aux->task.type]);
	if (aux->task.type == BPF_TASK_ITER_TID)
		seq_printf(seq, "tid:\t%u\n", aux->task.pid);
	else if (aux->task.type == BPF_TASK_ITER_TGID)
		seq_printf(seq, "pid:\t%u\n", aux->task.pid);
}

static struct bpf_iter_reg task_reg_info = {
	.target = "task",
	.attach_target = bpf_iter_attach_task,
	.feature = BPF_ITER_RESCHED,
	.ctx_arg_info_size = 1,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__task, task),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info = &task_seq_info,
	.fill_link_info = bpf_iter_fill_link_info,
	.show_fdinfo = bpf_iter_task_show_fdinfo,
};

static const struct bpf_iter_seq_info task_file_seq_info = {
	.seq_ops = &task_file_seq_ops,
	.init_seq_private = init_seq_pidns,
	.fini_seq_private = fini_seq_pidns,
	.seq_priv_size = sizeof(struct bpf_iter_seq_task_file_info),
};

static struct bpf_iter_reg task_file_reg_info = {
	.target = "task_file",
	.attach_target = bpf_iter_attach_task,
	.feature = BPF_ITER_RESCHED,
	.ctx_arg_info_size = 2,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__task_file, task),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__task_file, file),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info = &task_file_seq_info,
	.fill_link_info = bpf_iter_fill_link_info,
	.show_fdinfo = bpf_iter_task_show_fdinfo,
};

static const struct bpf_iter_seq_info task_vma_seq_info = {
	.seq_ops = &task_vma_seq_ops,
	.init_seq_private = init_seq_pidns,
	.fini_seq_private = fini_seq_pidns,
	.seq_priv_size = sizeof(struct bpf_iter_seq_task_vma_info),
};

static struct bpf_iter_reg task_vma_reg_info = {
	.target = "task_vma",
	.attach_target = bpf_iter_attach_task,
	.feature = BPF_ITER_RESCHED,
	.ctx_arg_info_size = 2,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__task_vma, task),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__task_vma, vma),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info = &task_vma_seq_info,
	.fill_link_info = bpf_iter_fill_link_info,
	.show_fdinfo = bpf_iter_task_show_fdinfo,
};
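
/* bpf_find_vma(): helper that finds the vma containing 'start' in the given
 * task's mm and invokes callback_fn on it while mmap_lock is read-held. When
 * the calling context cannot release the lock directly, the unlock is
 * deferred to the per-cpu irq_work defined below (see do_mmap_read_unlock()).
 *
 * BPF-side sketch (assumption: names are illustrative; not part of this
 * file). The callback receives the task, the matching vma, and callback_ctx,
 * matching the argument order passed by callback_fn() below:
 *
 *	static long check_vma(struct task_struct *task,
 *			      struct vm_area_struct *vma, void *data)
 *	{
 *		// inspect vma->vm_start, vma->vm_flags, ... here
 *		return 0;
 *	}
 *
 *	// from a tracing program that holds a valid task pointer:
 *	//	bpf_find_vma(task, addr, check_vma, &my_ctx, 0);
 */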

BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
	   bpf_callback_t, callback_fn, void *, callback_ctx, u64, flags)
{
	struct mmap_unlock_irq_work *work = NULL;
	struct vm_area_struct *vma;
	bool irq_work_busy = false;
	struct mm_struct *mm;
	int ret = -ENOENT;

	if (flags)
		return -EINVAL;

	if (!task)
		return -ENOENT;

	mm = task->mm;
	if (!mm)
		return -ENOENT;

	irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);

	if (irq_work_busy || !mmap_read_trylock(mm))
		return -EBUSY;

	vma = find_vma(mm, start);

	if (vma && vma->vm_start <= start && vma->vm_end > start) {
		callback_fn((u64)(long)task, (u64)(long)vma,
			    (u64)(long)callback_ctx, 0, 0);
		ret = 0;
	}
	bpf_mmap_unlock_mm(work, mm);
	return ret;
}

const struct bpf_func_proto bpf_find_vma_proto = {
	.func = bpf_find_vma,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_BTF_ID,
	.arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg2_type = ARG_ANYTHING,
	.arg3_type = ARG_PTR_TO_FUNC,
	.arg4_type = ARG_PTR_TO_STACK_OR_NULL,
	.arg5_type = ARG_ANYTHING,
};

DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);

static void do_mmap_read_unlock(struct irq_work *entry)
{
	struct mmap_unlock_irq_work *work;

	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
		return;

	work = container_of(entry, struct mmap_unlock_irq_work, irq_work);
	mmap_read_unlock_non_owner(work->mm);
}

static int __init task_iter_init(void)
{
	struct mmap_unlock_irq_work *work;
	int ret, cpu;

	for_each_possible_cpu(cpu) {
		work = per_cpu_ptr(&mmap_unlock_work, cpu);
		init_irq_work(&work->irq_work, do_mmap_read_unlock);
	}

	task_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	ret = bpf_iter_reg_target(&task_reg_info);
	if (ret)
		return ret;

	task_file_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	task_file_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_FILE];
	ret = bpf_iter_reg_target(&task_file_reg_info);
	if (ret)
		return ret;

	task_vma_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	task_vma_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
	return bpf_iter_reg_target(&task_vma_reg_info);
}
late_initcall(task_iter_init);