1 /* 2 * linux/ipc/msg.c 3 * Copyright (C) 1992 Krishna Balasubramanian 4 * 5 * Removed all the remaining kerneld mess 6 * Catch the -EFAULT stuff properly 7 * Use GFP_KERNEL for messages as in 1.2 8 * Fixed up the unchecked user space derefs 9 * Copyright (C) 1998 Alan Cox & Andi Kleen 10 * 11 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <[email protected]> 12 * 13 * mostly rewritten, threaded and wake-one semantics added 14 * MSGMAX limit removed, sysctl's added 15 * (c) 1999 Manfred Spraul <[email protected]> 16 * 17 * support for audit of ipc object properties and permission changes 18 * Dustin Kirkland <[email protected]> 19 * 20 * namespaces support 21 * OpenVZ, SWsoft Inc. 22 * Pavel Emelianov <[email protected]> 23 */ 24 25 #include <linux/capability.h> 26 #include <linux/msg.h> 27 #include <linux/spinlock.h> 28 #include <linux/init.h> 29 #include <linux/mm.h> 30 #include <linux/proc_fs.h> 31 #include <linux/list.h> 32 #include <linux/security.h> 33 #include <linux/sched.h> 34 #include <linux/syscalls.h> 35 #include <linux/audit.h> 36 #include <linux/seq_file.h> 37 #include <linux/rwsem.h> 38 #include <linux/nsproxy.h> 39 #include <linux/ipc_namespace.h> 40 41 #include <asm/current.h> 42 #include <asm/uaccess.h> 43 #include "util.h" 44 45 /* 46 * one msg_receiver structure for each sleeping receiver: 47 */ 48 struct msg_receiver { 49 struct list_head r_list; 50 struct task_struct *r_tsk; 51 52 int r_mode; 53 long r_msgtype; 54 long r_maxsize; 55 56 struct msg_msg *volatile r_msg; 57 }; 58 59 /* one msg_sender for each sleeping sender */ 60 struct msg_sender { 61 struct list_head list; 62 struct task_struct *tsk; 63 }; 64 65 #define SEARCH_ANY 1 66 #define SEARCH_EQUAL 2 67 #define SEARCH_NOTEQUAL 3 68 #define SEARCH_LESSEQUAL 4 69 #define SEARCH_NUMBER 5 70 71 #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) 72 73 #define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) 74 75 static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); 76 static int newque(struct ipc_namespace *, struct ipc_params *); 77 #ifdef CONFIG_PROC_FS 78 static int sysvipc_msg_proc_show(struct seq_file *s, void *it); 79 #endif 80 81 /* 82 * Scale msgmni with the available lowmem size: the memory dedicated to msg 83 * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. 84 * Also take into account the number of nsproxies created so far. 85 * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. 86 */ 87 void recompute_msgmni(struct ipc_namespace *ns) 88 { 89 struct sysinfo i; 90 unsigned long allowed; 91 int nb_ns; 92 93 si_meminfo(&i); 94 allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) 95 / MSGMNB; 96 nb_ns = atomic_read(&nr_ipc_ns); 97 allowed /= nb_ns; 98 99 if (allowed < MSGMNI) { 100 ns->msg_ctlmni = MSGMNI; 101 return; 102 } 103 104 if (allowed > IPCMNI / nb_ns) { 105 ns->msg_ctlmni = IPCMNI / nb_ns; 106 return; 107 } 108 109 ns->msg_ctlmni = allowed; 110 } 111 112 void msg_init_ns(struct ipc_namespace *ns) 113 { 114 ns->msg_ctlmax = MSGMAX; 115 ns->msg_ctlmnb = MSGMNB; 116 117 recompute_msgmni(ns); 118 119 atomic_set(&ns->msg_bytes, 0); 120 atomic_set(&ns->msg_hdrs, 0); 121 ipc_init_ids(&ns->ids[IPC_MSG_IDS]); 122 } 123 124 #ifdef CONFIG_IPC_NS 125 void msg_exit_ns(struct ipc_namespace *ns) 126 { 127 free_ipcs(ns, &msg_ids(ns), freeque); 128 idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); 129 } 130 #endif 131 132 void __init msg_init(void) 133 { 134 msg_init_ns(&init_ipc_ns); 135 136 printk(KERN_INFO "msgmni has been set to %d\n", 137 init_ipc_ns.msg_ctlmni); 138 139 ipc_init_proc_interface("sysvipc/msg", 140 " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", 141 IPC_MSG_IDS, sysvipc_msg_proc_show); 142 } 143 144 static inline struct msg_queue *msq_obtain_object(struct ipc_namespace *ns, int id) 145 { 146 struct kern_ipc_perm *ipcp = ipc_obtain_object(&msg_ids(ns), id); 147 148 if (IS_ERR(ipcp)) 149 return ERR_CAST(ipcp); 150 151 return container_of(ipcp, struct msg_queue, q_perm); 152 } 153 154 static inline struct msg_queue *msq_obtain_object_check(struct ipc_namespace *ns, 155 int id) 156 { 157 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&msg_ids(ns), id); 158 159 if (IS_ERR(ipcp)) 160 return ERR_CAST(ipcp); 161 162 return container_of(ipcp, struct msg_queue, q_perm); 163 } 164 165 static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) 166 { 167 ipc_rmid(&msg_ids(ns), &s->q_perm); 168 } 169 170 /** 171 * newque - Create a new msg queue 172 * @ns: namespace 173 * @params: ptr to the structure that contains the key and msgflg 174 * 175 * Called with msg_ids.rw_mutex held (writer) 176 */ 177 static int newque(struct ipc_namespace *ns, struct ipc_params *params) 178 { 179 struct msg_queue *msq; 180 int id, retval; 181 key_t key = params->key; 182 int msgflg = params->flg; 183 184 msq = ipc_rcu_alloc(sizeof(*msq)); 185 if (!msq) 186 return -ENOMEM; 187 188 msq->q_perm.mode = msgflg & S_IRWXUGO; 189 msq->q_perm.key = key; 190 191 msq->q_perm.security = NULL; 192 retval = security_msg_queue_alloc(msq); 193 if (retval) { 194 ipc_rcu_putref(msq); 195 return retval; 196 } 197 198 /* ipc_addid() locks msq upon success. */ 199 id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); 200 if (id < 0) { 201 security_msg_queue_free(msq); 202 ipc_rcu_putref(msq); 203 return id; 204 } 205 206 msq->q_stime = msq->q_rtime = 0; 207 msq->q_ctime = get_seconds(); 208 msq->q_cbytes = msq->q_qnum = 0; 209 msq->q_qbytes = ns->msg_ctlmnb; 210 msq->q_lspid = msq->q_lrpid = 0; 211 INIT_LIST_HEAD(&msq->q_messages); 212 INIT_LIST_HEAD(&msq->q_receivers); 213 INIT_LIST_HEAD(&msq->q_senders); 214 215 ipc_unlock_object(&msq->q_perm); 216 rcu_read_unlock(); 217 218 return msq->q_perm.id; 219 } 220 221 static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss) 222 { 223 mss->tsk = current; 224 current->state = TASK_INTERRUPTIBLE; 225 list_add_tail(&mss->list, &msq->q_senders); 226 } 227 228 static inline void ss_del(struct msg_sender *mss) 229 { 230 if (mss->list.next != NULL) 231 list_del(&mss->list); 232 } 233 234 static void ss_wakeup(struct list_head *h, int kill) 235 { 236 struct msg_sender *mss, *t; 237 238 list_for_each_entry_safe(mss, t, h, list) { 239 if (kill) 240 mss->list.next = NULL; 241 wake_up_process(mss->tsk); 242 } 243 } 244 245 static void expunge_all(struct msg_queue *msq, int res) 246 { 247 struct msg_receiver *msr, *t; 248 249 list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 250 msr->r_msg = NULL; 251 wake_up_process(msr->r_tsk); 252 smp_mb(); 253 msr->r_msg = ERR_PTR(res); 254 } 255 } 256 257 /* 258 * freeque() wakes up waiters on the sender and receiver waiting queue, 259 * removes the message queue from message queue ID IDR, and cleans up all the 260 * messages associated with this queue. 261 * 262 * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held 263 * before freeque() is called. msg_ids.rw_mutex remains locked on exit. 264 */ 265 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 266 { 267 struct msg_msg *msg, *t; 268 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 269 270 expunge_all(msq, -EIDRM); 271 ss_wakeup(&msq->q_senders, 1); 272 msg_rmid(ns, msq); 273 msg_unlock(msq); 274 275 list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { 276 atomic_dec(&ns->msg_hdrs); 277 free_msg(msg); 278 } 279 atomic_sub(msq->q_cbytes, &ns->msg_bytes); 280 security_msg_queue_free(msq); 281 ipc_rcu_putref(msq); 282 } 283 284 /* 285 * Called with msg_ids.rw_mutex and ipcp locked. 286 */ 287 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) 288 { 289 struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); 290 291 return security_msg_queue_associate(msq, msgflg); 292 } 293 294 SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) 295 { 296 struct ipc_namespace *ns; 297 struct ipc_ops msg_ops; 298 struct ipc_params msg_params; 299 300 ns = current->nsproxy->ipc_ns; 301 302 msg_ops.getnew = newque; 303 msg_ops.associate = msg_security; 304 msg_ops.more_checks = NULL; 305 306 msg_params.key = key; 307 msg_params.flg = msgflg; 308 309 return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params); 310 } 311 312 static inline unsigned long 313 copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) 314 { 315 switch(version) { 316 case IPC_64: 317 return copy_to_user(buf, in, sizeof(*in)); 318 case IPC_OLD: 319 { 320 struct msqid_ds out; 321 322 memset(&out, 0, sizeof(out)); 323 324 ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm); 325 326 out.msg_stime = in->msg_stime; 327 out.msg_rtime = in->msg_rtime; 328 out.msg_ctime = in->msg_ctime; 329 330 if (in->msg_cbytes > USHRT_MAX) 331 out.msg_cbytes = USHRT_MAX; 332 else 333 out.msg_cbytes = in->msg_cbytes; 334 out.msg_lcbytes = in->msg_cbytes; 335 336 if (in->msg_qnum > USHRT_MAX) 337 out.msg_qnum = USHRT_MAX; 338 else 339 out.msg_qnum = in->msg_qnum; 340 341 if (in->msg_qbytes > USHRT_MAX) 342 out.msg_qbytes = USHRT_MAX; 343 else 344 out.msg_qbytes = in->msg_qbytes; 345 out.msg_lqbytes = in->msg_qbytes; 346 347 out.msg_lspid = in->msg_lspid; 348 out.msg_lrpid = in->msg_lrpid; 349 350 return copy_to_user(buf, &out, sizeof(out)); 351 } 352 default: 353 return -EINVAL; 354 } 355 } 356 357 static inline unsigned long 358 copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) 359 { 360 switch(version) { 361 case IPC_64: 362 if (copy_from_user(out, buf, sizeof(*out))) 363 return -EFAULT; 364 return 0; 365 case IPC_OLD: 366 { 367 struct msqid_ds tbuf_old; 368 369 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 370 return -EFAULT; 371 372 out->msg_perm.uid = tbuf_old.msg_perm.uid; 373 out->msg_perm.gid = tbuf_old.msg_perm.gid; 374 out->msg_perm.mode = tbuf_old.msg_perm.mode; 375 376 if (tbuf_old.msg_qbytes == 0) 377 out->msg_qbytes = tbuf_old.msg_lqbytes; 378 else 379 out->msg_qbytes = tbuf_old.msg_qbytes; 380 381 return 0; 382 } 383 default: 384 return -EINVAL; 385 } 386 } 387 388 /* 389 * This function handles some msgctl commands which require the rw_mutex 390 * to be held in write mode. 391 * NOTE: no locks must be held, the rw_mutex is taken inside this function. 392 */ 393 static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, 394 struct msqid_ds __user *buf, int version) 395 { 396 struct kern_ipc_perm *ipcp; 397 struct msqid64_ds uninitialized_var(msqid64); 398 struct msg_queue *msq; 399 int err; 400 401 if (cmd == IPC_SET) { 402 if (copy_msqid_from_user(&msqid64, buf, version)) 403 return -EFAULT; 404 } 405 406 down_write(&msg_ids(ns).rw_mutex); 407 rcu_read_lock(); 408 409 ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd, 410 &msqid64.msg_perm, msqid64.msg_qbytes); 411 if (IS_ERR(ipcp)) { 412 err = PTR_ERR(ipcp); 413 goto out_unlock1; 414 } 415 416 msq = container_of(ipcp, struct msg_queue, q_perm); 417 418 err = security_msg_queue_msgctl(msq, cmd); 419 if (err) 420 goto out_unlock1; 421 422 switch (cmd) { 423 case IPC_RMID: 424 ipc_lock_object(&msq->q_perm); 425 /* freeque unlocks the ipc object and rcu */ 426 freeque(ns, ipcp); 427 goto out_up; 428 case IPC_SET: 429 if (msqid64.msg_qbytes > ns->msg_ctlmnb && 430 !capable(CAP_SYS_RESOURCE)) { 431 err = -EPERM; 432 goto out_unlock1; 433 } 434 435 ipc_lock_object(&msq->q_perm); 436 err = ipc_update_perm(&msqid64.msg_perm, ipcp); 437 if (err) 438 goto out_unlock0; 439 440 msq->q_qbytes = msqid64.msg_qbytes; 441 442 msq->q_ctime = get_seconds(); 443 /* sleeping receivers might be excluded by 444 * stricter permissions. 445 */ 446 expunge_all(msq, -EAGAIN); 447 /* sleeping senders might be able to send 448 * due to a larger queue size. 449 */ 450 ss_wakeup(&msq->q_senders, 0); 451 break; 452 default: 453 err = -EINVAL; 454 goto out_unlock1; 455 } 456 457 out_unlock0: 458 ipc_unlock_object(&msq->q_perm); 459 out_unlock1: 460 rcu_read_unlock(); 461 out_up: 462 up_write(&msg_ids(ns).rw_mutex); 463 return err; 464 } 465 466 static int msgctl_nolock(struct ipc_namespace *ns, int msqid, 467 int cmd, int version, void __user *buf) 468 { 469 int err; 470 struct msg_queue *msq; 471 472 switch (cmd) { 473 case IPC_INFO: 474 case MSG_INFO: 475 { 476 struct msginfo msginfo; 477 int max_id; 478 479 if (!buf) 480 return -EFAULT; 481 482 /* 483 * We must not return kernel stack data. 484 * due to padding, it's not enough 485 * to set all member fields. 486 */ 487 err = security_msg_queue_msgctl(NULL, cmd); 488 if (err) 489 return err; 490 491 memset(&msginfo, 0, sizeof(msginfo)); 492 msginfo.msgmni = ns->msg_ctlmni; 493 msginfo.msgmax = ns->msg_ctlmax; 494 msginfo.msgmnb = ns->msg_ctlmnb; 495 msginfo.msgssz = MSGSSZ; 496 msginfo.msgseg = MSGSEG; 497 down_read(&msg_ids(ns).rw_mutex); 498 if (cmd == MSG_INFO) { 499 msginfo.msgpool = msg_ids(ns).in_use; 500 msginfo.msgmap = atomic_read(&ns->msg_hdrs); 501 msginfo.msgtql = atomic_read(&ns->msg_bytes); 502 } else { 503 msginfo.msgmap = MSGMAP; 504 msginfo.msgpool = MSGPOOL; 505 msginfo.msgtql = MSGTQL; 506 } 507 max_id = ipc_get_maxid(&msg_ids(ns)); 508 up_read(&msg_ids(ns).rw_mutex); 509 if (copy_to_user(buf, &msginfo, sizeof(struct msginfo))) 510 return -EFAULT; 511 return (max_id < 0) ? 0 : max_id; 512 } 513 514 case MSG_STAT: 515 case IPC_STAT: 516 { 517 struct msqid64_ds tbuf; 518 int success_return; 519 520 if (!buf) 521 return -EFAULT; 522 523 memset(&tbuf, 0, sizeof(tbuf)); 524 525 rcu_read_lock(); 526 if (cmd == MSG_STAT) { 527 msq = msq_obtain_object(ns, msqid); 528 if (IS_ERR(msq)) { 529 err = PTR_ERR(msq); 530 goto out_unlock; 531 } 532 success_return = msq->q_perm.id; 533 } else { 534 msq = msq_obtain_object_check(ns, msqid); 535 if (IS_ERR(msq)) { 536 err = PTR_ERR(msq); 537 goto out_unlock; 538 } 539 success_return = 0; 540 } 541 542 err = -EACCES; 543 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 544 goto out_unlock; 545 546 err = security_msg_queue_msgctl(msq, cmd); 547 if (err) 548 goto out_unlock; 549 550 kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm); 551 tbuf.msg_stime = msq->q_stime; 552 tbuf.msg_rtime = msq->q_rtime; 553 tbuf.msg_ctime = msq->q_ctime; 554 tbuf.msg_cbytes = msq->q_cbytes; 555 tbuf.msg_qnum = msq->q_qnum; 556 tbuf.msg_qbytes = msq->q_qbytes; 557 tbuf.msg_lspid = msq->q_lspid; 558 tbuf.msg_lrpid = msq->q_lrpid; 559 rcu_read_unlock(); 560 561 if (copy_msqid_to_user(buf, &tbuf, version)) 562 return -EFAULT; 563 return success_return; 564 } 565 566 default: 567 return -EINVAL; 568 } 569 570 return err; 571 out_unlock: 572 rcu_read_unlock(); 573 return err; 574 } 575 576 SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) 577 { 578 int version; 579 struct ipc_namespace *ns; 580 581 if (msqid < 0 || cmd < 0) 582 return -EINVAL; 583 584 version = ipc_parse_version(&cmd); 585 ns = current->nsproxy->ipc_ns; 586 587 switch (cmd) { 588 case IPC_INFO: 589 case MSG_INFO: 590 case MSG_STAT: /* msqid is an index rather than a msg queue id */ 591 case IPC_STAT: 592 return msgctl_nolock(ns, msqid, cmd, version, buf); 593 case IPC_SET: 594 case IPC_RMID: 595 return msgctl_down(ns, msqid, cmd, buf, version); 596 default: 597 return -EINVAL; 598 } 599 } 600 601 static int testmsg(struct msg_msg *msg, long type, int mode) 602 { 603 switch(mode) 604 { 605 case SEARCH_ANY: 606 case SEARCH_NUMBER: 607 return 1; 608 case SEARCH_LESSEQUAL: 609 if (msg->m_type <=type) 610 return 1; 611 break; 612 case SEARCH_EQUAL: 613 if (msg->m_type == type) 614 return 1; 615 break; 616 case SEARCH_NOTEQUAL: 617 if (msg->m_type != type) 618 return 1; 619 break; 620 } 621 return 0; 622 } 623 624 static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) 625 { 626 struct msg_receiver *msr, *t; 627 628 list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { 629 if (testmsg(msg, msr->r_msgtype, msr->r_mode) && 630 !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, 631 msr->r_msgtype, msr->r_mode)) { 632 633 list_del(&msr->r_list); 634 if (msr->r_maxsize < msg->m_ts) { 635 msr->r_msg = NULL; 636 wake_up_process(msr->r_tsk); 637 smp_mb(); 638 msr->r_msg = ERR_PTR(-E2BIG); 639 } else { 640 msr->r_msg = NULL; 641 msq->q_lrpid = task_pid_vnr(msr->r_tsk); 642 msq->q_rtime = get_seconds(); 643 wake_up_process(msr->r_tsk); 644 smp_mb(); 645 msr->r_msg = msg; 646 647 return 1; 648 } 649 } 650 } 651 return 0; 652 } 653 654 long do_msgsnd(int msqid, long mtype, void __user *mtext, 655 size_t msgsz, int msgflg) 656 { 657 struct msg_queue *msq; 658 struct msg_msg *msg; 659 int err; 660 struct ipc_namespace *ns; 661 662 ns = current->nsproxy->ipc_ns; 663 664 if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0) 665 return -EINVAL; 666 if (mtype < 1) 667 return -EINVAL; 668 669 msg = load_msg(mtext, msgsz); 670 if (IS_ERR(msg)) 671 return PTR_ERR(msg); 672 673 msg->m_type = mtype; 674 msg->m_ts = msgsz; 675 676 rcu_read_lock(); 677 msq = msq_obtain_object_check(ns, msqid); 678 if (IS_ERR(msq)) { 679 err = PTR_ERR(msq); 680 goto out_unlock1; 681 } 682 683 ipc_lock_object(&msq->q_perm); 684 685 for (;;) { 686 struct msg_sender s; 687 688 err = -EACCES; 689 if (ipcperms(ns, &msq->q_perm, S_IWUGO)) 690 goto out_unlock0; 691 692 err = security_msg_queue_msgsnd(msq, msg, msgflg); 693 if (err) 694 goto out_unlock0; 695 696 if (msgsz + msq->q_cbytes <= msq->q_qbytes && 697 1 + msq->q_qnum <= msq->q_qbytes) { 698 break; 699 } 700 701 /* queue full, wait: */ 702 if (msgflg & IPC_NOWAIT) { 703 err = -EAGAIN; 704 goto out_unlock0; 705 } 706 707 ss_add(msq, &s); 708 709 if (!ipc_rcu_getref(msq)) { 710 err = -EIDRM; 711 goto out_unlock0; 712 } 713 714 ipc_unlock_object(&msq->q_perm); 715 rcu_read_unlock(); 716 schedule(); 717 718 rcu_read_lock(); 719 ipc_lock_object(&msq->q_perm); 720 721 ipc_rcu_putref(msq); 722 if (msq->q_perm.deleted) { 723 err = -EIDRM; 724 goto out_unlock0; 725 } 726 727 ss_del(&s); 728 729 if (signal_pending(current)) { 730 err = -ERESTARTNOHAND; 731 goto out_unlock0; 732 } 733 734 } 735 msq->q_lspid = task_tgid_vnr(current); 736 msq->q_stime = get_seconds(); 737 738 if (!pipelined_send(msq, msg)) { 739 /* no one is waiting for this message, enqueue it */ 740 list_add_tail(&msg->m_list, &msq->q_messages); 741 msq->q_cbytes += msgsz; 742 msq->q_qnum++; 743 atomic_add(msgsz, &ns->msg_bytes); 744 atomic_inc(&ns->msg_hdrs); 745 } 746 747 err = 0; 748 msg = NULL; 749 750 out_unlock0: 751 ipc_unlock_object(&msq->q_perm); 752 out_unlock1: 753 rcu_read_unlock(); 754 if (msg != NULL) 755 free_msg(msg); 756 return err; 757 } 758 759 SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 760 int, msgflg) 761 { 762 long mtype; 763 764 if (get_user(mtype, &msgp->mtype)) 765 return -EFAULT; 766 return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); 767 } 768 769 static inline int convert_mode(long *msgtyp, int msgflg) 770 { 771 if (msgflg & MSG_COPY) 772 return SEARCH_NUMBER; 773 /* 774 * find message of correct type. 775 * msgtyp = 0 => get first. 776 * msgtyp > 0 => get first message of matching type. 777 * msgtyp < 0 => get message with least type must be < abs(msgtype). 778 */ 779 if (*msgtyp == 0) 780 return SEARCH_ANY; 781 if (*msgtyp < 0) { 782 *msgtyp = -*msgtyp; 783 return SEARCH_LESSEQUAL; 784 } 785 if (msgflg & MSG_EXCEPT) 786 return SEARCH_NOTEQUAL; 787 return SEARCH_EQUAL; 788 } 789 790 static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) 791 { 792 struct msgbuf __user *msgp = dest; 793 size_t msgsz; 794 795 if (put_user(msg->m_type, &msgp->mtype)) 796 return -EFAULT; 797 798 msgsz = (bufsz > msg->m_ts) ? msg->m_ts : bufsz; 799 if (store_msg(msgp->mtext, msg, msgsz)) 800 return -EFAULT; 801 return msgsz; 802 } 803 804 #ifdef CONFIG_CHECKPOINT_RESTORE 805 /* 806 * This function creates new kernel message structure, large enough to store 807 * bufsz message bytes. 808 */ 809 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 810 { 811 struct msg_msg *copy; 812 813 /* 814 * Create dummy message to copy real message to. 815 */ 816 copy = load_msg(buf, bufsz); 817 if (!IS_ERR(copy)) 818 copy->m_ts = bufsz; 819 return copy; 820 } 821 822 static inline void free_copy(struct msg_msg *copy) 823 { 824 if (copy) 825 free_msg(copy); 826 } 827 #else 828 static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz) 829 { 830 return ERR_PTR(-ENOSYS); 831 } 832 833 static inline void free_copy(struct msg_msg *copy) 834 { 835 } 836 #endif 837 838 static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) 839 { 840 struct msg_msg *msg, *found = NULL; 841 long count = 0; 842 843 list_for_each_entry(msg, &msq->q_messages, m_list) { 844 if (testmsg(msg, *msgtyp, mode) && 845 !security_msg_queue_msgrcv(msq, msg, current, 846 *msgtyp, mode)) { 847 if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) { 848 *msgtyp = msg->m_type - 1; 849 found = msg; 850 } else if (mode == SEARCH_NUMBER) { 851 if (*msgtyp == count) 852 return msg; 853 } else 854 return msg; 855 count++; 856 } 857 } 858 859 return found ?: ERR_PTR(-EAGAIN); 860 } 861 862 long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, 863 long (*msg_handler)(void __user *, struct msg_msg *, size_t)) 864 { 865 int mode; 866 struct msg_queue *msq; 867 struct ipc_namespace *ns; 868 struct msg_msg *msg, *copy = NULL; 869 870 ns = current->nsproxy->ipc_ns; 871 872 if (msqid < 0 || (long) bufsz < 0) 873 return -EINVAL; 874 875 if (msgflg & MSG_COPY) { 876 copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); 877 if (IS_ERR(copy)) 878 return PTR_ERR(copy); 879 } 880 mode = convert_mode(&msgtyp, msgflg); 881 882 rcu_read_lock(); 883 msq = msq_obtain_object_check(ns, msqid); 884 if (IS_ERR(msq)) { 885 rcu_read_unlock(); 886 free_copy(copy); 887 return PTR_ERR(msq); 888 } 889 890 for (;;) { 891 struct msg_receiver msr_d; 892 893 msg = ERR_PTR(-EACCES); 894 if (ipcperms(ns, &msq->q_perm, S_IRUGO)) 895 goto out_unlock1; 896 897 ipc_lock_object(&msq->q_perm); 898 msg = find_msg(msq, &msgtyp, mode); 899 if (!IS_ERR(msg)) { 900 /* 901 * Found a suitable message. 902 * Unlink it from the queue. 903 */ 904 if ((bufsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) { 905 msg = ERR_PTR(-E2BIG); 906 goto out_unlock0; 907 } 908 /* 909 * If we are copying, then do not unlink message and do 910 * not update queue parameters. 911 */ 912 if (msgflg & MSG_COPY) { 913 msg = copy_msg(msg, copy); 914 goto out_unlock0; 915 } 916 917 list_del(&msg->m_list); 918 msq->q_qnum--; 919 msq->q_rtime = get_seconds(); 920 msq->q_lrpid = task_tgid_vnr(current); 921 msq->q_cbytes -= msg->m_ts; 922 atomic_sub(msg->m_ts, &ns->msg_bytes); 923 atomic_dec(&ns->msg_hdrs); 924 ss_wakeup(&msq->q_senders, 0); 925 926 goto out_unlock0; 927 } 928 929 /* No message waiting. Wait for a message */ 930 if (msgflg & IPC_NOWAIT) { 931 msg = ERR_PTR(-ENOMSG); 932 goto out_unlock0; 933 } 934 935 list_add_tail(&msr_d.r_list, &msq->q_receivers); 936 msr_d.r_tsk = current; 937 msr_d.r_msgtype = msgtyp; 938 msr_d.r_mode = mode; 939 if (msgflg & MSG_NOERROR) 940 msr_d.r_maxsize = INT_MAX; 941 else 942 msr_d.r_maxsize = bufsz; 943 msr_d.r_msg = ERR_PTR(-EAGAIN); 944 current->state = TASK_INTERRUPTIBLE; 945 946 ipc_unlock_object(&msq->q_perm); 947 rcu_read_unlock(); 948 schedule(); 949 950 /* Lockless receive, part 1: 951 * Disable preemption. We don't hold a reference to the queue 952 * and getting a reference would defeat the idea of a lockless 953 * operation, thus the code relies on rcu to guarantee the 954 * existence of msq: 955 * Prior to destruction, expunge_all(-EIRDM) changes r_msg. 956 * Thus if r_msg is -EAGAIN, then the queue not yet destroyed. 957 * rcu_read_lock() prevents preemption between reading r_msg 958 * and acquiring the q_perm.lock in ipc_lock_object(). 959 */ 960 rcu_read_lock(); 961 962 /* Lockless receive, part 2: 963 * Wait until pipelined_send or expunge_all are outside of 964 * wake_up_process(). There is a race with exit(), see 965 * ipc/mqueue.c for the details. 966 */ 967 msg = (struct msg_msg*)msr_d.r_msg; 968 while (msg == NULL) { 969 cpu_relax(); 970 msg = (struct msg_msg *)msr_d.r_msg; 971 } 972 973 /* Lockless receive, part 3: 974 * If there is a message or an error then accept it without 975 * locking. 976 */ 977 if (msg != ERR_PTR(-EAGAIN)) 978 goto out_unlock1; 979 980 /* Lockless receive, part 3: 981 * Acquire the queue spinlock. 982 */ 983 ipc_lock_object(&msq->q_perm); 984 985 /* Lockless receive, part 4: 986 * Repeat test after acquiring the spinlock. 987 */ 988 msg = (struct msg_msg*)msr_d.r_msg; 989 if (msg != ERR_PTR(-EAGAIN)) 990 goto out_unlock0; 991 992 list_del(&msr_d.r_list); 993 if (signal_pending(current)) { 994 msg = ERR_PTR(-ERESTARTNOHAND); 995 goto out_unlock0; 996 } 997 998 ipc_unlock_object(&msq->q_perm); 999 } 1000 1001 out_unlock0: 1002 ipc_unlock_object(&msq->q_perm); 1003 out_unlock1: 1004 rcu_read_unlock(); 1005 if (IS_ERR(msg)) { 1006 free_copy(copy); 1007 return PTR_ERR(msg); 1008 } 1009 1010 bufsz = msg_handler(buf, msg, bufsz); 1011 free_msg(msg); 1012 1013 return bufsz; 1014 } 1015 1016 SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, 1017 long, msgtyp, int, msgflg) 1018 { 1019 return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); 1020 } 1021 1022 #ifdef CONFIG_PROC_FS 1023 static int sysvipc_msg_proc_show(struct seq_file *s, void *it) 1024 { 1025 struct user_namespace *user_ns = seq_user_ns(s); 1026 struct msg_queue *msq = it; 1027 1028 return seq_printf(s, 1029 "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", 1030 msq->q_perm.key, 1031 msq->q_perm.id, 1032 msq->q_perm.mode, 1033 msq->q_cbytes, 1034 msq->q_qnum, 1035 msq->q_lspid, 1036 msq->q_lrpid, 1037 from_kuid_munged(user_ns, msq->q_perm.uid), 1038 from_kgid_munged(user_ns, msq->q_perm.gid), 1039 from_kuid_munged(user_ns, msq->q_perm.cuid), 1040 from_kgid_munged(user_ns, msq->q_perm.cgid), 1041 msq->q_stime, 1042 msq->q_rtime, 1043 msq->q_ctime); 1044 } 1045 #endif 1046