1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _FS_CEPH_OSD_CLIENT_H 3 #define _FS_CEPH_OSD_CLIENT_H 4 5 #include <linux/bitrev.h> 6 #include <linux/completion.h> 7 #include <linux/kref.h> 8 #include <linux/mempool.h> 9 #include <linux/rbtree.h> 10 #include <linux/refcount.h> 11 #include <linux/ktime.h> 12 13 #include <linux/ceph/types.h> 14 #include <linux/ceph/osdmap.h> 15 #include <linux/ceph/messenger.h> 16 #include <linux/ceph/msgpool.h> 17 #include <linux/ceph/auth.h> 18 #include <linux/ceph/pagelist.h> 19 20 struct ceph_msg; 21 struct ceph_snap_context; 22 struct ceph_osd_request; 23 struct ceph_osd_client; 24 25 /* 26 * completion callback for async writepages 27 */ 28 typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); 29 30 #define CEPH_HOMELESS_OSD -1 31 32 /* 33 * A given osd we're communicating with. 34 * 35 * Note that the o_requests tree can be searched while holding the "lock" mutex 36 * or the "o_requests_lock" spinlock. Insertion or removal requires both! 37 */ 38 struct ceph_osd { 39 refcount_t o_ref; 40 struct ceph_osd_client *o_osdc; 41 int o_osd; 42 int o_incarnation; 43 struct rb_node o_node; 44 struct ceph_connection o_con; 45 spinlock_t o_requests_lock; 46 struct rb_root o_requests; 47 struct rb_root o_linger_requests; 48 struct rb_root o_backoff_mappings; 49 struct rb_root o_backoffs_by_id; 50 struct list_head o_osd_lru; 51 struct ceph_auth_handshake o_auth; 52 unsigned long lru_ttl; 53 struct list_head o_keepalive_item; 54 struct mutex lock; 55 }; 56 57 #define CEPH_OSD_SLAB_OPS 2 58 #define CEPH_OSD_MAX_OPS 16 59 60 enum ceph_osd_data_type { 61 CEPH_OSD_DATA_TYPE_NONE = 0, 62 CEPH_OSD_DATA_TYPE_PAGES, 63 CEPH_OSD_DATA_TYPE_PAGELIST, 64 #ifdef CONFIG_BLOCK 65 CEPH_OSD_DATA_TYPE_BIO, 66 #endif /* CONFIG_BLOCK */ 67 CEPH_OSD_DATA_TYPE_BVECS, 68 }; 69 70 struct ceph_osd_data { 71 enum ceph_osd_data_type type; 72 union { 73 struct { 74 struct page **pages; 75 u64 length; 76 u32 alignment; 77 bool pages_from_pool; 78 bool own_pages; 79 }; 80 struct ceph_pagelist *pagelist; 81 #ifdef CONFIG_BLOCK 82 struct { 83 struct ceph_bio_iter bio_pos; 84 u32 bio_length; 85 }; 86 #endif /* CONFIG_BLOCK */ 87 struct { 88 struct ceph_bvec_iter bvec_pos; 89 u32 num_bvecs; 90 }; 91 }; 92 }; 93 94 struct ceph_osd_req_op { 95 u16 op; /* CEPH_OSD_OP_* */ 96 u32 flags; /* CEPH_OSD_OP_FLAG_* */ 97 u32 indata_len; /* request */ 98 u32 outdata_len; /* reply */ 99 s32 rval; 100 101 union { 102 struct ceph_osd_data raw_data_in; 103 struct { 104 u64 offset, length; 105 u64 truncate_size; 106 u32 truncate_seq; 107 struct ceph_osd_data osd_data; 108 } extent; 109 struct { 110 u32 name_len; 111 u32 value_len; 112 __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ 113 __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ 114 struct ceph_osd_data osd_data; 115 } xattr; 116 struct { 117 const char *class_name; 118 const char *method_name; 119 struct ceph_osd_data request_info; 120 struct ceph_osd_data request_data; 121 struct ceph_osd_data response_data; 122 __u8 class_len; 123 __u8 method_len; 124 u32 indata_len; 125 } cls; 126 struct { 127 u64 cookie; 128 __u8 op; /* CEPH_OSD_WATCH_OP_ */ 129 u32 gen; 130 } watch; 131 struct { 132 struct ceph_osd_data request_data; 133 } notify_ack; 134 struct { 135 u64 cookie; 136 struct ceph_osd_data request_data; 137 struct ceph_osd_data response_data; 138 } notify; 139 struct { 140 struct ceph_osd_data response_data; 141 } list_watchers; 142 struct { 143 u64 expected_object_size; 144 u64 expected_write_size; 145 u32 flags; /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */ 146 } alloc_hint; 147 struct { 148 u64 snapid; 149 u64 src_version; 150 u8 flags; 151 u32 src_fadvise_flags; 152 struct ceph_osd_data osd_data; 153 } copy_from; 154 }; 155 }; 156 157 struct ceph_osd_request_target { 158 struct ceph_object_id base_oid; 159 struct ceph_object_locator base_oloc; 160 struct ceph_object_id target_oid; 161 struct ceph_object_locator target_oloc; 162 163 struct ceph_pg pgid; /* last raw pg we mapped to */ 164 struct ceph_spg spgid; /* last actual spg we mapped to */ 165 u32 pg_num; 166 u32 pg_num_mask; 167 struct ceph_osds acting; 168 struct ceph_osds up; 169 int size; 170 int min_size; 171 bool sort_bitwise; 172 bool recovery_deletes; 173 174 unsigned int flags; /* CEPH_OSD_FLAG_* */ 175 bool used_replica; 176 bool paused; 177 178 u32 epoch; 179 u32 last_force_resend; 180 181 int osd; 182 }; 183 184 /* an in-flight request */ 185 struct ceph_osd_request { 186 u64 r_tid; /* unique for this client */ 187 struct rb_node r_node; 188 struct rb_node r_mc_node; /* map check */ 189 struct work_struct r_complete_work; 190 struct ceph_osd *r_osd; 191 192 struct ceph_osd_request_target r_t; 193 #define r_base_oid r_t.base_oid 194 #define r_base_oloc r_t.base_oloc 195 #define r_flags r_t.flags 196 197 struct ceph_msg *r_request, *r_reply; 198 u32 r_sent; /* >0 if r_request is sending/sent */ 199 200 /* request osd ops array */ 201 unsigned int r_num_ops; 202 203 int r_result; 204 205 struct ceph_osd_client *r_osdc; 206 struct kref r_kref; 207 bool r_mempool; 208 struct completion r_completion; /* private to osd_client.c */ 209 ceph_osdc_callback_t r_callback; 210 211 struct inode *r_inode; /* for use by callbacks */ 212 struct list_head r_private_item; /* ditto */ 213 void *r_priv; /* ditto */ 214 215 /* set by submitter */ 216 u64 r_snapid; /* for reads, CEPH_NOSNAP o/w */ 217 struct ceph_snap_context *r_snapc; /* for writes */ 218 struct timespec64 r_mtime; /* ditto */ 219 u64 r_data_offset; /* ditto */ 220 bool r_linger; /* don't resend on failure */ 221 222 /* internal */ 223 unsigned long r_stamp; /* jiffies, send or check time */ 224 unsigned long r_start_stamp; /* jiffies */ 225 ktime_t r_start_latency; /* ktime_t */ 226 ktime_t r_end_latency; /* ktime_t */ 227 int r_attempts; 228 u32 r_map_dne_bound; 229 230 struct ceph_osd_req_op r_ops[]; 231 }; 232 233 struct ceph_request_redirect { 234 struct ceph_object_locator oloc; 235 }; 236 237 /* 238 * osd request identifier 239 * 240 * caller name + incarnation# + tid to unique identify this request 241 */ 242 struct ceph_osd_reqid { 243 struct ceph_entity_name name; 244 __le64 tid; 245 __le32 inc; 246 } __packed; 247 248 struct ceph_blkin_trace_info { 249 __le64 trace_id; 250 __le64 span_id; 251 __le64 parent_span_id; 252 } __packed; 253 254 typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie, 255 u64 notifier_id, void *data, size_t data_len); 256 typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err); 257 258 struct ceph_osd_linger_request { 259 struct ceph_osd_client *osdc; 260 u64 linger_id; 261 bool committed; 262 bool is_watch; /* watch or notify */ 263 264 struct ceph_osd *osd; 265 struct ceph_osd_request *reg_req; 266 struct ceph_osd_request *ping_req; 267 unsigned long ping_sent; 268 unsigned long watch_valid_thru; 269 struct list_head pending_lworks; 270 271 struct ceph_osd_request_target t; 272 u32 map_dne_bound; 273 274 struct timespec64 mtime; 275 276 struct kref kref; 277 struct mutex lock; 278 struct rb_node node; /* osd */ 279 struct rb_node osdc_node; /* osdc */ 280 struct rb_node mc_node; /* map check */ 281 struct list_head scan_item; 282 283 struct completion reg_commit_wait; 284 struct completion notify_finish_wait; 285 int reg_commit_error; 286 int notify_finish_error; 287 int last_error; 288 289 u32 register_gen; 290 u64 notify_id; 291 292 rados_watchcb2_t wcb; 293 rados_watcherrcb_t errcb; 294 void *data; 295 296 struct ceph_pagelist *request_pl; 297 struct page **notify_id_pages; 298 299 struct page ***preply_pages; 300 size_t *preply_len; 301 }; 302 303 struct ceph_watch_item { 304 struct ceph_entity_name name; 305 u64 cookie; 306 struct ceph_entity_addr addr; 307 }; 308 309 struct ceph_spg_mapping { 310 struct rb_node node; 311 struct ceph_spg spgid; 312 313 struct rb_root backoffs; 314 }; 315 316 struct ceph_hobject_id { 317 void *key; 318 size_t key_len; 319 void *oid; 320 size_t oid_len; 321 u64 snapid; 322 u32 hash; 323 u8 is_max; 324 void *nspace; 325 size_t nspace_len; 326 s64 pool; 327 328 /* cache */ 329 u32 hash_reverse_bits; 330 }; 331 332 static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid) 333 { 334 hoid->hash_reverse_bits = bitrev32(hoid->hash); 335 } 336 337 /* 338 * PG-wide backoff: [begin, end) 339 * per-object backoff: begin == end 340 */ 341 struct ceph_osd_backoff { 342 struct rb_node spg_node; 343 struct rb_node id_node; 344 345 struct ceph_spg spgid; 346 u64 id; 347 struct ceph_hobject_id *begin; 348 struct ceph_hobject_id *end; 349 }; 350 351 #define CEPH_LINGER_ID_START 0xffff000000000000ULL 352 353 struct ceph_osd_client { 354 struct ceph_client *client; 355 356 struct ceph_osdmap *osdmap; /* current map */ 357 struct rw_semaphore lock; 358 359 struct rb_root osds; /* osds */ 360 struct list_head osd_lru; /* idle osds */ 361 spinlock_t osd_lru_lock; 362 u32 epoch_barrier; 363 struct ceph_osd homeless_osd; 364 atomic64_t last_tid; /* tid of last request */ 365 u64 last_linger_id; 366 struct rb_root linger_requests; /* lingering requests */ 367 struct rb_root map_checks; 368 struct rb_root linger_map_checks; 369 atomic_t num_requests; 370 atomic_t num_homeless; 371 int abort_err; 372 struct delayed_work timeout_work; 373 struct delayed_work osds_timeout_work; 374 #ifdef CONFIG_DEBUG_FS 375 struct dentry *debugfs_file; 376 #endif 377 378 mempool_t *req_mempool; 379 380 struct ceph_msgpool msgpool_op; 381 struct ceph_msgpool msgpool_op_reply; 382 383 struct workqueue_struct *notify_wq; 384 struct workqueue_struct *completion_wq; 385 }; 386 387 static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag) 388 { 389 return osdc->osdmap->flags & flag; 390 } 391 392 extern int ceph_osdc_setup(void); 393 extern void ceph_osdc_cleanup(void); 394 395 extern int ceph_osdc_init(struct ceph_osd_client *osdc, 396 struct ceph_client *client); 397 extern void ceph_osdc_stop(struct ceph_osd_client *osdc); 398 extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc); 399 400 extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, 401 struct ceph_msg *msg); 402 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, 403 struct ceph_msg *msg); 404 void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); 405 void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err); 406 void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc); 407 408 #define osd_req_op_data(oreq, whch, typ, fld) \ 409 ({ \ 410 struct ceph_osd_request *__oreq = (oreq); \ 411 unsigned int __whch = (whch); \ 412 BUG_ON(__whch >= __oreq->r_num_ops); \ 413 &__oreq->r_ops[__whch].typ.fld; \ 414 }) 415 416 struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req, 417 unsigned int which, u16 opcode, u32 flags); 418 419 extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, 420 unsigned int which, 421 struct page **pages, u64 length, 422 u32 alignment, bool pages_from_pool, 423 bool own_pages); 424 425 extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req, 426 unsigned int which, u16 opcode, 427 u64 offset, u64 length, 428 u64 truncate_size, u32 truncate_seq); 429 extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req, 430 unsigned int which, u64 length); 431 extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, 432 unsigned int which, u64 offset_inc); 433 434 extern struct ceph_osd_data *osd_req_op_extent_osd_data( 435 struct ceph_osd_request *osd_req, 436 unsigned int which); 437 438 extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, 439 unsigned int which, 440 struct page **pages, u64 length, 441 u32 alignment, bool pages_from_pool, 442 bool own_pages); 443 extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, 444 unsigned int which, 445 struct ceph_pagelist *pagelist); 446 #ifdef CONFIG_BLOCK 447 void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, 448 unsigned int which, 449 struct ceph_bio_iter *bio_pos, 450 u32 bio_length); 451 #endif /* CONFIG_BLOCK */ 452 void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req, 453 unsigned int which, 454 struct bio_vec *bvecs, u32 num_bvecs, 455 u32 bytes); 456 void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, 457 unsigned int which, 458 struct ceph_bvec_iter *bvec_pos); 459 460 extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, 461 unsigned int which, 462 struct ceph_pagelist *pagelist); 463 extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, 464 unsigned int which, 465 struct page **pages, u64 length, 466 u32 alignment, bool pages_from_pool, 467 bool own_pages); 468 void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req, 469 unsigned int which, 470 struct bio_vec *bvecs, u32 num_bvecs, 471 u32 bytes); 472 extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, 473 unsigned int which, 474 struct page **pages, u64 length, 475 u32 alignment, bool pages_from_pool, 476 bool own_pages); 477 int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, 478 const char *class, const char *method); 479 extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, 480 u16 opcode, const char *name, const void *value, 481 size_t size, u8 cmp_op, u8 cmp_mode); 482 extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, 483 unsigned int which, 484 u64 expected_object_size, 485 u64 expected_write_size, 486 u32 flags); 487 extern int osd_req_op_copy_from_init(struct ceph_osd_request *req, 488 u64 src_snapid, u64 src_version, 489 struct ceph_object_id *src_oid, 490 struct ceph_object_locator *src_oloc, 491 u32 src_fadvise_flags, 492 u32 dst_fadvise_flags, 493 u32 truncate_seq, u64 truncate_size, 494 u8 copy_from_flags); 495 496 extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, 497 struct ceph_snap_context *snapc, 498 unsigned int num_ops, 499 bool use_mempool, 500 gfp_t gfp_flags); 501 int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp); 502 503 extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, 504 struct ceph_file_layout *layout, 505 struct ceph_vino vino, 506 u64 offset, u64 *len, 507 unsigned int which, int num_ops, 508 int opcode, int flags, 509 struct ceph_snap_context *snapc, 510 u32 truncate_seq, u64 truncate_size, 511 bool use_mempool); 512 513 extern void ceph_osdc_get_request(struct ceph_osd_request *req); 514 extern void ceph_osdc_put_request(struct ceph_osd_request *req); 515 516 void ceph_osdc_start_request(struct ceph_osd_client *osdc, 517 struct ceph_osd_request *req); 518 extern void ceph_osdc_cancel_request(struct ceph_osd_request *req); 519 extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, 520 struct ceph_osd_request *req); 521 extern void ceph_osdc_sync(struct ceph_osd_client *osdc); 522 523 extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc); 524 void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc); 525 526 int ceph_osdc_call(struct ceph_osd_client *osdc, 527 struct ceph_object_id *oid, 528 struct ceph_object_locator *oloc, 529 const char *class, const char *method, 530 unsigned int flags, 531 struct page *req_page, size_t req_len, 532 struct page **resp_pages, size_t *resp_len); 533 534 /* watch/notify */ 535 struct ceph_osd_linger_request * 536 ceph_osdc_watch(struct ceph_osd_client *osdc, 537 struct ceph_object_id *oid, 538 struct ceph_object_locator *oloc, 539 rados_watchcb2_t wcb, 540 rados_watcherrcb_t errcb, 541 void *data); 542 int ceph_osdc_unwatch(struct ceph_osd_client *osdc, 543 struct ceph_osd_linger_request *lreq); 544 545 int ceph_osdc_notify_ack(struct ceph_osd_client *osdc, 546 struct ceph_object_id *oid, 547 struct ceph_object_locator *oloc, 548 u64 notify_id, 549 u64 cookie, 550 void *payload, 551 u32 payload_len); 552 int ceph_osdc_notify(struct ceph_osd_client *osdc, 553 struct ceph_object_id *oid, 554 struct ceph_object_locator *oloc, 555 void *payload, 556 u32 payload_len, 557 u32 timeout, 558 struct page ***preply_pages, 559 size_t *preply_len); 560 int ceph_osdc_watch_check(struct ceph_osd_client *osdc, 561 struct ceph_osd_linger_request *lreq); 562 int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, 563 struct ceph_object_id *oid, 564 struct ceph_object_locator *oloc, 565 struct ceph_watch_item **watchers, 566 u32 *num_watchers); 567 #endif 568 569