1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _FS_CEPH_OSD_CLIENT_H 3 #define _FS_CEPH_OSD_CLIENT_H 4 5 #include <linux/bitrev.h> 6 #include <linux/completion.h> 7 #include <linux/kref.h> 8 #include <linux/mempool.h> 9 #include <linux/rbtree.h> 10 #include <linux/refcount.h> 11 #include <linux/ktime.h> 12 13 #include <linux/ceph/types.h> 14 #include <linux/ceph/osdmap.h> 15 #include <linux/ceph/messenger.h> 16 #include <linux/ceph/msgpool.h> 17 #include <linux/ceph/auth.h> 18 #include <linux/ceph/pagelist.h> 19 20 struct ceph_msg; 21 struct ceph_snap_context; 22 struct ceph_osd_request; 23 struct ceph_osd_client; 24 25 /* 26 * completion callback for async writepages 27 */ 28 typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); 29 30 #define CEPH_HOMELESS_OSD -1 31 32 /* a given osd we're communicating with */ 33 struct ceph_osd { 34 refcount_t o_ref; 35 struct ceph_osd_client *o_osdc; 36 int o_osd; 37 int o_incarnation; 38 struct rb_node o_node; 39 struct ceph_connection o_con; 40 struct rb_root o_requests; 41 struct rb_root o_linger_requests; 42 struct rb_root o_backoff_mappings; 43 struct rb_root o_backoffs_by_id; 44 struct list_head o_osd_lru; 45 struct ceph_auth_handshake o_auth; 46 unsigned long lru_ttl; 47 struct list_head o_keepalive_item; 48 struct mutex lock; 49 }; 50 51 #define CEPH_OSD_SLAB_OPS 2 52 #define CEPH_OSD_MAX_OPS 16 53 54 enum ceph_osd_data_type { 55 CEPH_OSD_DATA_TYPE_NONE = 0, 56 CEPH_OSD_DATA_TYPE_PAGES, 57 CEPH_OSD_DATA_TYPE_PAGELIST, 58 #ifdef CONFIG_BLOCK 59 CEPH_OSD_DATA_TYPE_BIO, 60 #endif /* CONFIG_BLOCK */ 61 CEPH_OSD_DATA_TYPE_BVECS, 62 }; 63 64 struct ceph_osd_data { 65 enum ceph_osd_data_type type; 66 union { 67 struct { 68 struct page **pages; 69 u64 length; 70 u32 alignment; 71 bool pages_from_pool; 72 bool own_pages; 73 }; 74 struct ceph_pagelist *pagelist; 75 #ifdef CONFIG_BLOCK 76 struct { 77 struct ceph_bio_iter bio_pos; 78 u32 bio_length; 79 }; 80 #endif /* CONFIG_BLOCK */ 81 struct { 82 struct ceph_bvec_iter bvec_pos; 83 u32 num_bvecs; 84 }; 85 }; 86 }; 87 88 struct ceph_osd_req_op { 89 u16 op; /* CEPH_OSD_OP_* */ 90 u32 flags; /* CEPH_OSD_OP_FLAG_* */ 91 u32 indata_len; /* request */ 92 u32 outdata_len; /* reply */ 93 s32 rval; 94 95 union { 96 struct ceph_osd_data raw_data_in; 97 struct { 98 u64 offset, length; 99 u64 truncate_size; 100 u32 truncate_seq; 101 struct ceph_osd_data osd_data; 102 } extent; 103 struct { 104 u32 name_len; 105 u32 value_len; 106 __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ 107 __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ 108 struct ceph_osd_data osd_data; 109 } xattr; 110 struct { 111 const char *class_name; 112 const char *method_name; 113 struct ceph_osd_data request_info; 114 struct ceph_osd_data request_data; 115 struct ceph_osd_data response_data; 116 __u8 class_len; 117 __u8 method_len; 118 u32 indata_len; 119 } cls; 120 struct { 121 u64 cookie; 122 __u8 op; /* CEPH_OSD_WATCH_OP_ */ 123 u32 gen; 124 } watch; 125 struct { 126 struct ceph_osd_data request_data; 127 } notify_ack; 128 struct { 129 u64 cookie; 130 struct ceph_osd_data request_data; 131 struct ceph_osd_data response_data; 132 } notify; 133 struct { 134 struct ceph_osd_data response_data; 135 } list_watchers; 136 struct { 137 u64 expected_object_size; 138 u64 expected_write_size; 139 } alloc_hint; 140 struct { 141 u64 snapid; 142 u64 src_version; 143 u8 flags; 144 u32 src_fadvise_flags; 145 struct ceph_osd_data osd_data; 146 } copy_from; 147 }; 148 }; 149 150 struct ceph_osd_request_target { 151 struct ceph_object_id base_oid; 152 struct ceph_object_locator base_oloc; 153 struct ceph_object_id target_oid; 154 struct ceph_object_locator target_oloc; 155 156 struct ceph_pg pgid; /* last raw pg we mapped to */ 157 struct ceph_spg spgid; /* last actual spg we mapped to */ 158 u32 pg_num; 159 u32 pg_num_mask; 160 struct ceph_osds acting; 161 struct ceph_osds up; 162 int size; 163 int min_size; 164 bool sort_bitwise; 165 bool recovery_deletes; 166 167 unsigned int flags; /* CEPH_OSD_FLAG_* */ 168 bool paused; 169 170 u32 epoch; 171 u32 last_force_resend; 172 173 int osd; 174 }; 175 176 /* an in-flight request */ 177 struct ceph_osd_request { 178 u64 r_tid; /* unique for this client */ 179 struct rb_node r_node; 180 struct rb_node r_mc_node; /* map check */ 181 struct work_struct r_complete_work; 182 struct ceph_osd *r_osd; 183 184 struct ceph_osd_request_target r_t; 185 #define r_base_oid r_t.base_oid 186 #define r_base_oloc r_t.base_oloc 187 #define r_flags r_t.flags 188 189 struct ceph_msg *r_request, *r_reply; 190 u32 r_sent; /* >0 if r_request is sending/sent */ 191 192 /* request osd ops array */ 193 unsigned int r_num_ops; 194 195 int r_result; 196 197 struct ceph_osd_client *r_osdc; 198 struct kref r_kref; 199 bool r_mempool; 200 struct completion r_completion; /* private to osd_client.c */ 201 ceph_osdc_callback_t r_callback; 202 203 struct inode *r_inode; /* for use by callbacks */ 204 struct list_head r_private_item; /* ditto */ 205 void *r_priv; /* ditto */ 206 207 /* set by submitter */ 208 u64 r_snapid; /* for reads, CEPH_NOSNAP o/w */ 209 struct ceph_snap_context *r_snapc; /* for writes */ 210 struct timespec64 r_mtime; /* ditto */ 211 u64 r_data_offset; /* ditto */ 212 bool r_linger; /* don't resend on failure */ 213 214 /* internal */ 215 unsigned long r_stamp; /* jiffies, send or check time */ 216 unsigned long r_start_stamp; /* jiffies */ 217 ktime_t r_start_latency; /* ktime_t */ 218 ktime_t r_end_latency; /* ktime_t */ 219 int r_attempts; 220 u32 r_map_dne_bound; 221 222 struct ceph_osd_req_op r_ops[]; 223 }; 224 225 struct ceph_request_redirect { 226 struct ceph_object_locator oloc; 227 }; 228 229 /* 230 * osd request identifier 231 * 232 * caller name + incarnation# + tid to unique identify this request 233 */ 234 struct ceph_osd_reqid { 235 struct ceph_entity_name name; 236 __le64 tid; 237 __le32 inc; 238 } __packed; 239 240 struct ceph_blkin_trace_info { 241 __le64 trace_id; 242 __le64 span_id; 243 __le64 parent_span_id; 244 } __packed; 245 246 typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie, 247 u64 notifier_id, void *data, size_t data_len); 248 typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err); 249 250 struct ceph_osd_linger_request { 251 struct ceph_osd_client *osdc; 252 u64 linger_id; 253 bool committed; 254 bool is_watch; /* watch or notify */ 255 256 struct ceph_osd *osd; 257 struct ceph_osd_request *reg_req; 258 struct ceph_osd_request *ping_req; 259 unsigned long ping_sent; 260 unsigned long watch_valid_thru; 261 struct list_head pending_lworks; 262 263 struct ceph_osd_request_target t; 264 u32 map_dne_bound; 265 266 struct timespec64 mtime; 267 268 struct kref kref; 269 struct mutex lock; 270 struct rb_node node; /* osd */ 271 struct rb_node osdc_node; /* osdc */ 272 struct rb_node mc_node; /* map check */ 273 struct list_head scan_item; 274 275 struct completion reg_commit_wait; 276 struct completion notify_finish_wait; 277 int reg_commit_error; 278 int notify_finish_error; 279 int last_error; 280 281 u32 register_gen; 282 u64 notify_id; 283 284 rados_watchcb2_t wcb; 285 rados_watcherrcb_t errcb; 286 void *data; 287 288 struct page ***preply_pages; 289 size_t *preply_len; 290 }; 291 292 struct ceph_watch_item { 293 struct ceph_entity_name name; 294 u64 cookie; 295 struct ceph_entity_addr addr; 296 }; 297 298 struct ceph_spg_mapping { 299 struct rb_node node; 300 struct ceph_spg spgid; 301 302 struct rb_root backoffs; 303 }; 304 305 struct ceph_hobject_id { 306 void *key; 307 size_t key_len; 308 void *oid; 309 size_t oid_len; 310 u64 snapid; 311 u32 hash; 312 u8 is_max; 313 void *nspace; 314 size_t nspace_len; 315 s64 pool; 316 317 /* cache */ 318 u32 hash_reverse_bits; 319 }; 320 321 static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid) 322 { 323 hoid->hash_reverse_bits = bitrev32(hoid->hash); 324 } 325 326 /* 327 * PG-wide backoff: [begin, end) 328 * per-object backoff: begin == end 329 */ 330 struct ceph_osd_backoff { 331 struct rb_node spg_node; 332 struct rb_node id_node; 333 334 struct ceph_spg spgid; 335 u64 id; 336 struct ceph_hobject_id *begin; 337 struct ceph_hobject_id *end; 338 }; 339 340 #define CEPH_LINGER_ID_START 0xffff000000000000ULL 341 342 struct ceph_osd_client { 343 struct ceph_client *client; 344 345 struct ceph_osdmap *osdmap; /* current map */ 346 struct rw_semaphore lock; 347 348 struct rb_root osds; /* osds */ 349 struct list_head osd_lru; /* idle osds */ 350 spinlock_t osd_lru_lock; 351 u32 epoch_barrier; 352 struct ceph_osd homeless_osd; 353 atomic64_t last_tid; /* tid of last request */ 354 u64 last_linger_id; 355 struct rb_root linger_requests; /* lingering requests */ 356 struct rb_root map_checks; 357 struct rb_root linger_map_checks; 358 atomic_t num_requests; 359 atomic_t num_homeless; 360 int abort_err; 361 struct delayed_work timeout_work; 362 struct delayed_work osds_timeout_work; 363 #ifdef CONFIG_DEBUG_FS 364 struct dentry *debugfs_file; 365 #endif 366 367 mempool_t *req_mempool; 368 369 struct ceph_msgpool msgpool_op; 370 struct ceph_msgpool msgpool_op_reply; 371 372 struct workqueue_struct *notify_wq; 373 struct workqueue_struct *completion_wq; 374 }; 375 376 static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag) 377 { 378 return osdc->osdmap->flags & flag; 379 } 380 381 extern int ceph_osdc_setup(void); 382 extern void ceph_osdc_cleanup(void); 383 384 extern int ceph_osdc_init(struct ceph_osd_client *osdc, 385 struct ceph_client *client); 386 extern void ceph_osdc_stop(struct ceph_osd_client *osdc); 387 extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc); 388 389 extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, 390 struct ceph_msg *msg); 391 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, 392 struct ceph_msg *msg); 393 void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); 394 void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err); 395 void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc); 396 397 #define osd_req_op_data(oreq, whch, typ, fld) \ 398 ({ \ 399 struct ceph_osd_request *__oreq = (oreq); \ 400 unsigned int __whch = (whch); \ 401 BUG_ON(__whch >= __oreq->r_num_ops); \ 402 &__oreq->r_ops[__whch].typ.fld; \ 403 }) 404 405 extern void osd_req_op_init(struct ceph_osd_request *osd_req, 406 unsigned int which, u16 opcode, u32 flags); 407 408 extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *, 409 unsigned int which, 410 struct page **pages, u64 length, 411 u32 alignment, bool pages_from_pool, 412 bool own_pages); 413 414 extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req, 415 unsigned int which, u16 opcode, 416 u64 offset, u64 length, 417 u64 truncate_size, u32 truncate_seq); 418 extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req, 419 unsigned int which, u64 length); 420 extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req, 421 unsigned int which, u64 offset_inc); 422 423 extern struct ceph_osd_data *osd_req_op_extent_osd_data( 424 struct ceph_osd_request *osd_req, 425 unsigned int which); 426 427 extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, 428 unsigned int which, 429 struct page **pages, u64 length, 430 u32 alignment, bool pages_from_pool, 431 bool own_pages); 432 extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, 433 unsigned int which, 434 struct ceph_pagelist *pagelist); 435 #ifdef CONFIG_BLOCK 436 void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, 437 unsigned int which, 438 struct ceph_bio_iter *bio_pos, 439 u32 bio_length); 440 #endif /* CONFIG_BLOCK */ 441 void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req, 442 unsigned int which, 443 struct bio_vec *bvecs, u32 num_bvecs, 444 u32 bytes); 445 void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, 446 unsigned int which, 447 struct ceph_bvec_iter *bvec_pos); 448 449 extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, 450 unsigned int which, 451 struct ceph_pagelist *pagelist); 452 extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, 453 unsigned int which, 454 struct page **pages, u64 length, 455 u32 alignment, bool pages_from_pool, 456 bool own_pages); 457 void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req, 458 unsigned int which, 459 struct bio_vec *bvecs, u32 num_bvecs, 460 u32 bytes); 461 extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, 462 unsigned int which, 463 struct page **pages, u64 length, 464 u32 alignment, bool pages_from_pool, 465 bool own_pages); 466 int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which, 467 const char *class, const char *method); 468 extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which, 469 u16 opcode, const char *name, const void *value, 470 size_t size, u8 cmp_op, u8 cmp_mode); 471 extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, 472 unsigned int which, 473 u64 expected_object_size, 474 u64 expected_write_size); 475 476 extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, 477 struct ceph_snap_context *snapc, 478 unsigned int num_ops, 479 bool use_mempool, 480 gfp_t gfp_flags); 481 int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp); 482 483 extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, 484 struct ceph_file_layout *layout, 485 struct ceph_vino vino, 486 u64 offset, u64 *len, 487 unsigned int which, int num_ops, 488 int opcode, int flags, 489 struct ceph_snap_context *snapc, 490 u32 truncate_seq, u64 truncate_size, 491 bool use_mempool); 492 493 extern void ceph_osdc_get_request(struct ceph_osd_request *req); 494 extern void ceph_osdc_put_request(struct ceph_osd_request *req); 495 496 extern int ceph_osdc_start_request(struct ceph_osd_client *osdc, 497 struct ceph_osd_request *req, 498 bool nofail); 499 extern void ceph_osdc_cancel_request(struct ceph_osd_request *req); 500 extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, 501 struct ceph_osd_request *req); 502 extern void ceph_osdc_sync(struct ceph_osd_client *osdc); 503 504 extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc); 505 void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc); 506 507 int ceph_osdc_call(struct ceph_osd_client *osdc, 508 struct ceph_object_id *oid, 509 struct ceph_object_locator *oloc, 510 const char *class, const char *method, 511 unsigned int flags, 512 struct page *req_page, size_t req_len, 513 struct page **resp_pages, size_t *resp_len); 514 515 int ceph_osdc_copy_from(struct ceph_osd_client *osdc, 516 u64 src_snapid, u64 src_version, 517 struct ceph_object_id *src_oid, 518 struct ceph_object_locator *src_oloc, 519 u32 src_fadvise_flags, 520 struct ceph_object_id *dst_oid, 521 struct ceph_object_locator *dst_oloc, 522 u32 dst_fadvise_flags, 523 u32 truncate_seq, u64 truncate_size, 524 u8 copy_from_flags); 525 526 /* watch/notify */ 527 struct ceph_osd_linger_request * 528 ceph_osdc_watch(struct ceph_osd_client *osdc, 529 struct ceph_object_id *oid, 530 struct ceph_object_locator *oloc, 531 rados_watchcb2_t wcb, 532 rados_watcherrcb_t errcb, 533 void *data); 534 int ceph_osdc_unwatch(struct ceph_osd_client *osdc, 535 struct ceph_osd_linger_request *lreq); 536 537 int ceph_osdc_notify_ack(struct ceph_osd_client *osdc, 538 struct ceph_object_id *oid, 539 struct ceph_object_locator *oloc, 540 u64 notify_id, 541 u64 cookie, 542 void *payload, 543 u32 payload_len); 544 int ceph_osdc_notify(struct ceph_osd_client *osdc, 545 struct ceph_object_id *oid, 546 struct ceph_object_locator *oloc, 547 void *payload, 548 u32 payload_len, 549 u32 timeout, 550 struct page ***preply_pages, 551 size_t *preply_len); 552 int ceph_osdc_watch_check(struct ceph_osd_client *osdc, 553 struct ceph_osd_linger_request *lreq); 554 int ceph_osdc_list_watchers(struct ceph_osd_client *osdc, 555 struct ceph_object_id *oid, 556 struct ceph_object_locator *oloc, 557 struct ceph_watch_item **watchers, 558 u32 *num_watchers); 559 #endif 560 561