4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2015, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
36 #include <linux/spinlock.h>
38 #include "lustre/lustre_idl.h"
39 #include "lustre_lib.h"
41 #include "lustre_export.h"
42 #include "lustre_fid.h"
43 #include "lustre_fld.h"
44 #include "lustre_handles.h"
45 #include "lustre_intent.h"
47 #define MAX_OBD_DEVICES 8192
55 struct lov_oinfo
{ /* per-stripe data structure */
56 struct ost_id loi_oi
; /* object ID/Sequence on the target OST */
57 int loi_ost_idx
; /* OST stripe index in lov_tgt_desc->tgts */
58 int loi_ost_gen
; /* generation of this loi_ost_idx */
60 unsigned long loi_kms_valid
:1;
61 __u64 loi_kms
; /* known minimum size */
62 struct ost_lvb loi_lvb
;
63 struct osc_async_rc loi_ar
;
66 static inline void loi_kms_set(struct lov_oinfo
*oinfo
, __u64 kms
)
69 oinfo
->loi_kms_valid
= 1;
72 static inline void loi_init(struct lov_oinfo
*loi
)
/*
 * Type of the update callback stored in oi_cb_up: it is handed an opaque
 * cookie together with an int rc, and returns an int.
 */
typedef int (*obd_enqueue_update_f)(void *cookie, int rc);
81 /* obd info for a particular level (lov, osc). */
83 /* OBD_STATFS_* flags */
85 /* lsm data specific for every OSC. */
86 struct lov_stripe_md
*oi_md
;
87 /* statfs data specific for every OSC, if needed at all. */
88 struct obd_statfs
*oi_osfs
;
89 /* An update callback which is called to update some data on upper
90 * level. E.g. it is used for update lsm->lsm_oinfo at every received
91 * request in osc level for enqueue requests. It is also possible to
92 * update some caller data from LOV layer if needed.
94 obd_enqueue_update_f oi_cb_up
;
98 struct list_head typ_chain
;
99 struct obd_ops
*typ_dt_ops
;
100 struct md_ops
*typ_md_ops
;
101 struct dentry
*typ_debugfs_entry
;
104 struct lu_device_type
*typ_lu
;
105 spinlock_t obd_type_lock
;
106 struct kobject
*typ_kobj
;
116 struct timeout_item
{
117 enum timeout_event ti_event
;
118 unsigned long ti_timeout
;
121 struct list_head ti_obd_list
;
122 struct list_head ti_chain
;
125 #define OBD_MAX_RIF_DEFAULT 8
126 #define OBD_MAX_RIF_MAX 512
127 #define OSC_MAX_RIF_MAX 256
128 #define OSC_MAX_DIRTY_DEFAULT (OBD_MAX_RIF_DEFAULT * 4)
129 #define OSC_MAX_DIRTY_MB_MAX 2048 /* arbitrary, but < MAX_LONG bytes */
130 #define OSC_DEFAULT_RESENDS 10
132 /* possible values for fo_sync_lock_cancel */
134 NEVER_SYNC_ON_CANCEL
= 0,
135 BLOCKING_SYNC_ON_CANCEL
= 1,
136 ALWAYS_SYNC_ON_CANCEL
= 2,
137 NUM_SYNC_ON_CANCEL_STATES
140 enum obd_cl_sem_lock_class
{
147 * Limit reply buffer size for striping data to one x86_64 page. This
148 * value is chosen to fit the striping data for common use cases while
149 * staying well below the limit at which the buffer must be backed by
150 * vmalloc(). Excessive use of vmalloc() may cause spinlock contention
153 #define OBD_MAX_DEFAULT_EA_SIZE 4096
158 struct rw_semaphore cl_sem
;
159 struct obd_uuid cl_target_uuid
;
160 struct obd_import
*cl_import
; /* ptlrpc connection state */
161 size_t cl_conn_count
;
163 * Cache maximum and default values for easize. This is
164 * strictly a performance optimization to minimize calls to
165 * obd_size_diskmd(). The default values are used to calculate the
166 * initial size of a request buffer. The ptlrpc layer will resize the
167 * buffer as needed to accommodate a larger reply from the
168 * server. The default values should be small enough to avoid wasted
169 * memory and excessive use of vmalloc(), yet large enough to avoid
170 * reallocating the buffer in the common use case.
173 * Default EA size for striping attributes. It is initialized at
174 * mount-time based on the default stripe width of the filesystem,
175 * then it tracks the largest observed EA size advertised by
176 * the MDT, up to a maximum value of OBD_MAX_DEFAULT_EA_SIZE.
178 u32 cl_default_mds_easize
;
179 /* Maximum possible EA size computed at mount-time based on
180 * the number of OSTs in the filesystem. May be increased at
181 * run-time if a larger observed size is advertised by the MDT.
183 u32 cl_max_mds_easize
;
185 enum lustre_sec_part cl_sp_me
;
186 enum lustre_sec_part cl_sp_to
;
187 struct sptlrpc_flavor cl_flvr_mgc
; /* fixed flavor of mgc->mgs */
189 /* the grant values are protected by loi_list_lock below */
190 unsigned long cl_dirty_pages
; /* all _dirty_ in pages */
191 unsigned long cl_dirty_max_pages
; /* allowed w/o rpc */
192 unsigned long cl_dirty_transit
; /* dirty synchronous */
193 unsigned long cl_avail_grant
; /* bytes of credit for ost */
194 unsigned long cl_lost_grant
; /* lost credits (trunc) */
196 /* since we allocate grant by blocks, we don't know how many grant will
197 * be used to add a page into cache. As a solution, we reserve maximum
198 * grant before trying to dirty a page and unreserve the rest.
199 * See osc_{reserve|unreserve}_grant for details.
201 long cl_reserved_grant
;
202 struct list_head cl_cache_waiters
; /* waiting for cache/grant */
203 unsigned long cl_next_shrink_grant
; /* jiffies */
204 struct list_head cl_grant_shrink_list
; /* Timeout event list */
205 int cl_grant_shrink_interval
; /* seconds */
207 /* A chunk is an optimal size used by osc_extent to determine
208 * the extent size. A chunk is max(PAGE_SIZE, OST block size)
211 unsigned int cl_extent_tax
; /* extent overhead, by bytes */
213 /* keep track of objects that have lois that contain pages which
214 * have been queued for async brw. this lock also protects the
215 * lists of osc_client_pages that hang off of the loi
218 * ->cl_loi_list_lock protects consistency of
219 * ->cl_loi_{ready,read,write}_list. ->ap_make_ready() and
220 * ->ap_completion() call-backs are executed under this lock. As we
221 * cannot guarantee that these call-backs never block on all platforms
222 * (as a matter of fact they do block on Mac OS X), type of
223 * ->cl_loi_list_lock is platform dependent: it's a spin-lock on Linux
224 * and blocking mutex on Mac OS X. (Alternative is to make this lock
225 * blocking everywhere, but we don't want to slow down fast-path of
226 * our main platform.)
228 * NB by Jinshan: though field names are still _loi_, but actually
229 * osc_object{}s are in the list.
231 spinlock_t cl_loi_list_lock
;
232 struct list_head cl_loi_ready_list
;
233 struct list_head cl_loi_hp_ready_list
;
234 struct list_head cl_loi_write_list
;
235 struct list_head cl_loi_read_list
;
236 __u32 cl_r_in_flight
;
237 __u32 cl_w_in_flight
;
238 /* just a sum of the loi/lop pending numbers to be exported by sysfs */
239 atomic_t cl_pending_w_pages
;
240 atomic_t cl_pending_r_pages
;
241 __u32 cl_max_pages_per_rpc
;
242 __u32 cl_max_rpcs_in_flight
;
243 struct obd_histogram cl_read_rpc_hist
;
244 struct obd_histogram cl_write_rpc_hist
;
245 struct obd_histogram cl_read_page_hist
;
246 struct obd_histogram cl_write_page_hist
;
247 struct obd_histogram cl_read_offset_hist
;
248 struct obd_histogram cl_write_offset_hist
;
250 /* lru for osc caching pages */
251 struct cl_client_cache
*cl_cache
;
252 struct list_head cl_lru_osc
; /* member of cl_cache->ccc_lru */
253 atomic_long_t
*cl_lru_left
;
254 atomic_long_t cl_lru_busy
;
255 atomic_long_t cl_lru_in_list
;
256 atomic_t cl_lru_shrinkers
;
257 struct list_head cl_lru_list
; /* lru page list */
258 spinlock_t cl_lru_list_lock
; /* page list protector */
259 atomic_long_t cl_unstable_count
;
261 /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
262 atomic_t cl_destroy_in_flight
;
263 wait_queue_head_t cl_destroy_waitq
;
265 struct mdc_rpc_lock
*cl_rpc_lock
;
267 /* modify rpcs in flight
268 * currently used for metadata only
270 spinlock_t cl_mod_rpcs_lock
;
271 u16 cl_max_mod_rpcs_in_flight
;
272 u16 cl_mod_rpcs_in_flight
;
273 u16 cl_close_rpcs_in_flight
;
274 wait_queue_head_t cl_mod_rpcs_waitq
;
275 unsigned long *cl_mod_tag_bitmap
;
276 struct obd_histogram cl_mod_rpcs_hist
;
279 atomic_t cl_mgc_refcount
;
280 struct obd_export
*cl_mgc_mgsexp
;
282 /* checksumming for data sent over the network */
283 unsigned int cl_checksum
:1; /* 0 = disabled, 1 = enabled */
284 /* supported checksum types that are worked out at connect time */
285 __u32 cl_supp_cksum_types
;
286 /* checksum algorithm to be used */
287 enum cksum_type cl_cksum_type
;
289 /* also protected by the poorly named _loi_list_lock lock above */
290 struct osc_async_rc cl_ar
;
292 /* sequence manager */
293 struct lu_client_seq
*cl_seq
;
295 atomic_t cl_resends
; /* resend count */
297 /* ptlrpc work for writeback in ptlrpcd context */
298 void *cl_writeback_work
;
300 /* hash tables for osc_quota_info */
301 struct cfs_hash
*cl_quota_hash
[MAXQUOTAS
];
304 #define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
311 struct echo_client_obd
{
312 struct obd_export
*ec_exp
; /* the local connection to osc/lov */
314 struct list_head ec_objects
;
315 struct list_head ec_locks
;
319 /* Generic subset of OSTs */
321 __u32
*op_array
; /* array of index of lov_obd->lov_tgts */
322 unsigned int op_count
; /* number of OSTs in the array */
323 unsigned int op_size
; /* allocated size of op_array */
324 struct rw_semaphore op_rw_sem
; /* to protect ost_pool use */
327 /* allow statfs data caching for 1 second */
328 #define OBD_STATFS_CACHE_SECONDS 1
330 struct lov_tgt_desc
{
331 struct list_head ltd_kill
;
332 struct obd_uuid ltd_uuid
;
333 struct obd_device
*ltd_obd
;
334 struct obd_export
*ltd_exp
;
336 __u32 ltd_index
; /* index in lov_obd->tgts */
337 unsigned long ltd_active
:1,/* is this target up for requests */
338 ltd_activate
:1,/* should target be activated */
339 ltd_reap
:1; /* should this target be deleted */
343 struct lov_desc desc
;
344 struct lov_tgt_desc
**lov_tgts
; /* sparse array */
345 struct ost_pool lov_packed
; /* all OSTs in a packed array */
346 struct mutex lov_lock
;
347 struct obd_connect_data lov_ocd
;
348 atomic_t lov_refcount
;
349 __u32 lov_death_row
;/* tgts scheduled to be deleted */
350 __u32 lov_tgt_size
; /* size of tgts array */
353 struct cfs_hash
*lov_pools_hash_body
; /* used for key access */
354 struct list_head lov_pool_list
; /* used for sequential access */
355 struct dentry
*lov_pool_debugfs_entry
;
356 enum lustre_sec_part lov_sp_me
;
358 /* Cached LRU and unstable data from upper layer */
359 struct cl_client_cache
*lov_cache
;
361 struct rw_semaphore lov_notify_lock
;
363 struct kobject
*lov_tgts_kobj
;
366 struct lmv_tgt_desc
{
367 struct obd_uuid ltd_uuid
;
368 struct obd_export
*ltd_exp
;
370 struct mutex ltd_fid_mutex
;
371 unsigned long ltd_active
:1; /* target up for requests */
374 enum placement_policy
{
375 PLACEMENT_CHAR_POLICY
= 0,
376 PLACEMENT_NID_POLICY
= 1,
377 PLACEMENT_INVAL_POLICY
= 2,
383 struct lu_client_fld lmv_fld
;
385 enum placement_policy lmv_placement
;
386 struct lmv_desc desc
;
387 struct obd_uuid cluuid
;
388 struct obd_export
*exp
;
390 struct mutex lmv_init_mutex
;
395 u32 tgts_size
; /* size of tgts array */
396 struct lmv_tgt_desc
**tgts
;
398 struct obd_connect_data conn_data
;
399 struct kobject
*lmv_tgts_kobj
;
402 struct niobuf_local
{
403 __u64 lnb_file_offset
;
404 __u32 lnb_page_offset
;
408 struct page
*lnb_page
;
412 #define LUSTRE_FLD_NAME "fld"
413 #define LUSTRE_SEQ_NAME "seq"
415 #define LUSTRE_MDD_NAME "mdd"
416 #define LUSTRE_OSD_LDISKFS_NAME "osd-ldiskfs"
417 #define LUSTRE_OSD_ZFS_NAME "osd-zfs"
418 #define LUSTRE_VVP_NAME "vvp"
419 #define LUSTRE_LMV_NAME "lmv"
420 #define LUSTRE_SLP_NAME "slp"
421 #define LUSTRE_LOD_NAME "lod"
422 #define LUSTRE_OSP_NAME "osp"
423 #define LUSTRE_LWP_NAME "lwp"
425 /* obd device type names */
426 /* FIXME all the references to LUSTRE_MDS_NAME should be swapped with LUSTRE_MDT_NAME */
427 #define LUSTRE_MDS_NAME "mds"
428 #define LUSTRE_MDT_NAME "mdt"
429 #define LUSTRE_MDC_NAME "mdc"
430 #define LUSTRE_OSS_NAME "ost" /* FIXME change name to oss */
431 #define LUSTRE_OST_NAME "obdfilter" /* FIXME change name to ost */
432 #define LUSTRE_OSC_NAME "osc"
433 #define LUSTRE_LOV_NAME "lov"
434 #define LUSTRE_MGS_NAME "mgs"
435 #define LUSTRE_MGC_NAME "mgc"
437 #define LUSTRE_ECHO_NAME "obdecho"
438 #define LUSTRE_ECHO_CLIENT_NAME "echo_client"
439 #define LUSTRE_QMT_NAME "qmt"
441 /* Constant obd names (post-rename) */
442 #define LUSTRE_MDS_OBDNAME "MDS"
443 #define LUSTRE_OSS_OBDNAME "OSS"
444 #define LUSTRE_MGS_OBDNAME "MGS"
445 #define LUSTRE_MGC_OBDNAME "MGC"
447 /* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
448 #define N_LOCAL_TEMP_PAGE 0x10000000
451 * Events signalled through obd_notify() upcall-chain.
453 enum obd_notify_event
{
456 /* Device connect start */
458 /* Device activated */
460 /* Device deactivated */
462 /* Device disconnected */
464 /* Connect data for import were changed */
467 OBD_NOTIFY_SYNC_NONBLOCK
,
469 /* Configuration event */
471 /* Administratively deactivate/activate event */
472 OBD_NOTIFY_DEACTIVATE
,
477 * Data structure used to pass obd_notify()-event to non-obd listeners (llite
478 * being main example).
480 struct obd_notify_upcall
{
481 int (*onu_upcall
)(struct obd_device
*host
, struct obd_device
*watched
,
482 enum obd_notify_event ev
, void *owner
, void *data
);
483 /* Opaque datum supplied by upper layer listener */
487 struct target_recovery_data
{
488 svc_handler_t trd_recovery_handler
;
489 pid_t trd_processing_task
;
490 struct completion trd_starting
;
491 struct completion trd_finishing
;
494 struct obd_llog_group
{
495 struct llog_ctxt
*olg_ctxts
[LLOG_MAX_CTXTS
];
496 wait_queue_head_t olg_waitq
;
498 struct mutex olg_cat_processing
;
501 /* corresponds to one of the obd's */
502 #define OBD_DEVICE_MAGIC 0XAB5CD6EF
504 struct lvfs_run_ctxt
{
505 struct dt_device
*dt
;
509 struct obd_type
*obd_type
;
510 u32 obd_magic
; /* OBD_DEVICE_MAGIC */
511 int obd_minor
; /* device number: lctl dl */
512 struct lu_device
*obd_lu_dev
;
514 /* common and UUID name of this device */
515 struct obd_uuid obd_uuid
;
516 char obd_name
[MAX_OBD_NAME
];
518 /* bitfield modification is protected by obd_dev_lock */
519 unsigned long obd_attached
:1, /* finished attach */
520 obd_set_up
:1, /* finished setup */
521 obd_version_recov
:1, /* obd uses version checking */
522 obd_replayable
:1,/* recovery is enabled; inform clients */
523 obd_no_transno
:1, /* no committed-transno notification */
524 obd_no_recov
:1, /* fail instead of retry messages */
525 obd_stopping
:1, /* started cleanup */
526 obd_starting
:1, /* started setup */
527 obd_force
:1, /* cleanup with > 0 obd refcount */
528 obd_fail
:1, /* cleanup with failover */
529 obd_no_conn
:1, /* deny new connections */
530 obd_inactive
:1, /* device active/inactive
531 * (for sysfs status only!!)
533 obd_no_ir
:1, /* no imperative recovery. */
534 obd_process_conf
:1; /* device is processing mgs config */
535 /* use a separate field, as it is set in interrupt context, to avoid messing with
536 * the protection of other bits using the _bh lock
538 unsigned long obd_recovery_expired
:1;
539 /* uuid-export hash body */
540 struct cfs_hash
*obd_uuid_hash
;
541 wait_queue_head_t obd_refcount_waitq
;
542 struct list_head obd_exports
;
543 struct list_head obd_unlinked_exports
;
544 struct list_head obd_delayed_exports
;
545 atomic_t obd_refcount
;
547 spinlock_t obd_nid_lock
;
548 struct ldlm_namespace
*obd_namespace
;
549 struct ptlrpc_client obd_ldlm_client
; /* XXX OST/MDS only */
550 /* a spinlock is OK for what we do now, may need a semaphore later */
551 spinlock_t obd_dev_lock
; /* protect OBD bitfield above */
552 spinlock_t obd_osfs_lock
;
553 struct obd_statfs obd_osfs
; /* locked by obd_osfs_lock */
555 u64 obd_last_committed
;
556 struct mutex obd_dev_mutex
;
557 struct lvfs_run_ctxt obd_lvfs_ctxt
;
558 struct obd_llog_group obd_olg
; /* default llog group */
559 struct obd_device
*obd_observer
;
560 struct rw_semaphore obd_observer_link_sem
;
561 struct obd_notify_upcall obd_upcall
;
562 struct obd_export
*obd_self_export
;
565 struct client_obd cli
;
566 struct echo_client_obd echo_client
;
571 /* Fields used by LProcFS */
572 struct lprocfs_stats
*obd_stats
;
573 unsigned int obd_cntr_base
;
575 struct lprocfs_stats
*md_stats
;
576 unsigned int md_cntr_base
;
578 struct dentry
*obd_debugfs_entry
;
579 struct dentry
*obd_svc_debugfs_entry
;
580 struct lprocfs_stats
*obd_svc_stats
;
581 atomic_t obd_evict_inprogress
;
582 wait_queue_head_t obd_evict_inprogress_waitq
;
583 struct list_head obd_evict_list
; /* protected with pet_lock */
586 * Ldlm pool part. Save last calculated SLV and Limit.
588 rwlock_t obd_pool_lock
;
592 int obd_conn_inprogress
;
595 * A list of outstanding class_incref()'s against this obd. For
598 struct lu_ref obd_reference
;
600 struct kobject obd_kobj
; /* sysfs object */
601 struct completion obd_kobj_unregister
;
604 /* get/set_info keys */
605 #define KEY_ASYNC "async"
606 #define KEY_CHANGELOG_CLEAR "changelog_clear"
607 #define KEY_FID2PATH "fid2path"
608 #define KEY_CHECKSUM "checksum"
609 #define KEY_CLEAR_FS "clear_fs"
610 #define KEY_CONN_DATA "conn_data"
611 #define KEY_EVICT_BY_NID "evict_by_nid"
612 #define KEY_FIEMAP "fiemap"
613 #define KEY_FLUSH_CTX "flush_ctx"
614 #define KEY_GRANT_SHRINK "grant_shrink"
615 #define KEY_HSM_COPYTOOL_SEND "hsm_send"
616 #define KEY_INIT_RECOV_BACKUP "init_recov_bk"
617 #define KEY_INTERMDS "inter_mds"
618 #define KEY_LAST_ID "last_id"
619 #define KEY_LAST_FID "last_fid"
620 #define KEY_MAX_EASIZE "max_easize"
621 #define KEY_DEFAULT_EASIZE "default_easize"
622 #define KEY_MGSSEC "mgssec"
623 #define KEY_READ_ONLY "read-only"
624 #define KEY_REGISTER_TARGET "register_target"
625 #define KEY_SET_FS "set_fs"
626 #define KEY_TGT_COUNT "tgt_count"
627 /* KEY_SET_INFO in lustre_idl.h */
628 #define KEY_SPTLRPC_CONF "sptlrpc_conf"
630 #define KEY_CACHE_SET "cache_set"
631 #define KEY_CACHE_LRU_SHRINK "cache_lru_shrink"
635 static inline int it_to_lock_mode(struct lookup_intent
*it
)
637 /* CREAT needs to be tested before open (both could be set) */
638 if (it
->it_op
& IT_CREAT
)
640 else if (it
->it_op
& (IT_GETATTR
| IT_OPEN
| IT_LOOKUP
|
643 else if (it
->it_op
& IT_READDIR
)
645 else if (it
->it_op
& IT_GETXATTR
)
647 else if (it
->it_op
& IT_SETXATTR
)
650 LASSERTF(0, "Invalid it_op: %d\n", it
->it_op
);
655 MF_MDC_CANCEL_FID1
= BIT(0),
656 MF_MDC_CANCEL_FID2
= BIT(1),
657 MF_MDC_CANCEL_FID3
= BIT(2),
658 MF_MDC_CANCEL_FID4
= BIT(3),
659 MF_GET_MDT_IDX
= BIT(4),
663 CLI_SET_MEA
= BIT(0),
664 CLI_RM_ENTRY
= BIT(1),
667 CLI_MIGRATE
= BIT(4),
671 struct lu_fid op_fid1
; /* operation fid1 (usually parent) */
672 struct lu_fid op_fid2
; /* operation fid2 (usually child) */
673 struct lu_fid op_fid3
; /* 2 extra fids to find conflicting */
674 struct lu_fid op_fid4
; /* to the operation locks. */
675 u32 op_mds
; /* what mds server open will go to */
676 struct lustre_handle op_handle
;
681 struct lmv_stripe_md
*op_mea1
;
682 struct lmv_stripe_md
*op_mea2
;
683 __u32 op_suppgids
[2];
690 /* iattr fields and blocks. */
691 struct iattr op_attr
;
692 unsigned int op_attr_flags
;
694 loff_t op_attr_blocks
;
698 /* Various operation flags. */
699 enum mds_op_bias op_bias
;
701 /* Used by readdir */
704 /* Used by readdir */
707 /* used to transfer info between the stacks of MD client
708 * see enum op_cli_flags
710 enum md_cli_flags op_cli_flags
;
712 /* File object data version for HSM release, on client */
713 __u64 op_data_version
;
714 struct lustre_handle op_lease_handle
;
716 /* default stripe offset */
717 __u32 op_default_stripe_offset
;
721 int (*md_blocking_ast
)(struct ldlm_lock
*lock
,
722 struct ldlm_lock_desc
*desc
,
723 void *data
, int flag
);
726 struct md_enqueue_info
;
727 /* metadata stat-ahead */
729 struct md_enqueue_info
{
730 struct md_op_data mi_data
;
731 struct lookup_intent mi_it
;
732 struct lustre_handle mi_lockh
;
733 struct inode
*mi_dir
;
734 int (*mi_cb
)(struct ptlrpc_request
*req
,
735 struct md_enqueue_info
*minfo
, int rc
);
740 struct module
*owner
;
741 int (*iocontrol
)(unsigned int cmd
, struct obd_export
*exp
, int len
,
742 void *karg
, void __user
*uarg
);
743 int (*get_info
)(const struct lu_env
*env
, struct obd_export
*,
744 __u32 keylen
, void *key
, __u32
*vallen
, void *val
);
745 int (*set_info_async
)(const struct lu_env
*, struct obd_export
*,
746 __u32 keylen
, void *key
,
747 __u32 vallen
, void *val
,
748 struct ptlrpc_request_set
*set
);
749 int (*setup
)(struct obd_device
*dev
, struct lustre_cfg
*cfg
);
750 int (*precleanup
)(struct obd_device
*dev
);
751 int (*cleanup
)(struct obd_device
*dev
);
752 int (*process_config
)(struct obd_device
*dev
, u32 len
, void *data
);
753 int (*postrecov
)(struct obd_device
*dev
);
754 int (*add_conn
)(struct obd_import
*imp
, struct obd_uuid
*uuid
,
756 int (*del_conn
)(struct obd_import
*imp
, struct obd_uuid
*uuid
);
757 /* connect to the target device with given connection
758 * data. @ocd->ocd_connect_flags is modified to reflect flags actually
759 * granted by the target, which are guaranteed to be a subset of flags
760 * asked for. If @ocd == NULL, use default parameters.
762 int (*connect
)(const struct lu_env
*env
,
763 struct obd_export
**exp
, struct obd_device
*src
,
764 struct obd_uuid
*cluuid
, struct obd_connect_data
*ocd
,
766 int (*reconnect
)(const struct lu_env
*env
,
767 struct obd_export
*exp
, struct obd_device
*src
,
768 struct obd_uuid
*cluuid
,
769 struct obd_connect_data
*ocd
,
771 int (*disconnect
)(struct obd_export
*exp
);
773 /* Initialize/finalize fids infrastructure. */
774 int (*fid_init
)(struct obd_device
*obd
,
775 struct obd_export
*exp
, enum lu_cli_type type
);
776 int (*fid_fini
)(struct obd_device
*obd
);
778 /* Allocate new fid according to passed @hint. */
779 int (*fid_alloc
)(const struct lu_env
*env
, struct obd_export
*exp
,
780 struct lu_fid
*fid
, struct md_op_data
*op_data
);
783 * Object with @fid is getting deleted, we may want to do something
786 int (*statfs
)(const struct lu_env
*, struct obd_export
*exp
,
787 struct obd_statfs
*osfs
, __u64 max_age
, __u32 flags
);
788 int (*statfs_async
)(struct obd_export
*exp
, struct obd_info
*oinfo
,
789 __u64 max_age
, struct ptlrpc_request_set
*set
);
790 int (*create
)(const struct lu_env
*env
, struct obd_export
*exp
,
792 int (*destroy
)(const struct lu_env
*env
, struct obd_export
*exp
,
794 int (*setattr
)(const struct lu_env
*, struct obd_export
*exp
,
796 int (*getattr
)(const struct lu_env
*env
, struct obd_export
*exp
,
798 int (*preprw
)(const struct lu_env
*env
, int cmd
,
799 struct obd_export
*exp
, struct obdo
*oa
, int objcount
,
800 struct obd_ioobj
*obj
, struct niobuf_remote
*remote
,
801 int *nr_pages
, struct niobuf_local
*local
);
802 int (*commitrw
)(const struct lu_env
*env
, int cmd
,
803 struct obd_export
*exp
, struct obdo
*oa
,
804 int objcount
, struct obd_ioobj
*obj
,
805 struct niobuf_remote
*remote
, int pages
,
806 struct niobuf_local
*local
, int rc
);
807 int (*init_export
)(struct obd_export
*exp
);
808 int (*destroy_export
)(struct obd_export
*exp
);
810 /* metadata-only methods */
811 int (*import_event
)(struct obd_device
*, struct obd_import
*,
812 enum obd_import_event
);
814 int (*notify
)(struct obd_device
*obd
, struct obd_device
*watched
,
815 enum obd_notify_event ev
, void *data
);
817 int (*health_check
)(const struct lu_env
*env
, struct obd_device
*);
818 struct obd_uuid
*(*get_uuid
)(struct obd_export
*exp
);
821 int (*quotactl
)(struct obd_device
*, struct obd_export
*,
822 struct obd_quotactl
*);
825 int (*pool_new
)(struct obd_device
*obd
, char *poolname
);
826 int (*pool_del
)(struct obd_device
*obd
, char *poolname
);
827 int (*pool_add
)(struct obd_device
*obd
, char *poolname
,
829 int (*pool_rem
)(struct obd_device
*obd
, char *poolname
,
831 void (*getref
)(struct obd_device
*obd
);
832 void (*putref
)(struct obd_device
*obd
);
834 * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
835 * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
836 * Also, add a wrapper function in include/linux/obd_class.h.
842 struct mdt_body
*body
;
843 struct lu_buf layout
;
844 struct lmv_stripe_md
*lmv
;
845 #ifdef CONFIG_FS_POSIX_ACL
846 struct posix_acl
*posix_acl
;
848 struct mdt_remote_perm
*remote_perm
;
851 struct md_open_data
{
852 struct obd_client_handle
*mod_och
;
853 struct ptlrpc_request
*mod_open_req
;
854 struct ptlrpc_request
*mod_close_req
;
855 atomic_t mod_refcount
;
859 struct obd_client_handle
{
860 struct lustre_handle och_fh
;
861 struct lu_fid och_fid
;
862 struct md_open_data
*och_mod
;
863 struct lustre_handle och_lease_handle
; /* open lock for lease */
868 #define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
870 struct lookup_intent
;
874 int (*getstatus
)(struct obd_export
*, struct lu_fid
*);
875 int (*null_inode
)(struct obd_export
*, const struct lu_fid
*);
876 int (*close
)(struct obd_export
*, struct md_op_data
*,
877 struct md_open_data
*, struct ptlrpc_request
**);
878 int (*create
)(struct obd_export
*, struct md_op_data
*,
879 const void *, size_t, umode_t
, uid_t
, gid_t
,
880 cfs_cap_t
, __u64
, struct ptlrpc_request
**);
881 int (*enqueue
)(struct obd_export
*, struct ldlm_enqueue_info
*,
882 const union ldlm_policy_data
*,
883 struct lookup_intent
*, struct md_op_data
*,
884 struct lustre_handle
*, __u64
);
885 int (*getattr
)(struct obd_export
*, struct md_op_data
*,
886 struct ptlrpc_request
**);
887 int (*getattr_name
)(struct obd_export
*, struct md_op_data
*,
888 struct ptlrpc_request
**);
889 int (*intent_lock
)(struct obd_export
*, struct md_op_data
*,
890 struct lookup_intent
*,
891 struct ptlrpc_request
**,
892 ldlm_blocking_callback
, __u64
);
893 int (*link
)(struct obd_export
*, struct md_op_data
*,
894 struct ptlrpc_request
**);
895 int (*rename
)(struct obd_export
*, struct md_op_data
*,
896 const char *, size_t, const char *, size_t,
897 struct ptlrpc_request
**);
898 int (*setattr
)(struct obd_export
*, struct md_op_data
*, void *,
899 size_t, struct ptlrpc_request
**);
900 int (*sync
)(struct obd_export
*, const struct lu_fid
*,
901 struct ptlrpc_request
**);
902 int (*read_page
)(struct obd_export
*, struct md_op_data
*,
903 struct md_callback
*cb_op
, __u64 hash_offset
,
904 struct page
**ppage
);
905 int (*unlink
)(struct obd_export
*, struct md_op_data
*,
906 struct ptlrpc_request
**);
908 int (*setxattr
)(struct obd_export
*, const struct lu_fid
*,
909 u64
, const char *, const char *, int, int, int, __u32
,
910 struct ptlrpc_request
**);
912 int (*getxattr
)(struct obd_export
*, const struct lu_fid
*,
913 u64
, const char *, const char *, int, int, int,
914 struct ptlrpc_request
**);
916 int (*init_ea_size
)(struct obd_export
*, u32
, u32
);
918 int (*get_lustre_md
)(struct obd_export
*, struct ptlrpc_request
*,
919 struct obd_export
*, struct obd_export
*,
922 int (*free_lustre_md
)(struct obd_export
*, struct lustre_md
*);
924 int (*merge_attr
)(struct obd_export
*,
925 const struct lmv_stripe_md
*lsm
,
926 struct cl_attr
*attr
, ldlm_blocking_callback
);
928 int (*set_open_replay_data
)(struct obd_export
*,
929 struct obd_client_handle
*,
930 struct lookup_intent
*);
931 int (*clear_open_replay_data
)(struct obd_export
*,
932 struct obd_client_handle
*);
933 int (*set_lock_data
)(struct obd_export
*, const struct lustre_handle
*,
936 enum ldlm_mode (*lock_match
)(struct obd_export
*, __u64
,
937 const struct lu_fid
*, enum ldlm_type
,
938 union ldlm_policy_data
*, enum ldlm_mode
,
939 struct lustre_handle
*);
941 int (*cancel_unused
)(struct obd_export
*, const struct lu_fid
*,
942 union ldlm_policy_data
*, enum ldlm_mode
,
943 enum ldlm_cancel_flags flags
, void *opaque
);
945 int (*get_fid_from_lsm
)(struct obd_export
*,
946 const struct lmv_stripe_md
*,
947 const char *name
, int namelen
,
950 int (*intent_getattr_async
)(struct obd_export
*,
951 struct md_enqueue_info
*,
952 struct ldlm_enqueue_info
*);
954 int (*revalidate_lock
)(struct obd_export
*, struct lookup_intent
*,
955 struct lu_fid
*, __u64
*bits
);
957 int (*unpackmd
)(struct obd_export
*exp
, struct lmv_stripe_md
**plsm
,
958 const union lmv_mds_md
*lmv
, size_t lmv_size
);
960 * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
961 * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a
962 * wrapper function in include/linux/obd_class.h.
966 static inline struct md_open_data
*obd_mod_alloc(void)
968 struct md_open_data
*mod
;
970 mod
= kzalloc(sizeof(*mod
), GFP_NOFS
);
973 atomic_set(&mod
->mod_refcount
, 1);
977 #define obd_mod_get(mod) atomic_inc(&(mod)->mod_refcount)
978 #define obd_mod_put(mod) \
980 if (atomic_dec_and_test(&(mod)->mod_refcount)) { \
981 if ((mod)->mod_open_req) \
982 ptlrpc_req_finished((mod)->mod_open_req); \
987 void obdo_from_inode(struct obdo
*dst
, struct inode
*src
, u32 valid
);
988 void obdo_set_parent_fid(struct obdo
*dst
, const struct lu_fid
*parent
);
990 /* return 1 if client should be resend request */
991 static inline int client_should_resend(int resend
, struct client_obd
*cli
)
993 return atomic_read(&cli
->cl_resends
) ?
994 atomic_read(&cli
->cl_resends
) > resend
: 1;
998 * Return device name for this device
1000 * XXX: lu_device is declared before obd_device, while a pointer pointing
1001 * back to obd_device in lu_device, so this helper function defines here
1002 * instead of in lu_object.h
1004 static inline const char *lu_dev_name(const struct lu_device
*lu_dev
)
1006 return lu_dev
->ld_obd
->obd_name
;
1009 static inline bool filename_is_volatile(const char *name
, size_t namelen
,
1015 if (strncmp(name
, LUSTRE_VOLATILE_HDR
, LUSTRE_VOLATILE_HDR_LEN
) != 0)
1018 /* caller does not care about idx */
1022 /* volatile file, the MDT can be set from name */
1023 /* name format is LUSTRE_VOLATILE_HDR:[idx]: */
1024 /* if no MDT is specified, use std way */
1025 if (namelen
< LUSTRE_VOLATILE_HDR_LEN
+ 2)
1027 /* test for no MDT idx case */
1028 if ((*(name
+ LUSTRE_VOLATILE_HDR_LEN
) == ':') &&
1029 (*(name
+ LUSTRE_VOLATILE_HDR_LEN
+ 1) == ':')) {
1033 /* we have an idx, read it */
1034 start
= name
+ LUSTRE_VOLATILE_HDR_LEN
+ 1;
1035 *idx
= simple_strtoul(start
, &end
, 0);
1037 * no digit, no trailing :, negative value
1039 if (((*idx
== 0) && (end
== start
)) ||
1040 (*end
!= ':') || (*idx
< 0))
1045 /* bad format of mdt idx, we cannot return an error
1046 * to caller so we use hash algo
1048 CERROR("Bad volatile file name format: %s\n",
1049 name
+ LUSTRE_VOLATILE_HDR_LEN
);
1053 static inline int cli_brw_size(struct obd_device
*obd
)
1055 return obd
->u
.cli
.cl_max_pages_per_rpc
<< PAGE_SHIFT
;
1059 * when RPC size or the max RPCs in flight is increased, the max dirty pages
1060 * of the client should be increased accordingly to avoid sending fragmented
1061 * RPCs over the network when the client runs out of the maximum dirty space
1062 * when so many RPCs are being generated.
1064 static inline void client_adjust_max_dirty(struct client_obd
*cli
)
1067 if (cli
->cl_dirty_max_pages
<= 0)
1068 cli
->cl_dirty_max_pages
=
1069 (OSC_MAX_DIRTY_DEFAULT
* 1024 * 1024) >> PAGE_SHIFT
;
1071 unsigned long dirty_max
= cli
->cl_max_rpcs_in_flight
*
1072 cli
->cl_max_pages_per_rpc
;
1074 if (dirty_max
> cli
->cl_dirty_max_pages
)
1075 cli
->cl_dirty_max_pages
= dirty_max
;
1078 if (cli
->cl_dirty_max_pages
> totalram_pages
/ 8)
1079 cli
->cl_dirty_max_pages
= totalram_pages
/ 8;
1082 #endif /* __OBD_H */