2 md_k.h : kernel internal structure of the Linux MD driver
3 Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 You should have received a copy of the GNU General Public License
11 (for example /usr/src/linux/COPYING); if not, write to the Free
12 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
/*
 * Personality identifiers (only the entries visible in this view).
 * The UL suffix keeps them unsigned long.
 */
#define MD_RESERVED       0UL
#define TRANSLUCENT       5UL
#define MAX_PERSONALITY   7UL
28 extern inline int pers_to_level (int pers
)
32 case TRANSLUCENT
: return -2;
33 case LINEAR
: return -1;
38 panic("pers_to_level()");
41 extern inline int level_to_pers (int level
)
45 case -2: return TRANSLUCENT
;
46 case -1: return LINEAR
;
/*
 * Forward typedefs for the array (mddev) and member-device (rdev)
 * structures, so that pointers to them can be used before the struct
 * bodies are declared.
 */
typedef struct mddev_s mddev_t;
typedef struct mdk_rdev_s mdk_rdev_t;
59 #error MD doesnt handle bigger kdev yet
/* Max number of md devices: one per possible minor number. */
#define MAX_MD_DEVS  (1<<MINORBITS)
65 * Maps a kdev to an mddev/subdev. How 'data' is handled is up to
66 * the personality. (e.g., HSM uses this to identify individual LVs)
68 typedef struct dev_mapping_s
{
73 extern dev_mapping_t mddev_map
[MAX_MD_DEVS
];
75 extern inline mddev_t
* kdev_to_mddev (kdev_t dev
)
77 if (MAJOR(dev
) != MD_MAJOR
)
79 return mddev_map
[MINOR(dev
)].mddev
;
83 * options passed in raidrun:
/* Upper bound on the chunk size: 4096 * 1024 = 4 MiB, presumably in bytes. */
#define MAX_CHUNK_SIZE (4096*1024)

/* MD read-ahead is an alias for MAX_READAHEAD. */
#define MD_READAHEAD	MAX_READAHEAD
93 extern inline int disk_faulty(mdp_disk_t
* d
)
95 return d
->state
& (1 << MD_DISK_FAULTY
);
98 extern inline int disk_active(mdp_disk_t
* d
)
100 return d
->state
& (1 << MD_DISK_ACTIVE
);
103 extern inline int disk_sync(mdp_disk_t
* d
)
105 return d
->state
& (1 << MD_DISK_SYNC
);
108 extern inline int disk_spare(mdp_disk_t
* d
)
110 return !disk_sync(d
) && !disk_active(d
) && !disk_faulty(d
);
113 extern inline int disk_removed(mdp_disk_t
* d
)
115 return d
->state
& (1 << MD_DISK_REMOVED
);
118 extern inline void mark_disk_faulty(mdp_disk_t
* d
)
120 d
->state
|= (1 << MD_DISK_FAULTY
);
123 extern inline void mark_disk_active(mdp_disk_t
* d
)
125 d
->state
|= (1 << MD_DISK_ACTIVE
);
128 extern inline void mark_disk_sync(mdp_disk_t
* d
)
130 d
->state
|= (1 << MD_DISK_SYNC
);
133 extern inline void mark_disk_spare(mdp_disk_t
* d
)
138 extern inline void mark_disk_removed(mdp_disk_t
* d
)
140 d
->state
= (1 << MD_DISK_FAULTY
) | (1 << MD_DISK_REMOVED
);
143 extern inline void mark_disk_inactive(mdp_disk_t
* d
)
145 d
->state
&= ~(1 << MD_DISK_ACTIVE
);
148 extern inline void mark_disk_nonsync(mdp_disk_t
* d
)
150 d
->state
&= ~(1 << MD_DISK_SYNC
);
154 * MD's 'extended' device
158 struct md_list_head same_set
; /* RAID devices within the same set */
159 struct md_list_head all
; /* all RAID devices */
160 struct md_list_head pending
; /* undetected RAID devices */
162 kdev_t dev
; /* Device number */
163 kdev_t old_dev
; /* "" when it was last imported */
164 unsigned long size
; /* Device size (in blocks) */
165 mddev_t
*mddev
; /* RAID array if running */
166 unsigned long last_events
; /* IO event timestamp */
168 struct block_device
*bdev
; /* block device handle */
171 unsigned long sb_offset
;
173 int faulty
; /* if faulty do not issue IO requests */
174 int desc_nr
; /* descriptor index in the superblock */
179 * disk operations in a working array:
/*
 * Disk operation codes for a working array; presumably the values
 * passed as the 'state' argument of pers->diskop() -- confirm against
 * callers.
 */
#define DISKOP_SPARE_INACTIVE	0
#define DISKOP_SPARE_WRITE	1
#define DISKOP_SPARE_ACTIVE	2
#define DISKOP_HOT_REMOVE_DISK	3
#define DISKOP_HOT_ADD_DISK	4
/*
 * Forward typedef for the personality descriptor, so that pointers to
 * it can be used before struct mdk_personality_s is declared.
 */
typedef struct mdk_personality_s mdk_personality_t;
192 mdk_personality_t
*pers
;
196 struct md_list_head disks
;
200 unsigned long curr_resync
; /* blocks scheduled */
201 unsigned long resync_mark
; /* a recent timestamp */
202 unsigned long resync_mark_cnt
;/* blocks written at resync_mark */
204 int recovery_running
;
205 struct semaphore reconfig_sem
;
206 struct semaphore recovery_sem
;
207 struct semaphore resync_sem
;
210 atomic_t recovery_active
; /* blocks scheduled, but not written */
211 md_wait_queue_head_t recovery_wait
;
213 struct md_list_head all_mddevs
;
216 struct mdk_personality_s
219 int (*make_request
)(mddev_t
*mddev
, int rw
, struct buffer_head
* bh
);
220 int (*run
)(mddev_t
*mddev
);
221 int (*stop
)(mddev_t
*mddev
);
222 int (*status
)(char *page
, mddev_t
*mddev
);
223 int (*error_handler
)(mddev_t
*mddev
, kdev_t dev
);
226 * Some personalities (RAID-1, RAID-5) can have disks hot-added and
227 * hot-removed. Hot removal is different from failure. (failure marks
228 * a disk inactive, but the disk is still part of the array.) The interface
229 * to such operations is the 'pers->diskop()' function, which can be NULL.
231 * the diskop function can change the pointer pointing to the incoming
232 * descriptor, but must do so very carefully. (currently only
233 * SPARE_ACTIVE expects such a change)
235 int (*diskop
) (mddev_t
*mddev
, mdp_disk_t
**descriptor
, int state
);
237 int (*stop_resync
)(mddev_t
*mddev
);
238 int (*restart_resync
)(mddev_t
*mddev
);
239 int (*sync_request
)(mddev_t
*mddev
, unsigned long block_nr
);
244 * Currently we index md_array directly, based on the minor
245 * number. This will have to change to dynamic allocation
246 * once we start supporting partitioning of md devices.
248 extern inline int mdidx (mddev_t
* mddev
)
250 return mddev
->__minor
;
253 extern inline kdev_t
mddev_to_kdev(mddev_t
* mddev
)
255 return MKDEV(MD_MAJOR
, mdidx(mddev
));
258 extern mdk_rdev_t
* find_rdev(mddev_t
* mddev
, kdev_t dev
);
259 extern mdk_rdev_t
* find_rdev_nr(mddev_t
*mddev
, int nr
);
262 * iterates through some rdev ringlist. It's safe to remove the
263 * current 'rdev'. Don't touch 'tmp' though.
265 #define ITERATE_RDEV_GENERIC(head,field,rdev,tmp) \
267 for (tmp = head.next; \
268 rdev = md_list_entry(tmp, mdk_rdev_t, field), \
269 tmp = tmp->next, tmp->prev != &head \
272 * iterates through the 'same array disks' ringlist
/*
 * Walk the rdevs linked on (mddev)->disks through their 'same_set'
 * list member. 'rdev' receives each entry, 'tmp' is the list cursor.
 */
#define ITERATE_RDEV(mddev,rdev,tmp)					\
	ITERATE_RDEV_GENERIC((mddev)->disks,same_set,rdev,tmp)
278 * Same as above, but assumes that the devices have rdev->desc_nr numbered
279 * from 0 to mddev->nb_dev - 1, and iterates through rdevs in ascending order.
/*
 * Visit rdevs by index: for i = 0 .. nb_dev-1, 'rdev' is set to
 * find_rdev_nr(mddev, i) before the bound is checked. The lookup
 * therefore also runs once with i == nb_dev on loop exit, so 'rdev'
 * must not be relied upon after the loop.
 *
 * 'mddev' is parenthesised in the bound so that an expression argument
 * (e.g. a cast or pointer arithmetic) binds correctly before '->'.
 */
#define ITERATE_RDEV_ORDERED(mddev,rdev,i)				\
	for (i = 0; rdev = find_rdev_nr(mddev, i), i < (mddev)->nb_dev; i++)
286 * Iterates through all 'RAID managed disks'
/*
 * Walk every rdev linked on the all_raid_disks list through its 'all'
 * list member. 'rdev' receives each entry, 'tmp' is the list cursor.
 */
#define ITERATE_RDEV_ALL(rdev,tmp)					\
	ITERATE_RDEV_GENERIC(all_raid_disks,all,rdev,tmp)
292 * Iterates through 'pending RAID disks'
/*
 * Walk every rdev linked on the pending_raid_disks list through its
 * 'pending' list member. 'rdev' receives each entry, 'tmp' is the list
 * cursor.
 */
#define ITERATE_RDEV_PENDING(rdev,tmp)					\
	ITERATE_RDEV_GENERIC(pending_raid_disks,pending,rdev,tmp)
298 * iterates through all used mddevs in the system.
300 #define ITERATE_MDDEV(mddev,tmp) \
302 for (tmp = all_mddevs.next; \
303 mddev = md_list_entry(tmp, mddev_t, all_mddevs), \
304 tmp = tmp->next, tmp->prev != &all_mddevs \
307 extern inline int lock_mddev (mddev_t
* mddev
)
309 return down_interruptible(&mddev
->reconfig_sem
);
312 extern inline void unlock_mddev (mddev_t
* mddev
)
314 up(&mddev
->reconfig_sem
);
/*
 * Swap the values of two lvalues of compatible type.
 *
 * Both arguments are evaluated more than once, so they must be free of
 * side effects. They are parenthesised so that expression arguments
 * expand with the intended precedence. The temporary takes the type of
 * 'x' via __typeof__ (a GNU extension).
 */
#define xchg_values(x,y) do { __typeof__(x) __tmp = (x);	\
				(x) = (y); (y) = __tmp; } while (0)
320 typedef struct mdk_thread_s
{
321 void (*run
) (void *data
);
323 md_wait_queue_head_t wqueue
;
325 struct semaphore
*sem
;
326 struct task_struct
*tsk
;
/* NOTE(review): presumably a bit number in the md thread's flags word -- confirm. */
#define THREAD_WAKEUP  0

/* Size of the namebuf[] array in struct dev_name_s. */
#define MAX_DISKNAME_LEN 64
334 typedef struct dev_name_s
{
335 struct md_list_head list
;
337 char namebuf
[MAX_DISKNAME_LEN
];
342 #define __wait_event_lock_irq(wq, condition, lock) \
344 wait_queue_t __wait; \
345 init_waitqueue_entry(&__wait, current); \
347 add_wait_queue(&wq, &__wait); \
349 set_current_state(TASK_UNINTERRUPTIBLE); \
352 spin_unlock_irq(&lock); \
353 run_task_queue(&tq_disk); \
355 spin_lock_irq(&lock); \
357 current->state = TASK_RUNNING; \
358 remove_wait_queue(&wq, &__wait); \
361 #define wait_event_lock_irq(wq, condition, lock) \
365 __wait_event_lock_irq(wq, condition, lock); \