2 * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
19 #include <linux/types.h>
20 #include <linux/connector.h>
22 #define DST_NAMELEN 32
23 #define DST_NAME "dst"
26 /* Remove node with given id from storage */
28 /* Add remote node with given id to the storage */
30 /* Add local node with given id to the storage to be exported and used by remote peers */
32 /* Crypto initialization command (hash/cipher used to protect the connection) */
34 /* Security attributes for given connection (permissions for example) */
36 /* Register given node in the block layer subsystem */
44 char name
[DST_NAMELEN
];
47 /* Command itself (see above) */
49 /* Maximum number of pages per single request in this device */
51 /* Stale/error transaction scanning timeout in milliseconds */
52 __u32 trans_scan_timeout
;
53 /* Maximum number of retry sends before completing transaction as broken */
54 __u32 trans_max_retries
;
59 /* Reply command carries completion status */
68 * Unfortunately, the socket address structure is not exported to userspace
69 * and is redefined there.
71 #define SADDR_MAX_DATA 128
74 /* address family, AF_xxx */
75 unsigned short sa_family
;
76 /* Protocol address, up to SADDR_MAX_DATA bytes */
77 char sa_data
[SADDR_MAX_DATA
];
78 /* Number of bytes used in sa_data */
79 unsigned short sa_data_len
;
82 /* Address structure */
83 struct dst_network_ctl
85 /* Socket type: datagram, stream...*/
87 /* Let me guess, is it a Jupiter diameter? */
95 /* Cipher and hash names */
96 char cipher_algo
[DST_NAMELEN
];
97 char hash_algo
[DST_NAMELEN
];
99 /* Key sizes. Can be zero for digest for example */
100 unsigned int cipher_keysize
, hash_keysize
;
101 /* Alignment. Calculated by the DST itself. */
102 unsigned int crypto_attached_size
;
103 /* Number of threads to perform crypto operations */
107 /* Export security attributes have these bits checked when a client connects */
108 #define DST_PERM_READ (1<<0)
109 #define DST_PERM_WRITE (1<<1)
112 * Right now it is a simple model, where each remote address
113 * is assigned a set of permissions it is allowed to use.
114 * In real world block device does not know anything but
115 * reading and writing, so it should be more than enough.
117 struct dst_secure_user
119 unsigned int permissions
;
124 * Export control command: device to export and network address to accept
125 * clients to work with given device
127 struct dst_export_ctl
129 char device
[DST_NAMELEN
];
130 struct dst_network_ctl ctl
;
134 DST_CFG
= 1, /* Request remote configuration */
135 DST_IO
, /* IO command */
136 DST_IO_RESPONSE
, /* IO response */
137 DST_PING
, /* Keepalive message */
143 /* Network command itself, see above */
146 * Size of the attached data
147 * (in most cases, for READ command it means how many bytes were requested)
150 /* Crypto size: number of attached bytes with digest/hmac */
152 /* Here we can carry secret data */
154 /* Read/write bits, see how they are encoded in bio structure */
158 /* Unique command id (like transaction ID) */
160 /* Sector to start IO from */
162 /* Hash data is placed after this header */
167 * Convert command to/from network byte order.
168 * We do not use hton*() functions, since there is
169 * no 64-bit implementation.
/*
 * dst_convert_cmd() - byte-swap the fields of a DST command in place.
 * @c: command whose cmd, csize, size, sector, id, flags and rw members
 *     are each overwritten with their converted value.
 *
 * cmd, csize and size are passed through __cpu_to_be32(); sector, id,
 * flags and rw are passed through __cpu_to_be64(). Returns nothing.
 * NOTE(review): the body of struct dst_cmd is not visible in this
 * excerpt - presumably the first three members are 32-bit and the
 * remaining four are 64-bit; confirm against the structure definition.
 */
171 static inline void dst_convert_cmd(struct dst_cmd
*c
)
173 c
->cmd
= __cpu_to_be32(c
->cmd
);
174 c
->csize
= __cpu_to_be32(c
->csize
);
175 c
->size
= __cpu_to_be32(c
->size
);
176 c
->sector
= __cpu_to_be64(c
->sector
);
177 c
->id
= __cpu_to_be64(c
->id
);
178 c
->flags
= __cpu_to_be64(c
->flags
);
179 c
->rw
= __cpu_to_be64(c
->rw
);
183 typedef __u64 dst_gen_t
;
187 #include <linux/blkdev.h>
188 #include <linux/bio.h>
189 #include <linux/device.h>
190 #include <linux/mempool.h>
191 #include <linux/net.h>
192 #include <linux/poll.h>
193 #include <linux/rbtree.h>
/*
 * dprintk() - debug logging helper.
 *
 * When CONFIG_DST_DEBUG is defined, dprintk() is a macro that forwards its
 * format and arguments to printk() at KERN_NOTICE level. The second
 * definition is an empty inline function carrying the printf format
 * attribute (format string is argument 1, variadic arguments start at 2),
 * so it does nothing at run time while still being declared printf-like.
 * NOTE(review): the #else/#endif lines that separate the two variants
 * are missing from this excerpt.
 */
195 #ifdef CONFIG_DST_DEBUG
196 #define dprintk(f, a...) printk(KERN_NOTICE f, ##a)
198 static inline void __attribute__ ((format (printf
, 1, 2)))
199 dprintk(const char *fmt
, ...) {}
206 /* DST node we are working with */
209 /* Entry inside transaction tree */
210 struct rb_node trans_entry
;
212 /* Merlin kills this transaction when this memory cell equals zero */
215 /* How this transaction should be processed by crypto engine */
217 /* How many times this transaction was resent */
219 /* Completion status */
222 /* When did we send it to the remote peer */
226 * Well, computers do not speak, they have a unique id instead */
229 /* Block IO we are working with */
232 /* Network command for above block IO request */
236 struct dst_crypto_engine
238 /* What should we do with all block requests */
239 struct crypto_hash
*hash
;
240 struct crypto_ablkcipher
*cipher
;
242 /* Pool of pages used to encrypt data into before sending */
246 /* What to do with current request */
248 /* Who we are and where do we go */
249 struct scatterlist
*src
, *dst
;
251 /* Maximum timeout waiting for encryption to be completed */
253 /* IV is a 64-bit sequential counter */
259 /* Cached temporary data lives here */
266 /* The main state protection */
267 struct mutex state_lock
;
269 /* Polling machinery for sockets */
271 wait_queue_head_t
*whead
;
272 /* Most of events are being waited here */
273 wait_queue_head_t thread_wait
;
276 struct dst_node
*node
;
278 /* Network address for this state */
279 struct dst_network_ctl ctl
;
281 /* Permissions to work with: read-only or rw connection */
284 /* Called when we need to clean private data */
285 void (* cleanup
)(struct dst_state
*st
);
287 /* Used by the server: BIO completion queues BIOs here */
288 struct list_head request_list
;
289 spinlock_t request_lock
;
291 /* Guess what? No, it is not number of planets */
294 /* This flag is set when the connection should be dropped */
298 * Socket to work with. Second pointer is used for
299 * lockless check if socket was changed before performing
300 * next action (like working with cached polling result)
302 struct socket
*socket
, *read_socket
;
304 /* Cached preallocated data */
308 /* Currently processed command */
317 /* Local device name for export devices */
318 char local
[DST_NAMELEN
];
321 struct dst_network_ctl net
;
323 /* Sysfs bits use this */
324 struct device device
;
329 struct list_head node_entry
;
331 /* Hi, my name is stored here */
332 char name
[DST_NAMELEN
];
333 /* My cache name is stored here */
334 char cache_name
[DST_NAMELEN
];
336 /* Block device attached to given node.
337 * Only valid for exporting nodes */
338 struct block_device
*bdev
;
339 /* Network state machine for given peer */
340 struct dst_state
*state
;
342 /* Block IO machinery */
343 struct request_queue
*queue
;
344 struct gendisk
*disk
;
346 /* Number of threads in processing pool */
348 /* Maximum number of pages in single IO */
351 /* I'm that big in bytes */
354 /* Exported to userspace node information */
355 struct dst_info
*info
;
358 * Security attribute list.
359 * Used only by exporting node currently.
361 struct list_head security_list
;
362 struct mutex security_lock
;
365 * When this underflows below zero, the universe collapses.
366 * But this will not happen, since node will be freed,
367 * when reference counter reaches zero.
371 /* How precisely should I be started? */
372 int (*start
)(struct dst_node
*);
374 /* Crypto capabilities */
375 struct dst_crypto_ctl crypto
;
379 /* Pool of processing threads */
380 struct thread_pool
*pool
;
382 /* Transaction IDs live here */
386 * How frequently and how many times transaction
387 * tree should be scanned to drop stale objects.
389 long trans_scan_timeout
;
390 int trans_max_retries
;
392 /* Small gnomes live here */
393 struct rb_root trans_root
;
394 struct mutex trans_lock
;
397 * Transaction cache/memory pool.
398 * It is big enough to contain not only transaction
399 * itself, but additional crypto data (digest/hmac).
401 struct kmem_cache
*trans_cache
;
402 mempool_t
*trans_pool
;
404 /* This entity scans transaction tree */
405 struct delayed_work trans_work
;
407 wait_queue_head_t wait
;
410 /* Kernel representation of the security attribute */
413 struct list_head sec_entry
;
414 struct dst_secure_user sec
;
417 int dst_process_bio(struct dst_node
*n
, struct bio
*bio
);
419 int dst_node_init_connected(struct dst_node
*n
, struct dst_network_ctl
*r
);
420 int dst_node_init_listened(struct dst_node
*n
, struct dst_export_ctl
*le
);
/*
 * dst_state_get() - take an additional reference on a DST state.
 * @st: state whose refcnt is incremented.
 *
 * Triggers BUG_ON() if the reference counter already reads zero, then
 * atomically increments it.
 * NOTE(review): the return statement is missing from this excerpt;
 * given the return type it presumably hands @st back to the caller.
 */
422 static inline struct dst_state
*dst_state_get(struct dst_state
*st
)
/* A zero counter presumably means the state is already being released. */
424 BUG_ON(atomic_read(&st
->refcnt
) == 0);
425 atomic_inc(&st
->refcnt
);
429 void dst_state_put(struct dst_state
*st
);
431 struct dst_state
*dst_state_alloc(struct dst_node
*n
);
432 int dst_state_socket_create(struct dst_state
*st
);
433 void dst_state_socket_release(struct dst_state
*st
);
435 void dst_state_exit_connected(struct dst_state
*st
);
437 int dst_state_schedule_receiver(struct dst_state
*st
);
439 void dst_dump_addr(struct socket
*sk
, struct sockaddr
*sa
, char *str
);
/*
 * dst_state_lock() - acquire the state's main protection mutex.
 * @st: state whose state_lock is taken with mutex_lock().
 *
 * Pairs with dst_state_unlock().
 */
441 static inline void dst_state_lock(struct dst_state
*st
)
443 mutex_lock(&st
->state_lock
);
/*
 * dst_state_unlock() - release the state's main protection mutex.
 * @st: state whose state_lock is released with mutex_unlock().
 *
 * Pairs with dst_state_lock().
 */
446 static inline void dst_state_unlock(struct dst_state
*st
)
448 mutex_unlock(&st
->state_lock
);
451 void dst_poll_exit(struct dst_state
*st
);
452 int dst_poll_init(struct dst_state
*st
);
/*
 * dst_state_poll() - query poll events on the state's socket.
 * @st: state whose ->socket is polled.
 *
 * revents starts out as POLLHUP | POLLERR and is replaced by the value
 * returned from the socket's ops->poll() callback, which is invoked with
 * NULL as its first and third arguments. dst_state_unlock() is called
 * afterwards.
 * NOTE(review): source lines 457-459 and 462-464 are missing from this
 * excerpt - presumably they take the state lock, skip the poll call when
 * there is no socket (which would explain the HUP/ERR default) and
 * return revents; confirm against the full header.
 */
454 static inline unsigned int dst_state_poll(struct dst_state
*st
)
456 unsigned int revents
= POLLHUP
| POLLERR
;
460 revents
= st
->socket
->ops
->poll(NULL
, st
->socket
, NULL
);
461 dst_state_unlock(st
);
/*
 * dst_thread_setup() - callback with the (void *private, void *data)
 * shape, matching the `setup` parameter type declared for
 * thread_pool_schedule_private() later in this header.
 * NOTE(review): the body is missing from this excerpt, so what it does
 * with its arguments and what it returns cannot be documented here.
 */
466 static inline int dst_thread_setup(void *private, void *data
)
471 void dst_node_put(struct dst_node
*n
);
/*
 * dst_node_get() - take an additional reference on a DST node.
 * @n: node whose refcnt is atomically incremented.
 *
 * The matching release is dst_node_put(), declared just above.
 * NOTE(review): the return statement is missing from this excerpt;
 * given the return type it presumably hands @n back to the caller.
 */
473 static inline struct dst_node
*dst_node_get(struct dst_node
*n
)
475 atomic_inc(&n
->refcnt
);
479 int dst_data_recv(struct dst_state
*st
, void *data
, unsigned int size
);
480 int dst_recv_cdata(struct dst_state
*st
, void *cdata
);
481 int dst_data_send_header(struct socket
*sock
,
482 void *data
, unsigned int size
, int more
);
484 int dst_send_bio(struct dst_state
*st
, struct dst_cmd
*cmd
, struct bio
*bio
);
486 int dst_process_io(struct dst_state
*st
);
487 int dst_export_crypto(struct dst_node
*n
, struct bio
*bio
);
488 int dst_export_send_bio(struct bio
*bio
);
489 int dst_start_export(struct dst_node
*n
);
491 int __init
dst_export_init(void);
492 void dst_export_exit(void);
494 /* Private structure for export block IO requests */
495 struct dst_export_priv
497 struct list_head request_entry
;
498 struct dst_state
*state
;
/*
 * dst_trans_get() - take an additional reference on a transaction.
 * @t: transaction whose refcnt is atomically incremented.
 *
 * Returns nothing. dst_trans_put() is declared below as the release
 * counterpart.
 */
503 static inline void dst_trans_get(struct dst_trans
*t
)
505 atomic_inc(&t
->refcnt
);
508 struct dst_trans
*dst_trans_search(struct dst_node
*node
, dst_gen_t gen
);
509 int dst_trans_remove(struct dst_trans
*t
);
510 int dst_trans_remove_nolock(struct dst_trans
*t
);
511 void dst_trans_put(struct dst_trans
*t
);
514 * Convert bio into network command.
/*
 * dst_bio_to_cmd() - fill a network command from a block IO request.
 * @bio: block IO request that is read.
 * @cmd: command that is written.
 *
 * Visible assignments: cmd->flags from bio->bi_flags (shifted, see below),
 * cmd->rw from bio->bi_rw, cmd->size from bio->bi_size and cmd->sector
 * from bio->bi_sector.
 * NOTE(review): source lines 517-519 and 523-524 are missing from this
 * excerpt - presumably the rest of the parameter list and further
 * assignments; confirm against the full header.
 */
516 static inline void dst_bio_to_cmd(struct bio
*bio
, struct dst_cmd
*cmd
,
/*
 * Shifting left and then right by BIO_POOL_BITS clears the top
 * BIO_POOL_BITS bits of bi_flags (assuming bi_flags is unsigned) -
 * presumably the bio pool index; verify.
 */
520 cmd
->flags
= (bio
->bi_flags
<< BIO_POOL_BITS
) >> BIO_POOL_BITS
;
521 cmd
->rw
= bio
->bi_rw
;
522 cmd
->size
= bio
->bi_size
;
525 cmd
->sector
= bio
->bi_sector
;
528 int dst_trans_send(struct dst_trans
*t
);
529 int dst_trans_crypto(struct dst_trans
*t
);
531 int dst_node_crypto_init(struct dst_node
*n
, struct dst_crypto_ctl
*ctl
);
532 void dst_node_crypto_exit(struct dst_node
*n
);
/*
 * dst_need_crypto() - report whether a node has a hash or cipher name set.
 * @n: node whose ->crypto settings are inspected.
 *
 * Returns the bitwise OR of the first byte of crypto.hash_algo and the
 * first byte of crypto.cipher_algo. The result is zero only when both
 * first bytes are zero, i.e. both names are empty strings; any other
 * value means at least one algorithm name is non-empty. The `|` used
 * here is the bitwise operator, which is what the inline note calls
 * the "boolean" form, as opposed to the logical `||`.
 */
534 static inline int dst_need_crypto(struct dst_node
*n
)
536 struct dst_crypto_ctl
*c
= &n
->crypto
;
538 * Logical OR is appropriate here, but boolean one produces
539 * more optimal code, so it is used instead.
541 return (c
->hash_algo
[0] | c
->cipher_algo
[0]);
544 int dst_node_trans_init(struct dst_node
*n
, unsigned int size
);
545 void dst_node_trans_exit(struct dst_node
*n
);
549 * Ready list contains threads currently free to be used,
550 * active one contains threads with some work scheduled for them.
551 * Caller can wait in given queue when thread is ready.
556 struct mutex thread_lock
;
557 struct list_head ready_list
, active_list
;
559 wait_queue_head_t wait
;
562 void thread_pool_del_worker(struct thread_pool
*p
);
563 void thread_pool_del_worker_id(struct thread_pool
*p
, unsigned int id
);
564 int thread_pool_add_worker(struct thread_pool
*p
,
567 void *(* init
)(void *data
),
568 void (* cleanup
)(void *data
),
571 void thread_pool_destroy(struct thread_pool
*p
);
572 struct thread_pool
*thread_pool_create(int num
, char *name
,
573 void *(* init
)(void *data
),
574 void (* cleanup
)(void *data
),
577 int thread_pool_schedule(struct thread_pool
*p
,
578 int (* setup
)(void *stored_private
, void *setup_data
),
579 int (* action
)(void *stored_private
, void *setup_data
),
580 void *setup_data
, long timeout
);
581 int thread_pool_schedule_private(struct thread_pool
*p
,
582 int (* setup
)(void *private, void *data
),
583 int (* action
)(void *private, void *data
),
584 void *data
, long timeout
, void *id
);
586 #endif /* __KERNEL__ */