After transaction was sent, move into per-state tree not list. This speeds up reply...
[pohmelfs.git] / fs / pohmelfs / pohmelfs.h
blob63fd834a935bc28e56837cc7501cfe876592a88e
1 /*
2 * Copyright (C) 2011+ Evgeniy Polyakov <zbr@ioremap.net>
3 */
5 #ifndef __POHMELFS_H
6 #define __POHMELFS_H
8 #include <linux/backing-dev.h>
9 #include <linux/crypto.h>
10 #include <linux/fs.h>
11 #include <linux/kref.h>
12 #include <linux/list.h>
13 #include <linux/mutex.h>
14 #include <linux/net.h>
15 #include <linux/pagemap.h>
16 #include <linux/pagevec.h>
17 #include <linux/printk.h>
18 #include <linux/slab.h>
19 #include <linux/time.h>
20 #include <linux/wait.h>
21 #include <linux/workqueue.h>
23 #include <crypto/sha.h>
/* Map the dnet_bswap* names onto the kernel's cpu_to_le* conversions. */
#define dnet_bswap16(x)		cpu_to_le16(x)
#define dnet_bswap32(x)		cpu_to_le32(x)
#define dnet_bswap64(x)		cpu_to_le64(x)

/* These are needed for packet.h below to compile. */
#define DNET_ID_SIZE		SHA512_DIGEST_SIZE
#define DNET_CSUM_SIZE		SHA512_DIGEST_SIZE

#define POHMELFS_INODE_COLUMN	3
/*
 * gettimeofday() is not used in the kernel, but we want to share the same
 * header with userspace, so it is declared here only to keep the compiler
 * quiet.
 */
int gettimeofday(struct timeval *tv, struct timezone *tz);
41 #include "packet.h"
43 static inline struct timespec pohmelfs_date(struct dnet_time *tm)
45 struct timespec ts;
47 ts.tv_sec = tm->tsec;
48 ts.tv_nsec = tm->tnsec;
50 return ts;
53 struct pohmelfs_cmd {
54 struct dnet_cmd cmd;
55 struct dnet_attr attr;
56 union {
57 struct dnet_io_attr io;
58 } p;
62 * Compare two IDs.
63 * Returns 1 when id1 > id2
64 * -1 when id1 < id2
65 * 0 when id1 = id2
67 static inline int dnet_id_cmp_str(const unsigned char *id1, const unsigned char *id2)
69 unsigned int i = 0;
71 for (i*=sizeof(unsigned long); i<DNET_ID_SIZE; ++i) {
72 if (id1[i] < id2[i])
73 return -1;
74 if (id1[i] > id2[i])
75 return 1;
78 return 0;
/* Forward declarations for types that reference each other below. */
struct pohmelfs_sb;
struct pohmelfs_state;
struct pohmelfs_trans;
/*
 * Callback table attached to a transaction (pohmelfs_trans.cb) and to an
 * I/O request (pohmelfs_io.cb). complete() and recv_reply() additionally
 * receive a state pointer named @recv.
 * NOTE(review): when each hook fires is decided by the transaction code,
 * which is not part of this header.
 */
struct pohmelfs_trans_cb {
	int	(*init)(struct pohmelfs_trans *t);
	int	(*complete)(struct pohmelfs_trans *t, struct pohmelfs_state *recv);
	int	(*recv_reply)(struct pohmelfs_trans *t, struct pohmelfs_state *recv);
	void	(*destroy)(struct pohmelfs_trans *t);
};
92 struct pohmelfs_trans {
93 struct list_head trans_entry;
94 struct rb_node trans_node;
96 struct kref refcnt;
98 unsigned long trans;
100 struct inode *inode;
102 struct pohmelfs_state *st;
104 struct pohmelfs_cmd cmd;
106 u64 header_size, data_size;
108 void *data;
110 unsigned long long recv_offset;
111 void *recv_data;
113 struct pohmelfs_write_ctl *wctl;
114 void *priv;
116 struct pohmelfs_trans_cb cb;
119 struct pohmelfs_trans *pohmelfs_trans_alloc(struct inode *inode);
120 struct pohmelfs_trans *pohmelfs_trans_alloc_io_buf(struct inode *inode, int group, int command,
121 void *data, u64 offset, u64 size, int aflags, int ioflags, int type);
122 void pohmelfs_trans_put(struct pohmelfs_trans *t);
124 int pohmelfs_trans_insert(struct pohmelfs_trans *t);
125 int pohmelfs_trans_insert_tree(struct pohmelfs_state *st, struct pohmelfs_trans *t);
126 void pohmelfs_trans_remove(struct pohmelfs_trans *t);
127 struct pohmelfs_trans *pohmelfs_trans_lookup(struct pohmelfs_state *st, struct dnet_cmd *cmd);
129 struct pohmelfs_state {
130 struct pohmelfs_sb *psb;
131 struct list_head state_entry;
133 struct sockaddr_storage sa;
134 int addrlen;
135 struct socket *sock;
137 int group_id;
139 struct mutex trans_lock;
140 struct list_head trans_list;
141 struct rb_root trans_root;
143 struct kref refcnt;
145 int routes;
147 /* Waiting/polling machinery */
148 wait_queue_t wait;
149 wait_queue_head_t *whead;
151 struct work_struct send_work;
152 struct work_struct recv_work;
154 /* is set when dnet_cmd is being read, otherwise attached data */
155 int cmd_read;
156 /* currently read command reply */
157 struct dnet_cmd cmd;
159 uint64_t bsize; /* Block size */
160 uint64_t frsize; /* Fragment size */
161 uint64_t blocks; /* Filesystem size in frsize units */
162 uint64_t bfree; /* # free blocks */
163 uint64_t bavail; /* # free blocks for non-root */
/* State creation, lookup by ID and group, and bulk grab (defined elsewhere). */
struct pohmelfs_state *pohmelfs_state_create(struct pohmelfs_sb *psb,
		struct sockaddr_storage *sa, int addrlen,
		int ask_route, int group_id);
struct pohmelfs_state *pohmelfs_state_lookup(struct pohmelfs_sb *psb,
		struct dnet_raw_id *id, int group);
int pohmelfs_grab_states(struct pohmelfs_sb *psb, struct pohmelfs_state ***stp);
171 static inline void pohmelfs_state_get(struct pohmelfs_state *st)
173 kref_get(&st->refcnt);
/* State reference drop and teardown (defined elsewhere). */
void pohmelfs_state_put(struct pohmelfs_state *st);
void pohmelfs_state_kill(struct pohmelfs_state *st);

struct pohmelfs_state *pohmelfs_addr_exist(struct pohmelfs_sb *psb,
		struct sockaddr_storage *sa, int addrlen);

void pohmelfs_state_schedule(struct pohmelfs_state *st);

/* printf-style helper: @fmt is argument 2, variadic arguments start at 3. */
__attribute__ ((format (printf, 2, 3)))
void pohmelfs_print_addr(struct sockaddr_storage *addr, const char *fmt, ...);
185 #define POHMELFS_INODE_INFO_REMOVED (1<<0)
187 struct pohmelfs_inode_info {
188 struct dnet_raw_id id;
190 unsigned int mode;
191 unsigned int nlink;
192 unsigned int uid;
193 unsigned int gid;
194 unsigned int blocksize;
195 unsigned int namelen;
196 __u64 ino;
197 __u64 blocks;
198 __u64 rdev;
199 __u64 size;
200 __u64 version;
202 __u64 flags;
204 struct dnet_time ctime;
205 struct dnet_time mtime;
206 struct dnet_time atime;
207 } __attribute__ ((packed));
/* Helpers working on an inode and its pohmelfs_inode_info (defined elsewhere). */
void pohmelfs_fill_inode_info(struct inode *inode,
		struct pohmelfs_inode_info *info);
void pohmelfs_fill_inode(struct inode *inode,
		struct pohmelfs_inode_info *info);
void pohmelfs_convert_inode_info(struct pohmelfs_inode_info *info);
213 struct pohmelfs_inode {
214 struct inode vfs_inode;
215 struct dnet_raw_id id;
216 struct dnet_raw_id parent_id;
218 struct rb_node node;
220 struct mutex lock;
222 int *groups;
223 int group_num;
225 time_t update;
226 int local;
int pohmelfs_send_dentry(struct pohmelfs_inode *pi, struct dnet_raw_id *id,
		const char *sname, int len, int sync);
struct pohmelfs_inode *pohmelfs_sb_inode_lookup(struct pohmelfs_sb *psb,
		struct dnet_raw_id *id);
232 struct pohmelfs_reconnect {
233 struct list_head reconnect_entry;
234 struct sockaddr_storage sa;
235 int addrlen;
236 int group_id;
/* Defined elsewhere; takes the state to be added for reconnection. */
int pohmelfs_state_add_reconnect(struct pohmelfs_state *st);
241 struct pohmelfs_path {
242 struct mutex lock;
243 char *data;
/* Defined elsewhere. */
int pohmelfs_http_compat_id(struct pohmelfs_inode *pi);
248 struct pohmelfs_sb {
249 struct super_block *sb;
250 struct backing_dev_info bdi;
252 struct pohmelfs_inode *root;
254 spinlock_t inode_lock;
255 struct rb_root inode_root;
257 int http_compat;
258 struct pohmelfs_path *path;
260 int bdi_num;
262 struct rb_root route_root;
263 struct list_head state_list;
264 spinlock_t state_lock;
266 long read_wait_timeout;
267 long write_wait_timeout;
269 long sync_timeout;
270 struct delayed_work sync_work;
272 char *fsid;
273 int fsid_len;
275 int no_read_csum;
277 int need_exit;
279 atomic_long_t ino;
280 atomic_long_t trans;
282 struct crypto_hash *hash;
284 struct workqueue_struct *wq;
286 int *groups;
287 int group_num;
290 * number of copies to be successfully written to mark write as successful
291 * if not set, half of groups plus one must be successfully written, i.e. plain write quorum
293 int successful_write_count;
295 struct mutex reconnect_lock;
296 struct list_head reconnect_list;
297 struct list_head kill_state_list;
298 struct delayed_work reconnect_work;
299 long reconnect_timeout;
301 int keepalive_cnt, keepalive_interval, keepalive_idle;
303 int readdir_allocation;
305 int sync_on_close;
308 static inline struct pohmelfs_sb *pohmelfs_sb(struct super_block *sb)
310 return (struct pohmelfs_sb *)sb->s_fs_info;
313 static inline struct pohmelfs_inode *pohmelfs_inode(struct inode *inode)
315 return container_of(inode, struct pohmelfs_inode, vfs_inode);
318 struct pohmelfs_wait {
319 wait_queue_head_t wq;
320 struct pohmelfs_inode *pi;
321 void *ret;
322 atomic_long_t count;
323 int condition;
324 struct kref refcnt;
/* Wait context setup, allocation and reference drop (defined elsewhere). */
int pohmelfs_wait_init(struct pohmelfs_wait *wait, struct pohmelfs_inode *pi);
struct pohmelfs_wait *pohmelfs_wait_alloc(struct pohmelfs_inode *pi);
void pohmelfs_wait_put(struct pohmelfs_wait *wait);
330 static inline void pohmelfs_wait_get(struct pohmelfs_wait *wait)
332 kref_get(&wait->refcnt);
335 struct pohmelfs_inode_info_binary_package {
336 struct pohmelfs_inode_info info;
338 struct pohmelfs_wait wait;
341 struct pohmelfs_write_ctl {
342 struct pagevec pvec;
343 struct pohmelfs_inode_info *info;
345 struct kref refcnt;
346 atomic_t good_writes;
349 struct pohmelfs_dentry_disk {
350 struct dnet_raw_id id;
351 uint64_t ino;
352 int type;
353 int len;
354 char name[0];
355 } __attribute__((packed));
357 struct pohmelfs_dentry {
358 struct dnet_raw_id parent_id;
359 struct pohmelfs_dentry_disk disk;
/* Slab caches, defined elsewhere in the module. */
extern struct kmem_cache	*pohmelfs_inode_cache;
extern struct kmem_cache	*pohmelfs_trans_cache;
extern struct kmem_cache	*pohmelfs_inode_info_cache;
extern struct kmem_cache	*pohmelfs_route_cache;
extern struct kmem_cache	*pohmelfs_wait_cache;
extern struct kmem_cache	*pohmelfs_io_cache;
extern struct kmem_cache	*pohmelfs_inode_info_binary_package_cache;
extern struct kmem_cache	*pohmelfs_write_cache;
extern struct kmem_cache	*pohmelfs_dentry_cache;
372 struct inode *pohmelfs_alloc_inode(struct super_block *sb);
373 void pohmelfs_destroy_inode(struct inode *);
375 struct pohmelfs_inode *pohmelfs_existing_inode(struct pohmelfs_sb *psb, struct pohmelfs_inode_info *info);
376 struct pohmelfs_inode *pohmelfs_new_inode(struct pohmelfs_sb *psb, int mode);
377 int pohmelfs_hash(struct pohmelfs_sb *psb, const void *data, const size_t size, struct dnet_raw_id *id);
379 char *pohmelfs_dump_id(const unsigned char *id);
380 char *pohmelfs_dump_id_len_raw(const unsigned char *id, unsigned int len, char *dst);
382 int pohmelfs_write_command(struct pohmelfs_inode *pi, struct pohmelfs_write_ctl *ctl, loff_t offset, size_t len);
383 void pohmelfs_write_ctl_release(struct kref *kref);
384 int pohmelfs_metadata_inode(struct pohmelfs_inode *pi, int sync);
/* Directory operation tables. */
extern const struct file_operations	pohmelfs_dir_fops;
extern const struct inode_operations	pohmelfs_dir_inode_operations;

/* Regular file operation tables. */
extern const struct file_operations	pohmelfs_file_ops;
extern const struct inode_operations	pohmelfs_file_inode_operations;

/* Symlink and special inode operation tables. */
extern const struct inode_operations	pohmelfs_symlink_inode_operations;
extern const struct inode_operations	pohmelfs_special_inode_operations;

/* Scratch buffer and its size, defined elsewhere. */
extern void	*pohmelfs_scratch_buf;
extern int	pohmelfs_scratch_buf_size;
/*
 * if this flag is set, pohmelfs_io->data is owned by the caller,
 * so sending path may use it on its own and free (using kfree) when it's done
 *
 * This logic does not work for shared buffers or
 * when multiple transactions will be sent for single pohmelfs_io
 *
 * NOTE(review): this comment used to name pohmelfs_inode_info, which has no
 * data member; struct pohmelfs_io is the one carrying data and alloc_flags.
 * Presumably the flag is stored in pohmelfs_io.alloc_flags - confirm.
 */
#define POHMELFS_IO_OWN			(1<<0)
407 struct pohmelfs_io {
408 struct pohmelfs_inode *pi;
410 struct dnet_raw_id *id;
412 int cmd;
413 int type;
415 u64 offset, size;
416 u64 start, num;
418 u32 cflags;
419 u32 aflags;
420 u32 ioflags;
422 int group_id;
424 u32 alloc_flags;
425 void *data;
427 struct pohmelfs_write_ctl *wctl;
428 void *priv;
430 struct pohmelfs_trans_cb cb;
433 int pohmelfs_send_io_group(struct pohmelfs_io *pio, int group_id);
434 int pohmelfs_send_io(struct pohmelfs_io *pio);
435 int pohmelfs_send_buf_single(struct pohmelfs_io *pio, struct pohmelfs_state *st);
436 int pohmelfs_send_buf(struct pohmelfs_io *pio);
438 int pohmelfs_data_recv(struct pohmelfs_state *st, void *buf, u64 size, unsigned int flags);
439 int pohmelfs_recv(struct pohmelfs_trans *t, struct pohmelfs_state *recv, void *data, int size);
441 struct pohmelfs_route {
442 struct rb_node node;
443 int group_id;
444 struct dnet_raw_id id;
445 struct pohmelfs_state *st;
/* Route handling for a given state (defined elsewhere). */
int pohmelfs_route_request(struct pohmelfs_state *st);
void pohmelfs_route_remove_all(struct pohmelfs_state *st);
/*
 * Parameters for pohmelfs_send_script_request(): object name, script name
 * and binary blob (each with a length), target group and ID, flags, and a
 * completion callback with its result slot.
 */
struct pohmelfs_script_req {
	char			*obj_name;
	int			obj_len;

	char			*script_name;
	int			script_namelen;

	void			*binary;
	int			binary_size;

	int			group_id;

	unsigned int		cflags;
	int			sync;

	struct dnet_raw_id	*id;

	/* same signature as pohmelfs_trans_cb.complete */
	int			(*complete)(struct pohmelfs_trans *t, struct pohmelfs_state *recv);
	void			*ret;
	int			ret_cond;
};
/* Defined elsewhere. */
int pohmelfs_send_script_request(struct pohmelfs_inode *parent,
		struct pohmelfs_script_req *req);

int pohmelfs_stat(struct pohmelfs_sb *psb, int sync);
477 static inline int pohmelfs_need_resync(struct pohmelfs_inode *pi)
479 struct pohmelfs_sb *psb = pohmelfs_sb(pi->vfs_inode.i_sb);
480 return get_seconds() > pi->update + psb->sync_timeout;
483 #endif /* __POHMELFS_H */