1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
3 * Copyright (C) 1997 University of Chicago.
4 * See COPYRIGHT notice in top-level directory.
6 * Copyright (C) 2007 Oak Ridge National Laboratory
8 * Copyright (C) 2008 Sun Microsystems, Lustre group
11 #include "ad_lustre.h"
12 #include "adio_extern.h"
/*
 * ADIOI_BUFFERED_WRITE: stage one contiguous file request (req_off, req_len)
 * through the write buffer using read-modify-write.
 *
 * The macro is a statement sequence, not an expression. It relies on these
 * names being in scope at the point of expansion: fd, buf, writebuf,
 * writebuf_off, writebuf_len, write_sz, req_off, req_len, userbuf_off,
 * end_offset, stripe_size, status1, error_code and myname.
 *
 * Behaviour visible in the macro body:
 *  - If req_off lies at or beyond the end of the currently buffered region,
 *    the buffer is written out with ADIO_WriteContig and then re-based at
 *    req_off. The new region ends at the next stripe_size boundary or at
 *    end_offset, whichever comes first, and its current file contents are
 *    read in with ADIO_ReadContig before user data is copied over it.
 *  - While the request does not fit in the buffered region, the buffer is
 *    written out, advanced to the following region, re-read and refilled.
 *  - When fd->atomicity is not set, each buffered region is locked with
 *    ADIOI_WRITE_LOCK before the read and released with ADIOI_UNLOCK after
 *    the write.
 *  - On an I/O error the code is wrapped with MPIO_Err_create_code and
 *    writebuf is freed.
 *
 * Side effects: req_len and userbuf_off are modified by the flush loop. The
 * last, partially filled buffer is NOT written by the macro; the user of the
 * macro must flush it afterwards.
 */
14 #define ADIOI_BUFFERED_WRITE \
/* request starts past the buffered region: flush it, then re-base at req_off */ \
16 if (req_off >= writebuf_off + writebuf_len) { \
18 ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
19 ADIO_EXPLICIT_OFFSET, writebuf_off, \
20 &status1, error_code); \
21 if (!(fd->atomicity)) \
22 ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
23 if (*error_code != MPI_SUCCESS) { \
24 *error_code = MPIO_Err_create_code(*error_code, \
25 MPIR_ERR_RECOVERABLE, \
27 __LINE__, MPI_ERR_IO, \
29 ADIOI_Free(writebuf); \
33 writebuf_off = req_off; \
34 /* stripe_size alignment: the region ends at the next stripe boundary or at end_offset, whichever is nearer */ \
35 writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
36 (writebuf_off / stripe_size + 1) * \
37 stripe_size - writebuf_off); \
38 if (!(fd->atomicity)) \
39 ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
40 ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
41 ADIO_EXPLICIT_OFFSET, \
42 writebuf_off, &status1, error_code); \
43 if (*error_code != MPI_SUCCESS) { \
44 *error_code = MPIO_Err_create_code(*error_code, \
45 MPIR_ERR_RECOVERABLE, \
47 __LINE__, MPI_ERR_IO, \
49 ADIOI_Free(writebuf); \
/* copy as much of the request as fits in the buffered region; the assert checks that the narrowing to unsigned lost nothing */ \
53 write_sz = (unsigned) (ADIOI_MIN(req_len, \
54 writebuf_off + writebuf_len - req_off)); \
55 ADIOI_Assert((ADIO_Offset)write_sz == \
56 ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
57 memcpy(writebuf + req_off - writebuf_off, (char *)buf +userbuf_off, write_sz); \
/* request spills past the buffered region: flush, advance to the next region, read it, copy the next piece */ \
58 while (write_sz != req_len) { \
59 ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
60 ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
61 if (!(fd->atomicity)) \
62 ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
63 if (*error_code != MPI_SUCCESS) { \
64 *error_code = MPIO_Err_create_code(*error_code, \
65 MPIR_ERR_RECOVERABLE, myname, \
66 __LINE__, MPI_ERR_IO, \
68 ADIOI_Free(writebuf); \
/* account for the bytes already staged, then move the buffer window forward */ \
71 req_len -= write_sz; \
72 userbuf_off += write_sz; \
73 writebuf_off += writebuf_len; \
74 /* stripe_size alignment: the region ends at the next stripe boundary or at end_offset, whichever is nearer */ \
75 writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
76 (writebuf_off / stripe_size + 1) * \
77 stripe_size - writebuf_off); \
78 if (!(fd->atomicity)) \
79 ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); \
80 ADIO_ReadContig(fd, writebuf, writebuf_len, MPI_BYTE, \
81 ADIO_EXPLICIT_OFFSET, \
82 writebuf_off, &status1, error_code); \
83 if (*error_code != MPI_SUCCESS) { \
84 *error_code = MPIO_Err_create_code(*error_code, \
85 MPIR_ERR_RECOVERABLE, myname, \
86 __LINE__, MPI_ERR_IO, \
88 ADIOI_Free(writebuf); \
91 write_sz = ADIOI_MIN(req_len, writebuf_len); \
92 memcpy(writebuf, (char *)buf + userbuf_off, write_sz); \
97 /* This macro is used when the filetype is contiguous and the buftype is not
98 contiguous. It does not do a read-modify-write and does not lock.
 *
 * It is also expanded on the path where the buftype is contiguous and the
 * whole request fits inside the current contiguous block of the filetype.
 *
 * The macro stages one contiguous file request (req_off, req_len) through
 * the write buffer. It is a statement sequence and relies on these names
 * being in scope at the point of expansion: fd, buf, writebuf, writebuf_off,
 * writebuf_len, write_sz, req_off, req_len, userbuf_off, end_offset,
 * stripe_size, status1, error_code and myname.
 *
 * Behaviour visible in the macro body:
 *  - If req_off lies at or beyond the end of the currently buffered region,
 *    the buffer is written out with ADIO_WriteContig and re-based at
 *    req_off; the new region ends at the next stripe_size boundary or at
 *    end_offset, whichever comes first.
 *  - While the request does not fit in the buffered region, the buffer is
 *    written out, advanced to the following region and refilled.
 *  - On an I/O error the code is wrapped with MPIO_Err_create_code and
 *    writebuf is freed.
 *
 * Side effects: req_len and userbuf_off are modified by the flush loop. The
 * last, partially filled buffer is NOT written by the macro; the user of the
 * macro must flush it afterwards. */
99 #define ADIOI_BUFFERED_WRITE_WITHOUT_READ \
101 if (req_off >= writebuf_off + writebuf_len) { \
/* request starts past the buffered region: flush it, then re-base at req_off */ \
102 ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
103 ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, \
105 if (*error_code != MPI_SUCCESS) { \
106 *error_code = MPIO_Err_create_code(*error_code, \
107 MPIR_ERR_RECOVERABLE, \
109 __LINE__, MPI_ERR_IO, \
111 ADIOI_Free(writebuf); \
114 writebuf_off = req_off; \
115 /* stripe_size alignment: the region ends at the next stripe boundary or at end_offset, whichever is nearer */ \
116 writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
117 (writebuf_off / stripe_size + 1) * \
118 stripe_size - writebuf_off); \
/* copy as much of the request as fits in the buffered region; the assert checks that the narrowing to unsigned lost nothing */ \
120 write_sz = (unsigned) ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off); \
121 ADIOI_Assert((ADIO_Offset)write_sz == ADIOI_MIN(req_len, writebuf_off + writebuf_len - req_off)); \
122 memcpy(writebuf + req_off - writebuf_off, \
123 (char *)buf + userbuf_off, write_sz); \
/* request spills past the buffered region: flush, advance to the next region, copy the next piece */ \
124 while (write_sz != req_len) { \
125 ADIO_WriteContig(fd, writebuf, writebuf_len, MPI_BYTE, \
126 ADIO_EXPLICIT_OFFSET, writebuf_off, &status1, error_code); \
127 if (*error_code != MPI_SUCCESS) { \
128 *error_code = MPIO_Err_create_code(*error_code, \
129 MPIR_ERR_RECOVERABLE, myname, \
130 __LINE__, MPI_ERR_IO, \
132 ADIOI_Free(writebuf); \
/* account for the bytes already staged, then move the buffer window forward */ \
135 req_len -= write_sz; \
136 userbuf_off += write_sz; \
137 writebuf_off += writebuf_len; \
138 /* stripe_size alignment: the region ends at the next stripe boundary or at end_offset, whichever is nearer */ \
139 writebuf_len = (unsigned) ADIOI_MIN(end_offset - writebuf_off + 1, \
140 (writebuf_off / stripe_size + 1) * \
141 stripe_size - writebuf_off); \
142 write_sz = ADIOI_MIN(req_len, writebuf_len); \
143 memcpy(writebuf, (char *)buf + userbuf_off, write_sz); \
147 void ADIOI_LUSTRE_WriteStrided(ADIO_File fd
, void *buf
, int count
,
148 MPI_Datatype datatype
, int file_ptr_type
,
149 ADIO_Offset offset
, ADIO_Status
* status
,
152 /* offset is in units of etype relative to the filetype. */
153 ADIOI_Flatlist_node
*flat_buf
, *flat_file
;
154 ADIO_Offset i_offset
, sum
, size_in_filetype
;
155 int i
, j
, k
, st_index
=0;
156 int n_etypes_in_filetype
;
157 ADIO_Offset num
, size
, n_filetypes
, etype_in_filetype
, st_n_filetypes
;
158 ADIO_Offset abs_off_in_filetype
=0;
159 int filetype_size
, etype_size
, buftype_size
;
160 MPI_Aint filetype_extent
, buftype_extent
;
161 int buf_count
, buftype_is_contig
, filetype_is_contig
;
162 ADIO_Offset userbuf_off
;
163 ADIO_Offset off
, req_off
, disp
, end_offset
=0, writebuf_off
, start_off
;
165 unsigned bufsize
, writebuf_len
, write_sz
;
167 ADIO_Offset new_bwr_size
, new_fwr_size
, st_fwr_size
, fwr_size
=0, bwr_size
, req_len
;
169 static char myname
[] = "ADIOI_LUSTRE_WriteStrided";
171 if (fd
->hints
->ds_write
== ADIOI_HINT_DISABLE
) {
172 /* if user has disabled data sieving on writes, use naive
175 ADIOI_GEN_WriteStrided_naive(fd
,
180 offset
, status
, error_code
);
184 *error_code
= MPI_SUCCESS
; /* changed below if error */
186 ADIOI_Datatype_iscontig(datatype
, &buftype_is_contig
);
187 ADIOI_Datatype_iscontig(fd
->filetype
, &filetype_is_contig
);
189 MPI_Type_size(fd
->filetype
, &filetype_size
);
190 if (!filetype_size
) {
191 *error_code
= MPI_SUCCESS
;
195 MPI_Type_extent(fd
->filetype
, &filetype_extent
);
196 MPI_Type_size(datatype
, &buftype_size
);
197 MPI_Type_extent(datatype
, &buftype_extent
);
198 etype_size
= fd
->etype_size
;
200 ADIOI_Assert((buftype_size
* count
) == ((ADIO_Offset
)(unsigned)buftype_size
* (ADIO_Offset
)count
));
201 bufsize
= buftype_size
* count
;
203 /* get striping info */
204 stripe_size
= fd
->hints
->striping_unit
;
206 /* Different buftype to different filetype */
207 if (!buftype_is_contig
&& filetype_is_contig
) {
208 /* noncontiguous in memory, contiguous in file. */
209 ADIOI_Flatten_datatype(datatype
);
210 flat_buf
= ADIOI_Flatlist
;
211 while (flat_buf
->type
!= datatype
)
212 flat_buf
= flat_buf
->next
;
214 off
= (file_ptr_type
== ADIO_INDIVIDUAL
) ? fd
->fp_ind
:
215 fd
->disp
+ (ADIO_Offset
)etype_size
* offset
;
218 end_offset
= start_off
+ bufsize
- 1;
219 /* write stripe size buffer each time */
220 writebuf
= (char *) ADIOI_Malloc(ADIOI_MIN(bufsize
, stripe_size
));
224 /* if atomicity is true, lock the region to be accessed */
226 ADIOI_WRITE_LOCK(fd
, start_off
, SEEK_SET
, bufsize
);
228 for (j
= 0; j
< count
; j
++) {
229 for (i
= 0; i
< flat_buf
->count
; i
++) {
230 userbuf_off
= (ADIO_Offset
)j
* (ADIO_Offset
)buftype_extent
+
231 flat_buf
->indices
[i
];
233 req_len
= flat_buf
->blocklens
[i
];
234 ADIOI_BUFFERED_WRITE_WITHOUT_READ
235 off
+= flat_buf
->blocklens
[i
];
239 /* write the buffer out finally */
240 ADIO_WriteContig(fd
, writebuf
, writebuf_len
, MPI_BYTE
,
241 ADIO_EXPLICIT_OFFSET
, writebuf_off
, &status1
,
245 ADIOI_UNLOCK(fd
, start_off
, SEEK_SET
, bufsize
);
246 if (*error_code
!= MPI_SUCCESS
) {
247 ADIOI_Free(writebuf
);
250 ADIOI_Free(writebuf
);
251 if (file_ptr_type
== ADIO_INDIVIDUAL
)
254 /* noncontiguous in file */
255 /* filetype already flattened in ADIO_Open */
256 flat_file
= ADIOI_Flatlist
;
257 while (flat_file
->type
!= fd
->filetype
)
258 flat_file
= flat_file
->next
;
261 if (file_ptr_type
== ADIO_INDIVIDUAL
) {
262 /* Wei-keng reworked type processing to be a bit more efficient */
263 offset
= fd
->fp_ind
- disp
;
264 n_filetypes
= (offset
- flat_file
->indices
[0]) / filetype_extent
;
265 offset
-= (ADIO_Offset
)n_filetypes
* filetype_extent
;
266 /* now offset is local to this extent */
268 /* find the block where offset is located, skip blocklens[i]==0 */
269 for (i
=0; i
<flat_file
->count
; i
++) {
271 if (flat_file
->blocklens
[i
] == 0) continue;
272 dist
= flat_file
->indices
[i
] + flat_file
->blocklens
[i
] - offset
;
273 /* fwr_size is from offset to the end of block i */
276 offset
= flat_file
->indices
[i
];
277 fwr_size
= flat_file
->blocklens
[i
];
285 st_index
= i
; /* starting index in flat_file->indices[] */
286 offset
+= disp
+ (ADIO_Offset
)n_filetypes
*filetype_extent
;
289 n_etypes_in_filetype
= filetype_size
/etype_size
;
290 n_filetypes
= offset
/ n_etypes_in_filetype
;
291 etype_in_filetype
= offset
% n_etypes_in_filetype
;
292 size_in_filetype
= etype_in_filetype
* etype_size
;
295 for (i
= 0; i
< flat_file
->count
; i
++) {
296 sum
+= flat_file
->blocklens
[i
];
297 if (sum
> size_in_filetype
) {
299 fwr_size
= sum
- size_in_filetype
;
300 abs_off_in_filetype
= flat_file
->indices
[i
] +
301 size_in_filetype
- (sum
- flat_file
->blocklens
[i
]);
306 /* abs. offset in bytes in the file */
307 offset
= disp
+ (ADIO_Offset
) n_filetypes
*filetype_extent
+
313 /* Wei-keng Liao:write request is within single flat_file
315 /* this could happen, for example, with subarray types that are
316 * actually fairly contiguous */
317 if (buftype_is_contig
&& bufsize
<= fwr_size
) {
320 end_offset
= start_off
+ bufsize
- 1;
321 writebuf
= (char *) ADIOI_Malloc(ADIOI_MIN(bufsize
, stripe_size
));
322 memset(writebuf
, -1, ADIOI_MIN(bufsize
, stripe_size
));
326 ADIOI_BUFFERED_WRITE_WITHOUT_READ
327 /* write the buffer out finally */
328 ADIO_WriteContig(fd
, writebuf
, writebuf_len
, MPI_BYTE
,
329 ADIO_EXPLICIT_OFFSET
, writebuf_off
, &status1
,
332 if (file_ptr_type
== ADIO_INDIVIDUAL
) {
333 /* update MPI-IO file pointer to point to the first byte
334 * that can be accessed in the fileview. */
335 fd
->fp_ind
= offset
+ bufsize
;
336 if (bufsize
== fwr_size
) {
339 if (st_index
== flat_file
->count
) {
343 } while (flat_file
->blocklens
[st_index
] == 0);
344 fd
->fp_ind
= disp
+ flat_file
->indices
[st_index
]
345 + (ADIO_Offset
)n_filetypes
*filetype_extent
;
348 fd
->fp_sys_posn
= -1; /* set it to null. */
349 #ifdef HAVE_STATUS_SET_BYTES
350 MPIR_Status_set_bytes(status
, datatype
, bufsize
);
352 ADIOI_Free(writebuf
);
356 /* Calculate end_offset, the last byte-offset that will be accessed.
357 e.g., if start_offset=0 and 100 bytes are to be written, end_offset=99 */
359 st_fwr_size
= fwr_size
;
360 st_n_filetypes
= n_filetypes
;
364 fwr_size
= ADIOI_MIN(st_fwr_size
, bufsize
);
365 while (i_offset
< bufsize
) {
366 i_offset
+= fwr_size
;
367 end_offset
= off
+ fwr_size
- 1;
369 j
= (j
+1) % flat_file
->count
;
370 n_filetypes
+= (j
== 0) ? 1 : 0;
371 while (flat_file
->blocklens
[j
]==0) {
372 j
= (j
+1) % flat_file
->count
;
373 n_filetypes
+= (j
== 0) ? 1 : 0;
376 off
= disp
+ flat_file
->indices
[j
] +
377 n_filetypes
*(ADIO_Offset
)filetype_extent
;
378 fwr_size
= ADIOI_MIN(flat_file
->blocklens
[j
], bufsize
-i_offset
);
381 /* if atomicity is true, lock the region to be accessed */
383 ADIOI_WRITE_LOCK(fd
, start_off
, SEEK_SET
, end_offset
-start_off
+1);
387 writebuf
= (char *) ADIOI_Malloc(stripe_size
);
388 memset(writebuf
, -1, stripe_size
);
390 if (buftype_is_contig
&& !filetype_is_contig
) {
392 /* contiguous in memory, noncontiguous in file. should be the most
398 n_filetypes
= st_n_filetypes
;
399 fwr_size
= ADIOI_MIN(st_fwr_size
, bufsize
);
400 while (i_offset
< bufsize
) {
402 /* TYPE_UB and TYPE_LB can result in
403 fwr_size = 0. save system call in such cases */
404 /* lseek(fd->fd_sys, off, SEEK_SET);
405 err = write(fd->fd_sys, ((char *) buf) + i_offset, fwr_size);*/
409 userbuf_off
= i_offset
;
412 i_offset
+= fwr_size
;
414 if (off
+ fwr_size
< disp
+ flat_file
->indices
[j
] +
415 flat_file
->blocklens
[j
] +
416 n_filetypes
*(ADIO_Offset
)filetype_extent
)
418 /* did not reach end of contiguous block in filetype.
419 no more I/O needed. off is incremented by fwr_size. */
421 j
= (j
+1) % flat_file
->count
;
422 n_filetypes
+= (j
== 0) ? 1 : 0;
423 while (flat_file
->blocklens
[j
]==0) {
424 j
= (j
+1) % flat_file
->count
;
425 n_filetypes
+= (j
== 0) ? 1 : 0;
427 off
= disp
+ flat_file
->indices
[j
] +
428 n_filetypes
*(ADIO_Offset
)filetype_extent
;
429 fwr_size
= ADIOI_MIN(flat_file
->blocklens
[j
],
435 /* noncontiguous in memory as well as in file */
437 ADIOI_Flatten_datatype(datatype
);
438 flat_buf
= ADIOI_Flatlist
;
439 while (flat_buf
->type
!= datatype
) flat_buf
= flat_buf
->next
;
441 k
= num
= buf_count
= 0;
442 i_offset
= flat_buf
->indices
[0];
445 n_filetypes
= st_n_filetypes
;
446 fwr_size
= st_fwr_size
;
447 bwr_size
= flat_buf
->blocklens
[0];
449 while (num
< bufsize
) {
450 size
= ADIOI_MIN(fwr_size
, bwr_size
);
452 /* lseek(fd->fd_sys, off, SEEK_SET);
453 err = write(fd->fd_sys, ((char *) buf) + i_offset, size); */
457 userbuf_off
= i_offset
;
461 new_fwr_size
= fwr_size
;
462 new_bwr_size
= bwr_size
;
464 if (size
== fwr_size
) {
465 /* reached end of contiguous block in file */
466 j
= (j
+1) % flat_file
->count
;
467 n_filetypes
+= (j
== 0) ? 1 : 0;
468 while (flat_file
->blocklens
[j
]==0) {
469 j
= (j
+1) % flat_file
->count
;
470 n_filetypes
+= (j
== 0) ? 1 : 0;
473 off
= disp
+ flat_file
->indices
[j
] +
474 n_filetypes
*(ADIO_Offset
)filetype_extent
;
476 new_fwr_size
= flat_file
->blocklens
[j
];
477 if (size
!= bwr_size
) {
479 new_bwr_size
-= size
;
483 if (size
== bwr_size
) {
484 /* reached end of contiguous block in memory */
486 k
= (k
+ 1)%flat_buf
->count
;
488 i_offset
= (ADIO_Offset
)buftype_extent
*
489 (ADIO_Offset
)(buf_count
/flat_buf
->count
) +
490 flat_buf
->indices
[k
];
491 new_bwr_size
= flat_buf
->blocklens
[k
];
492 if (size
!= fwr_size
) {
494 new_fwr_size
-= size
;
498 fwr_size
= new_fwr_size
;
499 bwr_size
= new_bwr_size
;
503 /* write the buffer out finally */
505 ADIO_WriteContig(fd
, writebuf
, writebuf_len
, MPI_BYTE
,
506 ADIO_EXPLICIT_OFFSET
,
507 writebuf_off
, &status1
, error_code
);
508 if (!(fd
->atomicity
))
509 ADIOI_UNLOCK(fd
, writebuf_off
, SEEK_SET
, writebuf_len
);
510 if (*error_code
!= MPI_SUCCESS
) return;
513 ADIOI_UNLOCK(fd
, start_off
, SEEK_SET
, end_offset
-start_off
+1);
515 ADIOI_Free(writebuf
);
517 if (file_ptr_type
== ADIO_INDIVIDUAL
) fd
->fp_ind
= off
;
520 fd
->fp_sys_posn
= -1; /* set it to null. */
522 #ifdef HAVE_STATUS_SET_BYTES
523 MPIR_Status_set_bytes(status
, datatype
, bufsize
);
524 /* This is a temporary way of filling in status. The right way is to
525 keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
528 if (!buftype_is_contig
)
529 ADIOI_Delete_flattened(datatype
);