/* ---------------------------------------------------------------- */
/* (C)Copyright IBM Corp.  2007, 2008                               */
/* ---------------------------------------------------------------- */
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 *   Copyright (C) 1997 University of Chicago.
 *   See COPYRIGHT notice in top-level directory.
 */
#include "ad_bgl_aggrs.h"

#include <sys/statfs.h>
#include <sys/types.h>
#include <unistd.h>
/* COPIED FROM ad_fstype.c since it is static in that file

 ADIO_FileSysType_parentdir - determines a string pathname for the
 parent directory of a given filename.

Input Parameters:
. filename - pointer to file name character array

Output Parameters:
. dirnamep - pointer to location in which to store a pointer to a string

 Note that the caller should free the memory located at the pointer returned
 after the string is no longer needed.
*/

/* Size used for the symlink-target buffer when the system headers do not
 * already provide PATH_MAX. */
#ifndef PATH_MAX
#define PATH_MAX 65535
#endif

/* In a strict ANSI environment, S_ISLNK may not be defined.  Fix that
   here.  We assume that S_ISLNK is *always* defined as a macro.  If
   that is not universally true, then add a test to the romio
   configure that tries to link a program that references S_ISLNK */
#if !defined(S_ISLNK)
#    if defined(S_IFLNK) && defined(S_IFMT)
     /* Compare the whole file-type field.  A plain bit test against
      * S_IFLNK is not enough: on Linux, for example, S_IFLNK (0120000)
      * shares a bit with S_IFREG (0100000), so regular files would be
      * reported as links. */
#    define S_ISLNK(mode) (((mode) & S_IFMT) == S_IFLNK)
#    elif defined(S_IFLNK)
     /* No type mask available: require every S_IFLNK bit to be set */
#    define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK)
#    else
     /* no way to check if it is a link, so say false */
#    define S_ISLNK(mode) 0
#    endif
#endif /* !(S_ISLNK) */
54 /* ADIO_FileSysType_parentdir
56 * Returns pointer to string in dirnamep; that string is allocated with
57 * strdup and must be free()'d.
59 static void ADIO_FileSysType_parentdir(char *filename
, char **dirnamep
)
62 char *dir
= NULL
, *slash
;
65 err
= lstat(filename
, &statbuf
);
67 if (err
|| (!S_ISLNK(statbuf
.st_mode
))) {
68 /* no such file, or file is not a link; these are the "normal"
69 * cases where we can just return the parent directory.
71 dir
= ADIOI_Strdup(filename
);
74 /* filename is a symlink. we've presumably already tried
75 * to stat it and found it to be missing (dangling link),
76 * but this code doesn't care if the target is really there
82 linkbuf
= ADIOI_Malloc(PATH_MAX
+1);
83 namelen
= readlink(filename
, linkbuf
, PATH_MAX
+1);
85 /* something strange has happened between the time that
86 * we determined that this was a link and the time that
87 * we attempted to read it; punt and use the old name.
89 dir
= ADIOI_Strdup(filename
);
92 /* successfully read the link */
93 linkbuf
[namelen
] = '\0'; /* readlink doesn't null terminate */
94 dir
= ADIOI_Strdup(linkbuf
);
99 slash
= strrchr(dir
, '/');
100 if (!slash
) ADIOI_Strncpy(dir
, ".", 2);
102 if (slash
== dir
) *(dir
+ 1) = '\0';
110 static void scaleable_stat(ADIO_File fd
)
112 struct stat64 bgl_stat
;
113 struct statfs bgl_statfs
;
117 MPI_Comm_rank(fd
->comm
, &rank
);
120 /* Get the (real) underlying file system block size */
121 rc
= stat64(fd
->filename
, &bgl_stat
);
124 buf
[0] = bgl_stat
.st_blksize
;
125 DBGV_FPRINTF(stderr
,"Successful stat '%s'. Blocksize=%ld\n",
126 fd
->filename
,bgl_stat
.st_blksize
);
130 DBGV_FPRINTF(stderr
,"Stat '%s' failed with rc=%d, errno=%d\n",
131 fd
->filename
,rc
,errno
);
133 /* Get the (real) underlying file system type so we can
134 * plan our fsync scaling strategy */
135 rc
= statfs(fd
->filename
,&bgl_statfs
);
138 DBGV_FPRINTF(stderr
,"Successful statfs '%s'. Magic number=%#X\n",
139 fd
->filename
,bgl_statfs
.f_type
);
140 buf
[1] = bgl_statfs
.f_type
;
144 DBGV_FPRINTF(stderr
,"Statfs '%s' failed with rc=%d, errno=%d\n",
145 fd
->filename
,rc
,errno
);
146 ADIO_FileSysType_parentdir(fd
->filename
, &dir
);
147 rc
= statfs(dir
,&bgl_statfs
);
150 DBGV_FPRINTF(stderr
,"Successful statfs '%s'. Magic number=%#X\n",dir
,bgl_statfs
.f_type
);
151 buf
[1] = bgl_statfs
.f_type
;
155 /* Hmm. Guess we'll assume the worst-case, that it's not GPFS
156 * or BGLOCKLESSMPIO_F_TYPE (default PVFS2) below */
157 buf
[1] = -1; /* bogus magic number */
158 DBGV_FPRINTF(stderr
,"Statfs '%s' failed with rc=%d, errno=%d\n",dir
,rc
,errno
);
163 /* now we can broadcast the stat/statfs data to everyone else */
164 MPI_Bcast(buf
, 2, MPI_LONG
, 0, fd
->comm
);
165 bgl_stat
.st_blksize
= buf
[0];
166 bgl_statfs
.f_type
= buf
[1];
168 /* data from stat64 */
169 /* store the blksize in the file system specific storage */
170 ((ADIOI_BGL_fs
*)fd
->fs_ptr
)->blksize
= bgl_stat
.st_blksize
;
172 /* data from statfs */
173 if ((bgl_statfs
.f_type
== GPFS_SUPER_MAGIC
) ||
174 (bgl_statfs
.f_type
== bglocklessmpio_f_type
))
176 ((ADIOI_BGL_fs
*)fd
->fs_ptr
)->fsync_aggr
=
177 ADIOI_BGL_FSYNC_AGGREGATION_ENABLED
;
179 /* Only one rank is an "fsync aggregator" because only one
183 ((ADIOI_BGL_fs
*)fd
->fs_ptr
)->fsync_aggr
|=
184 ADIOI_BGL_FSYNC_AGGREGATOR
;
185 DBG_FPRINTF(stderr
,"fsync aggregator %d\n",rank
);
187 else ; /* aggregation enabled but this rank is not an aggregator*/
189 else; /* Other filesystems default to no fsync aggregation */
193 void ADIOI_BGL_Open(ADIO_File fd
, int *error_code
)
195 int perm
, old_mask
, amode
;
196 static char myname
[] = "ADIOI_BGL_OPEN";
198 /* set internal variables for tuning environment variables */
199 ad_bgl_get_env_vars();
201 if (fd
->perm
== ADIO_PERM_NULL
) {
202 old_mask
= umask(022);
204 perm
= old_mask
^ 0666;
206 else perm
= fd
->perm
;
209 if (fd
->access_mode
& ADIO_CREATE
)
210 amode
= amode
| O_CREAT
;
211 if (fd
->access_mode
& ADIO_RDONLY
)
212 amode
= amode
| O_RDONLY
;
213 if (fd
->access_mode
& ADIO_WRONLY
)
214 amode
= amode
| O_WRONLY
;
215 if (fd
->access_mode
& ADIO_RDWR
)
216 amode
= amode
| O_RDWR
;
217 if (fd
->access_mode
& ADIO_EXCL
)
218 amode
= amode
| O_EXCL
;
219 #ifdef ADIOI_MPE_LOGGING
220 MPE_Log_event(ADIOI_MPE_open_a
, 0, NULL
);
222 fd
->fd_sys
= open(fd
->filename
, amode
, perm
);
223 #ifdef ADIOI_MPE_LOGGING
224 MPE_Log_event(ADIOI_MPE_open_b
, 0, NULL
);
226 DBG_FPRINTF(stderr
,"open('%s',%#X,%#X) rc=%d, errno=%d\n",fd
->filename
,amode
,perm
,fd
->fd_sys
,errno
);
229 if ((fd
->fd_sys
!= -1) && (fd
->access_mode
& ADIO_APPEND
))
230 fd
->fp_ind
= fd
->fp_sys_posn
= lseek(fd
->fd_sys
, 0, SEEK_END
);
234 /* Initialize the ad_bgl file system specific information */
235 AD_BGL_assert(fd
->fs_ptr
== NULL
);
236 fd
->fs_ptr
= (ADIOI_BGL_fs
*) ADIOI_Malloc(sizeof(ADIOI_BGL_fs
));
238 ((ADIOI_BGL_fs
*)fd
->fs_ptr
)->blksize
= 1048576; /* default to 1M */
240 /* default is no fsync aggregation */
241 ((ADIOI_BGL_fs
*)fd
->fs_ptr
)->fsync_aggr
=
242 ADIOI_BGL_FSYNC_AGGREGATION_DISABLED
;
245 #ifdef ADIOI_MPE_LOGGING
246 MPE_Log_event(ADIOI_MPE_stat_a
, 0, NULL
);
249 #ifdef ADIOI_MPE_LOGGING
250 MPE_Log_event(ADIOI_MPE_stat_b
, 0, NULL
);
254 if (fd
->fd_sys
== -1) {
255 if (errno
== ENAMETOOLONG
)
256 *error_code
= MPIO_Err_create_code(MPI_SUCCESS
,
257 MPIR_ERR_RECOVERABLE
, myname
,
258 __LINE__
, MPI_ERR_BAD_FILE
,
260 "**filenamelong %s %d",
262 strlen(fd
->filename
));
263 else if (errno
== ENOENT
)
264 *error_code
= MPIO_Err_create_code(MPI_SUCCESS
,
265 MPIR_ERR_RECOVERABLE
, myname
,
266 __LINE__
, MPI_ERR_NO_SUCH_FILE
,
270 else if (errno
== ENOTDIR
|| errno
== ELOOP
)
271 *error_code
= MPIO_Err_create_code(MPI_SUCCESS
,
272 MPIR_ERR_RECOVERABLE
,
278 else if (errno
== EACCES
) {
279 *error_code
= MPIO_Err_create_code(MPI_SUCCESS
,
280 MPIR_ERR_RECOVERABLE
, myname
,
281 __LINE__
, MPI_ERR_ACCESS
,
286 else if (errno
== EROFS
) {
287 /* Read only file or file system and write access requested */
288 *error_code
= MPIO_Err_create_code(MPI_SUCCESS
,
289 MPIR_ERR_RECOVERABLE
, myname
,
290 __LINE__
, MPI_ERR_READ_ONLY
,
294 *error_code
= MPIO_Err_create_code(MPI_SUCCESS
,
295 MPIR_ERR_RECOVERABLE
, myname
,
296 __LINE__
, MPI_ERR_IO
, "**io",
297 "**io %s", strerror(errno
));
300 else *error_code
= MPI_SUCCESS
;
303 *vim: ts=8 sts=4 sw=4 noexpandtab