rt/aio_misc.c

   1 /* Handle general operations.
   2    Copyright (C) 1997, 1998 Free Software Foundation, Inc.
   3    This file is part of the GNU C Library.
   4    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
   5
   6    The GNU C Library is free software; you can redistribute it and/or
   7    modify it under the terms of the GNU Library General Public License as
   8    published by the Free Software Foundation; either version 2 of the
   9    License, or (at your option) any later version.
  10
  11    The GNU C Library is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14    Library General Public License for more details.
  15
  16    You should have received a copy of the GNU Library General Public
  17    License along with the GNU C Library; see the file COPYING.LIB.  If not,
  18    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19    Boston, MA 02111-1307, USA.  */
  20
  21 #include <aio.h>
  22 #include <errno.h>
  23 #include <limits.h>
  24 #include <pthread.h>
  25 #include <stdlib.h>
  26 #include <unistd.h>
  27 #include <sys/stat.h>
  28
  29 #include "aio_misc.h"
  30
  31 /* Row table of the request-entry pool; get_elem stores the start of each row of entries here.  */
  32 static struct requestlist **pool;
  33
  34 /* POOL_TAB_SIZE counts the row slots in POOL, POOL_SIZE the entries allocated so far.  */
  35 static size_t pool_tab_size;
  36 static size_t pool_size;
  37
  38 /* We implement a two dimensional array but allocate each row separately.
  39    The macro below determines how many entries should be used per row.
  40    It should preferably be a power of two.  */
  41 #define ENTRIES_PER_ROW 16
  42
  43 /* Unit in which the row table is meant to grow.  NOTE(review): no code visible in this file references ROW_STEP.  */
  44 #define ROW_STEP        8
  45
  46 /* Head of the list of unused entries, chained through `next_prio'.  */
  47 static struct requestlist *freelist;
  48
  49 /* Head of the list of requests waiting to be processed, chained through `next_run' in descending `__abs_prio' order.  */
  50 static struct requestlist *runlist;
  51
  52 /* Head of the list of all pending requests: one `next_fd'/`last_fd' entry per descriptor (ascending), each heading a `next_prio' list.  */
  53 static struct requestlist *requests;
  54
  55 /* Number of worker threads currently running; modified with __aio_requests_mutex held.  */
  56 static int nthreads;
  57
  58
  59 /* These are the values used to optimize the use of AIO.  The user can
  60    override them by using the `aio_init' function.  */
  61 static struct aioinit optim =
  62 {
  63   20,   /* int aio_threads;     Maximal number of threads.  */
  64   256,  /* int aio_num;         Number of expected simultaneous requests. */
  65   0,    /* The remaining members are not read anywhere in this file.  */
  66   0,
  67   0,
  68   0,
  69   { 0, }
  70 };
  71
  72
  73 /* Since the request lists are global we need a mutex protecting them; it is statically initialized as a recursive mutex.  */
  74 pthread_mutex_t __aio_requests_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
  75
  76
  77 /* Functions to handle request list pool.  */
  78 static struct requestlist *
  79 get_elem (void)
  80 {
  81   struct requestlist *result;
  82
  83   if (freelist == NULL)
  84     {
  85       struct requestlist *new_row;
  86       size_t new_size;
  87
  88       /* Compute new size.  */
  89       new_size = pool_size ? pool_size + ENTRIES_PER_ROW : optim.aio_num;
  90
  91       if ((new_size / ENTRIES_PER_ROW) >= pool_tab_size)
  92         {
  93           size_t new_tab_size = new_size / ENTRIES_PER_ROW;
  94           struct requestlist **new_tab;
  95
  96           new_tab = (struct requestlist **)
  97             realloc (pool, (new_tab_size * sizeof (struct requestlist *)));
  98
  99           if (new_tab == NULL)
 100             return NULL;
 101
 102           pool_tab_size = new_tab_size;
 103           pool = new_tab;
 104         }
 105
 106       if (pool_size == 0)
 107         {
 108           size_t cnt;
 109
 110           new_row = (struct requestlist *)
 111             calloc (new_size, sizeof (struct requestlist));
 112
 113           if (new_row == NULL)
 114             return NULL;
 115
 116           for (cnt = 0; cnt < new_size / ENTRIES_PER_ROW; ++cnt)
 117             pool[cnt] = &new_row[cnt * ENTRIES_PER_ROW];
 118         }
 119       else
 120         {
 121           /* Allocat one new row.  */
 122           new_row = (struct requestlist *)
 123             calloc (ENTRIES_PER_ROW, sizeof (struct requestlist));
 124           if (new_row == NULL)
 125             return NULL;
 126
 127           pool[new_size / ENTRIES_PER_ROW] = new_row;
 128         }
 129
 130       /* Put all the new entries in the freelist.  */
 131       do
 132         {
 133           new_row->next_prio = freelist;
 134           freelist = new_row++;
 135         }
 136       while (++pool_size < new_size);
 137     }
 138
 139   result = freelist;
 140   freelist = freelist->next_prio;
 141
 142   return result;
 143 }
 144
 145
 146 void
 147 __aio_free_request (struct requestlist *elem)
 148 {
 149   elem->running = no;
 150   elem->next_prio = freelist;
 151   freelist = elem;
 152 }
 153
 154
 155 struct requestlist *
 156 __aio_find_req (aiocb_union *elem)
 157 {
 158   struct requestlist *runp = requests;
 159   int fildes = elem->aiocb.aio_fildes;
 160
 161   while (runp != NULL && runp->aiocbp->aiocb.aio_fildes < fildes)
 162     runp = runp->next_fd;
 163
 164   if (runp != NULL)
 165     {
 166       if (runp->aiocbp->aiocb.aio_fildes != fildes)
 167         runp = NULL;
 168       else
 169         while (runp != NULL && runp->aiocbp != elem)
 170           runp = runp->next_prio;
 171     }
 172
 173   return runp;
 174 }
 175
 176
 177 struct requestlist *
 178 __aio_find_req_fd (int fildes)
 179 {
 180   struct requestlist *runp = requests;
 181
 182   while (runp != NULL && runp->aiocbp->aiocb.aio_fildes < fildes)
 183     runp = runp->next_fd;
 184
 185   return (runp != NULL && runp->aiocbp->aiocb.aio_fildes == fildes
 186           ? runp : NULL);
 187 }
 188
 189
 190 /* The thread handler: start routine handed to pthread_create by __aio_enqueue_request; defined further down.  */
 191 static void *handle_fildes_io (void *arg);
 192
 193
 194 /* User optimization.  */
 195 void
 196 __aio_init (const struct aioinit *init)
 197 {
 198   /* Get the mutex.  */
 199   pthread_mutex_lock (&__aio_requests_mutex);
 200
 201   /* Only allow writing new values if the table is not yet allocated.  */
 202   if (pool == NULL)
 203     {
 204       optim.aio_threads = init->aio_threads < 1 ? 1 : init->aio_threads;
 205       optim.aio_num = (init->aio_num < ENTRIES_PER_ROW
 206                        ? ENTRIES_PER_ROW
 207                        : init->aio_num & ~ENTRIES_PER_ROW);
 208     }
 209
 210   /* Release the mutex.  */
 211   pthread_mutex_unlock (&__aio_requests_mutex);
 212 }
 213 weak_alias (__aio_init, aio_init)
 214
 215
 216 /* The main function of the async I/O handling.  It enqueues requests
 217    and if necessary starts and handles threads.  */
 218 struct requestlist *
 219 __aio_enqueue_request (aiocb_union *aiocbp, int operation)
 220 {
 221   int result = 0;
 222   int policy, prio;
 223   struct sched_param param;
 224   struct requestlist *last, *runp, *newp;
 225   int running = no;
 226
 227   if (aiocbp->aiocb.aio_reqprio < 0
 228       || aiocbp->aiocb.aio_reqprio > AIO_PRIO_DELTA_MAX)
 229     {
 230       /* Invalid priority value.  */
 231       __set_errno (EINVAL);
 232       aiocbp->aiocb.__error_code = EINVAL;
 233       aiocbp->aiocb.__return_value = -1;
 234       return NULL;
 235     }
 236
 237   /* Compute priority for this request.  */
 238   pthread_getschedparam (pthread_self (), &policy, &param);
 239   prio = param.sched_priority - aiocbp->aiocb.aio_reqprio;
 240
 241   /* Get the mutex.  */
 242   pthread_mutex_lock (&__aio_requests_mutex);
 243
 244   last = NULL;
 245   runp = requests;
 246   /* First look whether the current file descriptor is currently
 247      worked with.  */
 248   while (runp != NULL
 249          && runp->aiocbp->aiocb.aio_fildes < aiocbp->aiocb.aio_fildes)
 250     {
 251       last = runp;
 252       runp = runp->next_fd;
 253     }
 254
 255   /* Get a new element for the waiting list.  */
 256   newp = get_elem ();
 257   if (newp == NULL)
 258     {
 259       __set_errno (EAGAIN);
 260       pthread_mutex_unlock (&__aio_requests_mutex);
 261       return NULL;
 262     }
 263   newp->aiocbp = aiocbp;
 264   newp->waiting = NULL;
 265
 266   aiocbp->aiocb.__abs_prio = prio;
 267   aiocbp->aiocb.__policy = policy;
 268   aiocbp->aiocb.aio_lio_opcode = operation;
 269   aiocbp->aiocb.__error_code = EINPROGRESS;
 270   aiocbp->aiocb.__return_value = 0;
 271
 272   if (runp != NULL
 273       && runp->aiocbp->aiocb.aio_fildes == aiocbp->aiocb.aio_fildes)
 274     {
 275       /* The current file descriptor is worked on.  It makes no sense
 276          to start another thread since this new thread would fight
 277          with the running thread for the resources.  But we also cannot
 278          say that the thread processing this desriptor shall immediately
 279          after finishing the current job process this request if there
 280          are other threads in the running queue which have a higher
 281          priority.  */
 282
 283       /* Simply enqueue it after the running one according to the
 284          priority.  */
 285       while (runp->next_prio != NULL
 286              && runp->next_prio->aiocbp->aiocb.__abs_prio >= prio)
 287         runp = runp->next_prio;
 288
 289       newp->next_prio = runp->next_prio;
 290       runp->next_prio = newp;
 291
 292       running = queued;
 293     }
 294   else
 295     {
 296       /* Enqueue this request for a new descriptor.  */
 297       if (last == NULL)
 298         {
 299           newp->last_fd = NULL;
 300           newp->next_fd = requests;
 301           if (requests != NULL)
 302             requests->last_fd = newp;
 303           requests = newp;
 304         }
 305       else
 306         {
 307           newp->next_fd = last->next_fd;
 308           newp->last_fd = last;
 309           last->next_fd = newp;
 310           if (newp->next_fd != NULL)
 311             newp->next_fd->last_fd = newp;
 312         }
 313
 314       newp->next_prio = NULL;
 315     }
 316
 317   if (running == no)
 318     {
 319       /* We try to create a new thread for this file descriptor.  The
 320          function which gets called will handle all available requests
 321          for this descriptor and when all are processed it will
 322          terminate.
 323
 324          If no new thread can be created or if the specified limit of
 325          threads for AIO is reached we queue the request.  */
 326
 327       /* See if we can create a thread.  */
 328       if (nthreads < optim.aio_threads)
 329         {
 330           pthread_t thid;
 331           pthread_attr_t attr;
 332
 333           /* Make sure the thread is created detached.  */
 334           pthread_attr_init (&attr);
 335           pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
 336
 337           /* Now try to start a thread.  */
 338           if (pthread_create (&thid, &attr, handle_fildes_io, newp) == 0)
 339             {
 340               /* We managed to enqueue the request.  All errors which can
 341                  happen now can be recognized by calls to `aio_return' and
 342                  `aio_error'.  */
 343               running = allocated;
 344               ++nthreads;
 345             }
 346           else if (nthreads == 0)
 347             /* We cannot create a thread in the moment and there is
 348                also no thread running.  This is a problem.  `errno' is
 349                set to EAGAIN if this is only a temporary problem.  */
 350             result = -1;
 351         }
 352     }
 353
 354   /* Enqueue the request in the run queue if it is not yet running.  */
 355   if (running < yes && result == 0)
 356     {
 357       if (runlist == NULL || runlist->aiocbp->aiocb.__abs_prio < prio)
 358         {
 359           newp->next_run = runlist;
 360           runlist = newp;
 361         }
 362       else
 363         {
 364           runp = runlist;
 365
 366           while (runp->next_run != NULL
 367                  && runp->next_run->aiocbp->aiocb.__abs_prio >= prio)
 368             runp = runp->next_run;
 369
 370           newp->next_run = runp->next_run;
 371           runp->next_run = newp;
 372         }
 373     }
 374
 375   if (result == 0)
 376     newp->running = running;
 377   else
 378     {
 379       /* Something went wrong.  */
 380       __aio_free_request (newp);
 381       newp = NULL;
 382     }
 383
 384   /* Release the mutex.  */
 385   pthread_mutex_unlock (&__aio_requests_mutex);
 386
 387   return newp;
 388 }
 389
 390
 391 static void *
 392 handle_fildes_io (void *arg)
 393 {
 394   pthread_t self = pthread_self ();
 395   struct sched_param param;
 396   struct requestlist *runp = (struct requestlist *) arg;
 397   aiocb_union *aiocbp;
 398   int policy;
 399   int fildes;
 400
 401   pthread_getschedparam (self, &policy, &param);
 402
 403   do
 404     {
 405       /* Update our variables.  */
 406       aiocbp = runp->aiocbp;
 407       fildes = aiocbp->aiocb.aio_fildes;
 408
 409       /* Change the priority to the requested value (if necessary).  */
 410       if (aiocbp->aiocb.__abs_prio != param.sched_priority
 411           || aiocbp->aiocb.__policy != policy)
 412         {
 413           param.sched_priority = aiocbp->aiocb.__abs_prio;
 414           policy = aiocbp->aiocb.__policy;
 415           pthread_setschedparam (self, policy, &param);
 416         }
 417
 418       /* Process request pointed to by RUNP.  We must not be disturbed
 419          by signals.  */
 420       if ((aiocbp->aiocb.aio_lio_opcode & 127) == LIO_READ)
 421         {
 422           if (aiocbp->aiocb.aio_lio_opcode & 128)
 423             aiocbp->aiocb.__return_value =
 424               TEMP_FAILURE_RETRY (__pread64 (fildes,
 425                                              (void *) aiocbp->aiocb64.aio_buf,
 426                                              aiocbp->aiocb64.aio_nbytes,
 427                                              aiocbp->aiocb64.aio_offset));
 428           else
 429             aiocbp->aiocb.__return_value =
 430               TEMP_FAILURE_RETRY (pread (fildes,
 431                                          (void *) aiocbp->aiocb.aio_buf,
 432                                          aiocbp->aiocb.aio_nbytes,
 433                                          aiocbp->aiocb.aio_offset));
 434         }
 435       else if ((aiocbp->aiocb.aio_lio_opcode & 127) == LIO_WRITE)
 436         {
 437           if (aiocbp->aiocb.aio_lio_opcode & 128)
 438             aiocbp->aiocb.__return_value =
 439               TEMP_FAILURE_RETRY (__pwrite64 (fildes,
 440                                               (const void *) aiocbp->aiocb64.aio_buf,
 441                                               aiocbp->aiocb64.aio_nbytes,
 442                                               aiocbp->aiocb64.aio_offset));
 443           else
 444             aiocbp->aiocb.__return_value =
 445               TEMP_FAILURE_RETRY (pwrite (fildes,
 446                                           (const void *) aiocbp->aiocb.aio_buf,
 447                                           aiocbp->aiocb.aio_nbytes,
 448                                           aiocbp->aiocb.aio_offset));
 449         }
 450       else if (aiocbp->aiocb.aio_lio_opcode == LIO_DSYNC)
 451         aiocbp->aiocb.__return_value = TEMP_FAILURE_RETRY (fdatasync (fildes));
 452       else if (aiocbp->aiocb.aio_lio_opcode == LIO_SYNC)
 453         aiocbp->aiocb.__return_value = TEMP_FAILURE_RETRY (fsync (fildes));
 454       else
 455         {
 456           /* This is an invalid opcode.  */
 457           aiocbp->aiocb.__return_value = -1;
 458           __set_errno (EINVAL);
 459         }
 460
 461       /* Get the mutex.  */
 462       pthread_mutex_lock (&__aio_requests_mutex);
 463
 464       if (aiocbp->aiocb.__return_value == -1)
 465         aiocbp->aiocb.__error_code = errno;
 466       else
 467         aiocbp->aiocb.__error_code = 0;
 468
 469       /* Send the signal to notify about finished processing of the
 470          request.  */
 471       __aio_notify (runp);
 472
 473       /* Now dequeue the current request.  */
 474       if (runp->next_prio == NULL)
 475         {
 476           /* No outstanding request for this descriptor.  Remove this
 477              descriptor from the list.  */
 478           if (runp->next_fd != NULL)
 479             runp->next_fd->last_fd = runp->last_fd;
 480           if (runp->last_fd != NULL)
 481             runp->last_fd->next_fd = runp->next_fd;
 482           else
 483             requests = runp->next_fd;
 484         }
 485       else
 486         {
 487           runp->next_prio->last_fd = runp->last_fd;
 488           runp->next_prio->next_fd = runp->next_fd;
 489           runp->next_prio->running = yes;
 490           if (runp->next_fd != NULL)
 491             runp->next_fd->last_fd = runp->next_prio;
 492           if (runp->last_fd != NULL)
 493             runp->last_fd->next_fd = runp->next_prio;
 494           else
 495             requests = runp->next_prio;
 496         }
 497
 498       /* Free the old element.  */
 499       __aio_free_request (runp);
 500
 501       runp = runlist;
 502       if (runp != NULL)
 503         {
 504           /* We must not run requests which are not marked `running'.  */
 505           if (runp->running == yes)
 506             runlist = runp->next_run;
 507           else
 508             {
 509               struct requestlist *old;
 510
 511               do
 512                 {
 513                   old = runp;
 514                   runp = runp->next_run;
 515                 }
 516               while (runp != NULL && runp->running != yes);
 517
 518               if (runp != NULL)
 519                 old->next_run = runp->next_run;
 520             }
 521         }
 522
 523       /* If no request to work on we will stop the thread.  */
 524       if (runp == NULL)
 525         --nthreads;
 526       else
 527         runp->running = allocated;
 528
 529       /* Release the mutex.  */
 530       pthread_mutex_unlock (&__aio_requests_mutex);
 531     }
 532   while (runp != NULL);
 533
 534   pthread_exit (NULL);
 535 }
 536
 537
 538 /* Free allocated resources.  */
 539 static void
 540 __attribute__ ((unused))
 541 free_res (void)
 542 {
 543   size_t row;
 544
 545   /* The first block of rows as specified in OPTIM is allocated in
 546      one chunk.  */
 547   free (pool[0]);
 548
 549   for (row = optim.aio_num / ENTRIES_PER_ROW; row < pool_tab_size; ++row)
 550     free (pool[row]);
 551
 552   free (pool);
 553 }
 554
 555 text_set_element (__libc_subfreeres, free_res);