src/backend/catalog/catalog.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * catalog.c
   4  *              routines concerned with catalog naming conventions and other
   5  *              bits of hard-wired knowledge
   6  *
   7  *
   8  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   9  * Portions Copyright (c) 1994, Regents of the University of California
  10  *
  11  *
  12  * IDENTIFICATION
  13  *        $PostgreSQL$
  14  *
  15  *-------------------------------------------------------------------------
  16  */
  17
  18 #include "postgres.h"
  19
  20 #include <fcntl.h>
  21 #include <unistd.h>
  22
  23 #include "access/genam.h"
  24 #include "access/sysattr.h"
  25 #include "access/transam.h"
  26 #include "catalog/catalog.h"
  27 #include "catalog/indexing.h"
  28 #include "catalog/namespace.h"
  29 #include "catalog/pg_auth_members.h"
  30 #include "catalog/pg_authid.h"
  31 #include "catalog/pg_database.h"
  32 #include "catalog/pg_namespace.h"
  33 #include "catalog/pg_pltemplate.h"
  34 #include "catalog/pg_shdepend.h"
  35 #include "catalog/pg_shdescription.h"
  36 #include "catalog/pg_tablespace.h"
  37 #include "catalog/toasting.h"
  38 #include "miscadmin.h"
  39 #include "storage/fd.h"
  40 #include "utils/fmgroids.h"
  41 #include "utils/rel.h"
  42 #include "utils/tqual.h"
  43
  44
  45 #define OIDCHARS                10                      /* max chars printed by %u */
  46 #define FORKNAMECHARS   4                       /* max chars for a fork name */
  47
  48 /*
  49  * Lookup table of fork name by fork number.
  50  *
  51  * If you add a new entry, remember to update the errhint below, and the
  52  * documentation for pg_relation_size(). Also keep FORKNAMECHARS above
  53  * up-to-date.
  54  */
  55 const char *forkNames[] = {
  56         "main", /* MAIN_FORKNUM */
  57         "fsm"   /* FSM_FORKNUM */
  58 };
  59
  60 /*
  61  * forkname_to_number - look up fork number by name
  62  */
  63 ForkNumber
  64 forkname_to_number(char *forkName)
  65 {
  66         ForkNumber forkNum;
  67
  68         for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
  69                 if (strcmp(forkNames[forkNum], forkName) == 0)
  70                         return forkNum;
  71
  72         ereport(ERROR,
  73                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  74                          errmsg("invalid fork name"),
  75                          errhint("Valid fork names are 'main' and 'fsm'")));
  76         return InvalidForkNumber; /* keep compiler quiet */
  77 }
  78
  79 /*
  80  * relpath                      - construct path to a relation's file
  81  *
  82  * Result is a palloc'd string.
  83  */
  84 char *
  85 relpath(RelFileNode rnode, ForkNumber forknum)
  86 {
  87         int                     pathlen;
  88         char       *path;
  89
  90         if (rnode.spcNode == GLOBALTABLESPACE_OID)
  91         {
  92                 /* Shared system relations live in {datadir}/global */
  93                 Assert(rnode.dbNode == 0);
  94                 pathlen = 7 + OIDCHARS + 1 + FORKNAMECHARS + 1;
  95                 path = (char *) palloc(pathlen);
  96                 if (forknum != MAIN_FORKNUM)
  97                         snprintf(path, pathlen, "global/%u_%s",
  98                                          rnode.relNode, forkNames[forknum]);
  99                 else
 100                         snprintf(path, pathlen, "global/%u", rnode.relNode);
 101         }
 102         else if (rnode.spcNode == DEFAULTTABLESPACE_OID)
 103         {
 104                 /* The default tablespace is {datadir}/base */
 105                 pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNAMECHARS + 1;
 106                 path = (char *) palloc(pathlen);
 107                 if (forknum != MAIN_FORKNUM)
 108                         snprintf(path, pathlen, "base/%u/%u_%s",
 109                                          rnode.dbNode, rnode.relNode, forkNames[forknum]);
 110                 else
 111                         snprintf(path, pathlen, "base/%u/%u",
 112                                          rnode.dbNode, rnode.relNode);
 113         }
 114         else
 115         {
 116                 /* All other tablespaces are accessed via symlinks */
 117                 pathlen = 10 + OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1
 118                         + FORKNAMECHARS + 1;
 119                 path = (char *) palloc(pathlen);
 120                 if (forknum != MAIN_FORKNUM)
 121                         snprintf(path, pathlen, "pg_tblspc/%u/%u/%u_%s",
 122                                          rnode.spcNode, rnode.dbNode, rnode.relNode,
 123                                          forkNames[forknum]);
 124                 else
 125                         snprintf(path, pathlen, "pg_tblspc/%u/%u/%u",
 126                                          rnode.spcNode, rnode.dbNode, rnode.relNode);
 127         }
 128         return path;
 129 }
 130
 131 /*
 132  * GetDatabasePath                      - construct path to a database dir
 133  *
 134  * Result is a palloc'd string.
 135  *
 136  * XXX this must agree with relpath()!
 137  */
 138 char *
 139 GetDatabasePath(Oid dbNode, Oid spcNode)
 140 {
 141         int                     pathlen;
 142         char       *path;
 143
 144         if (spcNode == GLOBALTABLESPACE_OID)
 145         {
 146                 /* Shared system relations live in {datadir}/global */
 147                 Assert(dbNode == 0);
 148                 pathlen = 6 + 1;
 149                 path = (char *) palloc(pathlen);
 150                 snprintf(path, pathlen, "global");
 151         }
 152         else if (spcNode == DEFAULTTABLESPACE_OID)
 153         {
 154                 /* The default tablespace is {datadir}/base */
 155                 pathlen = 5 + OIDCHARS + 1;
 156                 path = (char *) palloc(pathlen);
 157                 snprintf(path, pathlen, "base/%u",
 158                                  dbNode);
 159         }
 160         else
 161         {
 162                 /* All other tablespaces are accessed via symlinks */
 163                 pathlen = 10 + OIDCHARS + 1 + OIDCHARS + 1;
 164                 path = (char *) palloc(pathlen);
 165                 snprintf(path, pathlen, "pg_tblspc/%u/%u",
 166                                  spcNode, dbNode);
 167         }
 168         return path;
 169 }
 170
 171
 172 /*
 173  * IsSystemRelation
 174  *              True iff the relation is a system catalog relation.
 175  *
 176  *              NB: TOAST relations are considered system relations by this test
 177  *              for compatibility with the old IsSystemRelationName function.
 178  *              This is appropriate in many places but not all.  Where it's not,
 179  *              also check IsToastRelation.
 180  *
 181  *              We now just test if the relation is in the system catalog namespace;
 182  *              so it's no longer necessary to forbid user relations from having
 183  *              names starting with pg_.
 184  */
 185 bool
 186 IsSystemRelation(Relation relation)
 187 {
 188         return IsSystemNamespace(RelationGetNamespace(relation)) ||
 189                 IsToastNamespace(RelationGetNamespace(relation));
 190 }
 191
 192 /*
 193  * IsSystemClass
 194  *              Like the above, but takes a Form_pg_class as argument.
 195  *              Used when we do not want to open the relation and have to
 196  *              search pg_class directly.
 197  */
 198 bool
 199 IsSystemClass(Form_pg_class reltuple)
 200 {
 201         Oid                     relnamespace = reltuple->relnamespace;
 202
 203         return IsSystemNamespace(relnamespace) ||
 204                 IsToastNamespace(relnamespace);
 205 }
 206
 207 /*
 208  * IsToastRelation
 209  *              True iff relation is a TOAST support relation (or index).
 210  */
 211 bool
 212 IsToastRelation(Relation relation)
 213 {
 214         return IsToastNamespace(RelationGetNamespace(relation));
 215 }
 216
 217 /*
 218  * IsToastClass
 219  *              Like the above, but takes a Form_pg_class as argument.
 220  *              Used when we do not want to open the relation and have to
 221  *              search pg_class directly.
 222  */
 223 bool
 224 IsToastClass(Form_pg_class reltuple)
 225 {
 226         Oid                     relnamespace = reltuple->relnamespace;
 227
 228         return IsToastNamespace(relnamespace);
 229 }
 230
 231 /*
 232  * IsSystemNamespace
 233  *              True iff namespace is pg_catalog.
 234  *
 235  * NOTE: the reason this isn't a macro is to avoid having to include
 236  * catalog/pg_namespace.h in a lot of places.
 237  */
 238 bool
 239 IsSystemNamespace(Oid namespaceId)
 240 {
 241         return namespaceId == PG_CATALOG_NAMESPACE;
 242 }
 243
 244 /*
 245  * IsToastNamespace
 246  *              True iff namespace is pg_toast or my temporary-toast-table namespace.
 247  *
 248  * Note: this will return false for temporary-toast-table namespaces belonging
 249  * to other backends.  Those are treated the same as other backends' regular
 250  * temp table namespaces, and access is prevented where appropriate.
 251  */
 252 bool
 253 IsToastNamespace(Oid namespaceId)
 254 {
 255         return (namespaceId == PG_TOAST_NAMESPACE) ||
 256                 isTempToastNamespace(namespaceId);
 257 }
 258
 259
 260 /*
 261  * IsReservedName
 262  *              True iff name starts with the pg_ prefix.
 263  *
 264  *              For some classes of objects, the prefix pg_ is reserved for
 265  *              system objects only.  As of 8.0, this is only true for
 266  *              schema and tablespace names.
 267  */
 268 bool
 269 IsReservedName(const char *name)
 270 {
 271         /* ugly coding for speed */
 272         return (name[0] == 'p' &&
 273                         name[1] == 'g' &&
 274                         name[2] == '_');
 275 }
 276
 277
 278 /*
 279  * IsSharedRelation
 280  *              Given the OID of a relation, determine whether it's supposed to be
 281  *              shared across an entire database cluster.
 282  *
 283  * Hard-wiring this list is pretty grotty, but we really need it so that
 284  * we can compute the locktag for a relation (and then lock it) without
 285  * having already read its pg_class entry.      If we try to retrieve relisshared
 286  * from pg_class with no pre-existing lock, there is a race condition against
 287  * anyone who is concurrently committing a change to the pg_class entry:
 288  * since we read system catalog entries under SnapshotNow, it's possible
 289  * that both the old and new versions of the row are invalid at the instants
 290  * we scan them.  We fix this by insisting that updaters of a pg_class
 291  * row must hold exclusive lock on the corresponding rel, and that users
 292  * of a relation must hold at least AccessShareLock on the rel *before*
 293  * trying to open its relcache entry.  But to lock a rel, you have to
 294  * know if it's shared.  Fortunately, the set of shared relations is
 295  * fairly static, so a hand-maintained list of their OIDs isn't completely
 296  * impractical.
 297  */
 298 bool
 299 IsSharedRelation(Oid relationId)
 300 {
 301         /* These are the shared catalogs (look for BKI_SHARED_RELATION) */
 302         if (relationId == AuthIdRelationId ||
 303                 relationId == AuthMemRelationId ||
 304                 relationId == DatabaseRelationId ||
 305                 relationId == PLTemplateRelationId ||
 306                 relationId == SharedDescriptionRelationId ||
 307                 relationId == SharedDependRelationId ||
 308                 relationId == TableSpaceRelationId)
 309                 return true;
 310         /* These are their indexes (see indexing.h) */
 311         if (relationId == AuthIdRolnameIndexId ||
 312                 relationId == AuthIdOidIndexId ||
 313                 relationId == AuthMemRoleMemIndexId ||
 314                 relationId == AuthMemMemRoleIndexId ||
 315                 relationId == DatabaseNameIndexId ||
 316                 relationId == DatabaseOidIndexId ||
 317                 relationId == PLTemplateNameIndexId ||
 318                 relationId == SharedDescriptionObjIndexId ||
 319                 relationId == SharedDependDependerIndexId ||
 320                 relationId == SharedDependReferenceIndexId ||
 321                 relationId == TablespaceOidIndexId ||
 322                 relationId == TablespaceNameIndexId)
 323                 return true;
 324         /* These are their toast tables and toast indexes (see toasting.h) */
 325         if (relationId == PgAuthidToastTable ||
 326                 relationId == PgAuthidToastIndex ||
 327                 relationId == PgDatabaseToastTable ||
 328                 relationId == PgDatabaseToastIndex ||
 329                 relationId == PgShdescriptionToastTable ||
 330                 relationId == PgShdescriptionToastIndex)
 331                 return true;
 332         return false;
 333 }
 334
 335
 336 /*
 337  * GetNewOid
 338  *              Generate a new OID that is unique within the given relation.
 339  *
 340  * Caller must have a suitable lock on the relation.
 341  *
 342  * Uniqueness is promised only if the relation has a unique index on OID.
 343  * This is true for all system catalogs that have OIDs, but might not be
 344  * true for user tables.  Note that we are effectively assuming that the
 345  * table has a relatively small number of entries (much less than 2^32)
 346  * and there aren't very long runs of consecutive existing OIDs.  Again,
 347  * this is reasonable for system catalogs but less so for user tables.
 348  *
 349  * Since the OID is not immediately inserted into the table, there is a
 350  * race condition here; but a problem could occur only if someone else
 351  * managed to cycle through 2^32 OIDs and generate the same OID before we
 352  * finish inserting our row.  This seems unlikely to be a problem.      Note
 353  * that if we had to *commit* the row to end the race condition, the risk
 354  * would be rather higher; therefore we use SnapshotDirty in the test,
 355  * so that we will see uncommitted rows.
 356  */
 357 Oid
 358 GetNewOid(Relation relation)
 359 {
 360         Oid                     oidIndex;
 361
 362         /* If relation doesn't have OIDs at all, caller is confused */
 363         Assert(relation->rd_rel->relhasoids);
 364
 365         /* In bootstrap mode, we don't have any indexes to use */
 366         if (IsBootstrapProcessingMode())
 367                 return GetNewObjectId();
 368
 369         /* The relcache will cache the identity of the OID index for us */
 370         oidIndex = RelationGetOidIndex(relation);
 371
 372         /* If no OID index, just hand back the next OID counter value */
 373         if (!OidIsValid(oidIndex))
 374         {
 375                 /*
 376                  * System catalogs that have OIDs should *always* have a unique OID
 377                  * index; we should only take this path for user tables. Give a
 378                  * warning if it looks like somebody forgot an index.
 379                  */
 380                 if (IsSystemRelation(relation))
 381                         elog(WARNING, "generating possibly-non-unique OID for \"%s\"",
 382                                  RelationGetRelationName(relation));
 383
 384                 return GetNewObjectId();
 385         }
 386
 387         /* Otherwise, use the index to find a nonconflicting OID */
 388         return GetNewOidWithIndex(relation, oidIndex, ObjectIdAttributeNumber);
 389 }
 390
 391 /*
 392  * GetNewOidWithIndex
 393  *              Guts of GetNewOid: use the supplied index
 394  *
 395  * This is exported separately because there are cases where we want to use
 396  * an index that will not be recognized by RelationGetOidIndex: TOAST tables
 397  * and pg_largeobject have indexes that are usable, but have multiple columns
 398  * and are on ordinary columns rather than a true OID column.  This code
 399  * will work anyway, so long as the OID is the index's first column.  The
 400  * caller must pass in the actual heap attnum of the OID column, however.
 401  *
 402  * Caller must have a suitable lock on the relation.
 403  */
 404 Oid
 405 GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn)
 406 {
 407         Oid                     newOid;
 408         SnapshotData SnapshotDirty;
 409         SysScanDesc     scan;
 410         ScanKeyData key;
 411         bool            collides;
 412
 413         InitDirtySnapshot(SnapshotDirty);
 414
 415         /* Generate new OIDs until we find one not in the table */
 416         do
 417         {
 418                 CHECK_FOR_INTERRUPTS();
 419
 420                 newOid = GetNewObjectId();
 421
 422                 ScanKeyInit(&key,
 423                                         oidcolumn,
 424                                         BTEqualStrategyNumber, F_OIDEQ,
 425                                         ObjectIdGetDatum(newOid));
 426
 427                 /* see notes above about using SnapshotDirty */
 428                 scan = systable_beginscan(relation, indexId, true,
 429                                                                   &SnapshotDirty, 1, &key);
 430
 431                 collides = HeapTupleIsValid(systable_getnext(scan));
 432
 433                 systable_endscan(scan);
 434         } while (collides);
 435
 436         return newOid;
 437 }
 438
 439 /*
 440  * GetNewRelFileNode
 441  *              Generate a new relfilenode number that is unique within the given
 442  *              tablespace.
 443  *
 444  * If the relfilenode will also be used as the relation's OID, pass the
 445  * opened pg_class catalog, and this routine will guarantee that the result
 446  * is also an unused OID within pg_class.  If the result is to be used only
 447  * as a relfilenode for an existing relation, pass NULL for pg_class.
 448  *
 449  * As with GetNewOid, there is some theoretical risk of a race condition,
 450  * but it doesn't seem worth worrying about.
 451  *
 452  * Note: we don't support using this in bootstrap mode.  All relations
 453  * created by bootstrap have preassigned OIDs, so there's no need.
 454  */
 455 Oid
 456 GetNewRelFileNode(Oid reltablespace, bool relisshared, Relation pg_class)
 457 {
 458         RelFileNode rnode;
 459         char       *rpath;
 460         int                     fd;
 461         bool            collides;
 462
 463         /* This should match RelationInitPhysicalAddr */
 464         rnode.spcNode = reltablespace ? reltablespace : MyDatabaseTableSpace;
 465         rnode.dbNode = relisshared ? InvalidOid : MyDatabaseId;
 466
 467         do
 468         {
 469                 CHECK_FOR_INTERRUPTS();
 470
 471                 /* Generate the OID */
 472                 if (pg_class)
 473                         rnode.relNode = GetNewOid(pg_class);
 474                 else
 475                         rnode.relNode = GetNewObjectId();
 476
 477                 /* Check for existing file of same name */
 478                 rpath = relpath(rnode, MAIN_FORKNUM);
 479                 fd = BasicOpenFile(rpath, O_RDONLY | PG_BINARY, 0);
 480
 481                 if (fd >= 0)
 482                 {
 483                         /* definite collision */
 484                         close(fd);
 485                         collides = true;
 486                 }
 487                 else
 488                 {
 489                         /*
 490                          * Here we have a little bit of a dilemma: if errno is something
 491                          * other than ENOENT, should we declare a collision and loop? In
 492                          * particular one might think this advisable for, say, EPERM.
 493                          * However there really shouldn't be any unreadable files in a
 494                          * tablespace directory, and if the EPERM is actually complaining
 495                          * that we can't read the directory itself, we'd be in an infinite
 496                          * loop.  In practice it seems best to go ahead regardless of the
 497                          * errno.  If there is a colliding file we will get an smgr
 498                          * failure when we attempt to create the new relation file.
 499                          */
 500                         collides = false;
 501                 }
 502
 503                 pfree(rpath);
 504         } while (collides);
 505
 506         return rnode.relNode;
 507 }