Merged revisions 85328 via svnmerge from
[python/dscho.git] / Lib / uuid.py
blob6e687943d66f8624a8ab67c29f709ee01280a812
1 r"""UUID objects (universally unique identifiers) according to RFC 4122.
3 This module provides immutable UUID objects (class UUID) and the functions
4 uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5
5 UUIDs as specified in RFC 4122.
7 If all you want is a unique ID, you should probably call uuid1() or uuid4().
8 Note that uuid1() may compromise privacy since it creates a UUID containing
9 the computer's network address. uuid4() creates a random UUID.
11 Typical usage:
13 >>> import uuid
15 # make a UUID based on the host ID and current time
16 >>> uuid.uuid1() # doctest: +SKIP
17 UUID('a8098c1a-f86e-11da-bd1a-00112444be1e')
19 # make a UUID using an MD5 hash of a namespace UUID and a name
20 >>> uuid.uuid3(uuid.NAMESPACE_DNS, 'python.org')
21 UUID('6fa459ea-ee8a-3ca4-894e-db77e160355e')
23 # make a random UUID
24 >>> uuid.uuid4() # doctest: +SKIP
25 UUID('16fd2706-8baf-433b-82eb-8c7fada847da')
27 # make a UUID using a SHA-1 hash of a namespace UUID and a name
28 >>> uuid.uuid5(uuid.NAMESPACE_DNS, 'python.org')
29 UUID('886313e1-3b8a-5372-9b90-0c9aee199e5d')
31 # make a UUID from a string of hex digits (braces and hyphens ignored)
32 >>> x = uuid.UUID('{00010203-0405-0607-0809-0a0b0c0d0e0f}')
34 # convert a UUID to a string of hex digits in standard form
35 >>> str(x)
36 '00010203-0405-0607-0809-0a0b0c0d0e0f'
38 # get the raw 16 bytes of the UUID
39 >>> x.bytes
40 b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'
42 # make a UUID from a 16-byte string
43 >>> uuid.UUID(bytes=x.bytes)
44 UUID('00010203-0405-0607-0809-0a0b0c0d0e0f')
45 """
__author__ = 'Ka-Ping Yee <ping@zesty.ca>'

# Descriptions of the four variant families.  UUID.variant returns one of
# these strings.
RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, RESERVED_FUTURE = [
    'reserved for NCS compatibility', 'specified in RFC 4122',
    'reserved for Microsoft compatibility', 'reserved for future definition']

# UUID.__init__ has parameters named 'int' and 'bytes' that shadow the
# built-ins, so keep aliases to the real types.
int_ = int      # The built-in int type
bytes_ = bytes  # The built-in bytes type


class UUID(object):
    """Instances of the UUID class represent UUIDs as specified in RFC 4122.
    UUID objects are immutable, hashable, and usable as dictionary keys.
    Converting a UUID to a string with str() yields something in the form
    '12345678-1234-1234-1234-123456789abc'.  The UUID constructor accepts
    five possible forms: a similar string of hexadecimal digits, or a tuple
    of six integer fields (with 32-bit, 16-bit, 16-bit, 8-bit, 8-bit, and
    48-bit values respectively) as an argument named 'fields', or a bytes
    object of length 16 (with all the integer fields in big-endian order)
    as an argument named 'bytes', or a bytes object of length 16 (with the
    first three fields in little-endian order) as an argument named
    'bytes_le', or a single 128-bit integer as an argument named 'int'.

    UUIDs have these read-only attributes:

        bytes       the UUID as a 16-byte bytes object (containing the six
                    integer fields in big-endian byte order)

        bytes_le    the UUID as a 16-byte bytes object (with time_low,
                    time_mid, and time_hi_version in little-endian byte
                    order)

        fields      a tuple of the six integer fields of the UUID,
                    which are also available as six individual attributes
                    and two derived attributes:

            time_low                the first 32 bits of the UUID
            time_mid                the next 16 bits of the UUID
            time_hi_version         the next 16 bits of the UUID
            clock_seq_hi_variant    the next 8 bits of the UUID
            clock_seq_low           the next 8 bits of the UUID
            node                    the last 48 bits of the UUID

            time                    the 60-bit timestamp
            clock_seq               the 14-bit sequence number

        hex         the UUID as a 32-character hexadecimal string

        int         the UUID as a 128-bit integer

        urn         the UUID as a URN as specified in RFC 4122

        variant     the UUID variant (one of the constants RESERVED_NCS,
                    RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE)

        version     the UUID version number (1 through 5, meaningful only
                    when the variant is RFC_4122; None otherwise)
    """

    def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
                 int=None, version=None):
        r"""Create a UUID from either a string of 32 hexadecimal digits,
        a bytes object of length 16 as the 'bytes' argument, a bytes object
        of length 16 in little-endian order as the 'bytes_le' argument, a
        tuple of six integers (32-bit time_low, 16-bit time_mid, 16-bit
        time_hi_version, 8-bit clock_seq_hi_variant, 8-bit clock_seq_low,
        48-bit node) as the 'fields' argument, or a single 128-bit integer
        as the 'int' argument.  When a string of hex digits is given, curly
        braces, hyphens, and a URN prefix are all optional.  For example,
        these expressions all yield the same UUID:

        UUID('{12345678-1234-5678-1234-567812345678}')
        UUID('12345678123456781234567812345678')
        UUID('urn:uuid:12345678-1234-5678-1234-567812345678')
        UUID(bytes=b'\x12\x34\x56\x78'*4)
        UUID(bytes_le=b'\x78\x56\x34\x12\x34\x12\x78\x56' +
                      b'\x12\x34\x56\x78\x12\x34\x56\x78')
        UUID(fields=(0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678))
        UUID(int=0x12345678123456781234567812345678)

        Exactly one of 'hex', 'bytes', 'bytes_le', 'fields', or 'int' must
        be given.  The 'version' argument is optional; if given, the resulting
        UUID will have its variant and version set according to RFC 4122,
        overriding the given 'hex', 'bytes', 'bytes_le', 'fields', or 'int'.

        Raises TypeError when the number of value arguments is not exactly
        one or when 'bytes' is not a bytes object, and ValueError when a
        value has the wrong length or is out of range.
        """

        if [hex, bytes, bytes_le, fields, int].count(None) != 4:
            raise TypeError('need one of hex, bytes, bytes_le, fields, or int')
        if hex is not None:
            hex = hex.replace('urn:', '').replace('uuid:', '')
            hex = hex.strip('{}').replace('-', '')
            if len(hex) != 32:
                raise ValueError('badly formed hexadecimal UUID string')
            int = int_(hex, 16)
        if bytes_le is not None:
            if len(bytes_le) != 16:
                raise ValueError('bytes_le is not a 16-char string')
            # Byte-swap the first three fields to get the big-endian form,
            # then fall through to the 'bytes' handling below.
            bytes = (bytes_(reversed(bytes_le[0:4])) +
                     bytes_(reversed(bytes_le[4:6])) +
                     bytes_(reversed(bytes_le[6:8])) +
                     bytes_le[8:])
        if bytes is not None:
            if len(bytes) != 16:
                raise ValueError('bytes is not a 16-char string')
            # Explicit check rather than an assert, so the validation is
            # not stripped when Python runs with -O.
            if not isinstance(bytes, bytes_):
                raise TypeError('bytes must be a bytes object, not %r'
                                % (bytes,))
            int = int_(('%02x'*16) % tuple(bytes), 16)
        if fields is not None:
            if len(fields) != 6:
                raise ValueError('fields is not a 6-tuple')
            (time_low, time_mid, time_hi_version,
             clock_seq_hi_variant, clock_seq_low, node) = fields
            if not 0 <= time_low < 1<<32:
                raise ValueError('field 1 out of range (need a 32-bit value)')
            if not 0 <= time_mid < 1<<16:
                raise ValueError('field 2 out of range (need a 16-bit value)')
            if not 0 <= time_hi_version < 1<<16:
                raise ValueError('field 3 out of range (need a 16-bit value)')
            if not 0 <= clock_seq_hi_variant < 1<<8:
                raise ValueError('field 4 out of range (need an 8-bit value)')
            if not 0 <= clock_seq_low < 1<<8:
                raise ValueError('field 5 out of range (need an 8-bit value)')
            if not 0 <= node < 1<<48:
                raise ValueError('field 6 out of range (need a 48-bit value)')
            clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low
            int = ((time_low << 96) | (time_mid << 80) |
                   (time_hi_version << 64) | (clock_seq << 48) | node)
        if int is not None:
            if not 0 <= int < 1<<128:
                raise ValueError('int is out of range (need a 128-bit value)')
        if version is not None:
            if not 1 <= version <= 5:
                raise ValueError('illegal version number')
            # Set the variant to RFC 4122.
            int &= ~(0xc000 << 48)
            int |= 0x8000 << 48
            # Set the version number.
            int &= ~(0xf000 << 64)
            int |= version << 76
        # __setattr__ always raises, so store the value through __dict__.
        self.__dict__['int'] = int

    def __eq__(self, other):
        if isinstance(other, UUID):
            return self.int == other.int
        return NotImplemented

    def __ne__(self, other):
        if isinstance(other, UUID):
            return self.int != other.int
        return NotImplemented

    # Q. What's the value of being able to sort UUIDs?
    # A. Use them as keys in a B-Tree or similar mapping.

    def __lt__(self, other):
        if isinstance(other, UUID):
            return self.int < other.int
        return NotImplemented

    def __gt__(self, other):
        if isinstance(other, UUID):
            return self.int > other.int
        return NotImplemented

    def __le__(self, other):
        if isinstance(other, UUID):
            return self.int <= other.int
        return NotImplemented

    def __ge__(self, other):
        if isinstance(other, UUID):
            return self.int >= other.int
        return NotImplemented

    def __hash__(self):
        return hash(self.int)

    def __int__(self):
        return self.int

    def __repr__(self):
        return 'UUID(%r)' % str(self)

    def __setattr__(self, name, value):
        raise TypeError('UUID objects are immutable')

    def __str__(self):
        hex = '%032x' % self.int
        return '%s-%s-%s-%s-%s' % (
            hex[:8], hex[8:12], hex[12:16], hex[16:20], hex[20:])

    @property
    def bytes(self):
        # Most significant byte first (big-endian), 16 bytes in total.
        return bytes_((self.int >> shift) & 0xff
                      for shift in range(120, -8, -8))

    @property
    def bytes_le(self):
        bytes = self.bytes
        return (bytes_(reversed(bytes[0:4])) +
                bytes_(reversed(bytes[4:6])) +
                bytes_(reversed(bytes[6:8])) +
                bytes[8:])

    @property
    def fields(self):
        return (self.time_low, self.time_mid, self.time_hi_version,
                self.clock_seq_hi_variant, self.clock_seq_low, self.node)

    @property
    def time_low(self):
        return self.int >> 96

    @property
    def time_mid(self):
        return (self.int >> 80) & 0xffff

    @property
    def time_hi_version(self):
        return (self.int >> 64) & 0xffff

    @property
    def clock_seq_hi_variant(self):
        return (self.int >> 56) & 0xff

    @property
    def clock_seq_low(self):
        return (self.int >> 48) & 0xff

    @property
    def time(self):
        # Drop the 4 version bits from time_hi_version, then reassemble
        # the 60-bit timestamp from its three pieces.
        return (((self.time_hi_version & 0x0fff) << 48) |
                (self.time_mid << 32) | self.time_low)

    @property
    def clock_seq(self):
        # Drop the 2 variant bits from clock_seq_hi_variant.
        return (((self.clock_seq_hi_variant & 0x3f) << 8) |
                self.clock_seq_low)

    @property
    def node(self):
        return self.int & 0xffffffffffff

    @property
    def hex(self):
        return '%032x' % self.int

    @property
    def urn(self):
        return 'urn:uuid:' + str(self)

    @property
    def variant(self):
        # Decode the top bits of clock_seq_hi_variant: 0xx, 10x, 110, 111.
        if not self.int & (0x8000 << 48):
            return RESERVED_NCS
        elif not self.int & (0x4000 << 48):
            return RFC_4122
        elif not self.int & (0x2000 << 48):
            return RESERVED_MICROSOFT
        else:
            return RESERVED_FUTURE

    @property
    def version(self):
        # The version bits are only meaningful for RFC 4122 UUIDs.
        if self.variant == RFC_4122:
            return int((self.int >> 76) & 0xf)
        return None
314 def _find_mac(command, args, hw_identifiers, get_index):
315 import os
316 for dir in ['', '/sbin/', '/usr/sbin']:
317 executable = os.path.join(dir, command)
318 if not os.path.exists(executable):
319 continue
321 try:
322 # LC_ALL to get English output, 2>/dev/null to
323 # prevent output on stderr
324 cmd = 'LC_ALL=C %s %s 2>/dev/null' % (executable, args)
325 pipe = os.popen(cmd)
326 except IOError:
327 continue
329 for line in pipe:
330 words = line.lower().split()
331 for i in range(len(words)):
332 if words[i] in hw_identifiers:
333 return int(words[get_index(i)].replace(':', ''), 16)
334 return None
def _ifconfig_getnode():
    """Get the hardware address on Unix by running ifconfig.

    Falls back to 'arp' (keyed on this host's IP address) and then to
    'lanscan' when ifconfig yields nothing.  Returns the address as an
    integer, or None if every attempt fails.
    """

    # This works on Linux ('' or '-a'), Tru64 ('-av'), but not all Unixes.
    for args in ('', '-a', '-av'):
        mac = _find_mac('ifconfig', args, ['hwaddr', 'ether'], lambda i: i+1)
        if mac:
            return mac

    import socket
    try:
        ip_addr = socket.gethostbyname(socket.gethostname())
    except socket.error:
        # The host name does not resolve.  Skip only the arp lookup so the
        # remaining fallback below still gets its chance.
        ip_addr = None

    if ip_addr is not None:
        # Try getting the MAC addr from arp based on our IP address (Solaris).
        mac = _find_mac('arp', '-an', [ip_addr], lambda i: -1)
        if mac:
            return mac

    # This might work on HP-UX.
    mac = _find_mac('lanscan', '-ai', ['lan0'], lambda i: 0)
    if mac:
        return mac

    return None
360 def _ipconfig_getnode():
361 """Get the hardware address on Windows by running ipconfig.exe."""
362 import os, re
363 dirs = ['', r'c:\windows\system32', r'c:\winnt\system32']
364 try:
365 import ctypes
366 buffer = ctypes.create_string_buffer(300)
367 ctypes.windll.kernel32.GetSystemDirectoryA(buffer, 300)
368 dirs.insert(0, buffer.value.decode('mbcs'))
369 except:
370 pass
371 for dir in dirs:
372 try:
373 pipe = os.popen(os.path.join(dir, 'ipconfig') + ' /all')
374 except IOError:
375 continue
376 for line in pipe:
377 value = line.split(':')[-1].strip().lower()
378 if re.match('([0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]', value):
379 return int(value.replace('-', ''), 16)
def _netbios_getnode():
    """Get the hardware address on Windows using NetBIOS calls.
    See http://support.microsoft.com/kb/118623 for details.

    Enumerates the LAN adapters, resets each one, and queries its adapter
    status.  Returns the first adapter address obtained as an integer, or
    None if enumeration fails or no adapter answers.
    """
    import win32wnet, netbios
    ncb = netbios.NCB()
    ncb.Command = netbios.NCBENUM
    ncb.Buffer = adapters = netbios.LANA_ENUM()
    adapters._pack()
    if win32wnet.Netbios(ncb) != 0:
        return None
    adapters._unpack()
    for i in range(adapters.length):
        ncb.Reset()
        ncb.Command = netbios.NCBRESET
        ncb.Lana_num = ord(adapters.lana[i])
        if win32wnet.Netbios(ncb) != 0:
            continue
        ncb.Reset()
        ncb.Command = netbios.NCBASTAT
        ncb.Lana_num = ord(adapters.lana[i])
        ncb.Callname = '*'.ljust(16)
        ncb.Buffer = status = netbios.ADAPTER_STATUS()
        if win32wnet.Netbios(ncb) != 0:
            continue
        status._unpack()
        # map() returns a one-shot iterator in Python 3 and cannot be
        # indexed, so materialise the octets first.
        # NOTE(review): assumes adapter_address yields 1-character items,
        # as the ord() calls on adapters.lana above also do -- confirm
        # against the installed pywin32.
        octets = list(map(ord, status.adapter_address))
        return ((octets[0]<<40) + (octets[1]<<32) + (octets[2]<<24) +
                (octets[3]<<16) + (octets[4]<<8) + octets[5])
    return None
# Thanks to Thomas Heller for ctypes and for his help with its use here.

# If ctypes is available, use it to find system routines for UUID generation.
# Each of the three names below stays None unless a matching routine is found;
# any failure while probing is swallowed so that importing this module never
# fails because of it.
# XXX This makes the module non-thread-safe!
_uuid_generate_random = _uuid_generate_time = _UuidCreate = None
try:
    import ctypes, ctypes.util

    # The uuid_generate_* routines are provided by libuuid on at least
    # Linux and FreeBSD, and provided by libc on Mac OS X.
    # Both libraries are probed in turn; a routine found in a later library
    # replaces one found earlier.
    for libname in ['uuid', 'c']:
        try:
            lib = ctypes.CDLL(ctypes.util.find_library(libname))
        except:
            continue
        if hasattr(lib, 'uuid_generate_random'):
            _uuid_generate_random = lib.uuid_generate_random
        if hasattr(lib, 'uuid_generate_time'):
            _uuid_generate_time = lib.uuid_generate_time

    # The uuid_generate_* functions are broken on MacOS X 10.5, as noted
    # in issue #8621 the function generates the same sequence of values
    # in the parent process and all children created using fork (unless
    # those children use exec as well).
    #
    # Assume that the uuid_generate functions are broken from 10.5 onward,
    # the test can be adjusted when a later version is fixed.
    import sys
    if sys.platform == 'darwin':
        import os
        # The check is on the major number of the release string reported
        # by os.uname(); 9 or higher disables both routines.
        if int(os.uname()[2].split('.')[0]) >= 9:
            _uuid_generate_random = _uuid_generate_time = None

    # On Windows prior to 2000, UuidCreate gives a UUID containing the
    # hardware address.  On Windows 2000 and later, UuidCreate makes a
    # random UUID and UuidCreateSequential gives a UUID containing the
    # hardware address.  These routines are provided by the RPC runtime.
    # NOTE:  at least on Tim's WinXP Pro SP2 desktop box, while the last
    # 6 bytes returned by UuidCreateSequential are fixed, they don't appear
    # to bear any relationship to the MAC address of any network device
    # on the box.
    try:
        lib = ctypes.windll.rpcrt4
    except:
        lib = None
    # Prefer UuidCreateSequential, fall back to UuidCreate, else None.
    _UuidCreate = getattr(lib, 'UuidCreateSequential',
                          getattr(lib, 'UuidCreate', None))
except:
    pass
def _unixdll_getnode():
    """Get the hardware address on Unix using ctypes.

    Has the system's uuid_generate_time routine fill a 16-byte buffer and
    returns the node field of the resulting UUID.
    """
    raw = ctypes.create_string_buffer(16)
    _uuid_generate_time(raw)
    generated = UUID(bytes=bytes_(raw.raw))
    return generated.node
def _windll_getnode():
    """Get the hardware address on Windows using ctypes.

    Has _UuidCreate fill a 16-byte buffer and returns the node field of the
    resulting UUID, or None when the call reports a non-zero status.
    """
    raw = ctypes.create_string_buffer(16)
    status = _UuidCreate(raw)
    if status != 0:
        return None
    generated = UUID(bytes=bytes_(raw.raw))
    return generated.node
472 def _random_getnode():
473 """Get a random node ID, with eighth bit set as suggested by RFC 4122."""
474 import random
475 return random.randrange(0, 1<<48) | 0x010000000000
# Cache for getnode(): the node ID is looked up at most once per process.
_node = None

def getnode():
    """Get the hardware address as a 48-bit positive integer.

    The first time this runs, it may launch a separate program, which could
    be quite slow.  If all attempts to obtain the hardware address fail, we
    choose a random 48-bit number with its eighth bit set to 1 as recommended
    in RFC 4122.

    The result is cached in the module-level '_node', so later calls return
    the same value without repeating the lookup.
    """

    global _node
    if _node is not None:
        return _node

    import sys
    if sys.platform == 'win32':
        getters = [_windll_getnode, _netbios_getnode, _ipconfig_getnode]
    else:
        getters = [_unixdll_getnode, _ifconfig_getnode]

    for getter in getters + [_random_getnode]:
        try:
            candidate = getter()
        except Exception:
            # A getter that is unavailable on this system simply fails;
            # move on to the next one.  KeyboardInterrupt and SystemExit
            # are deliberately allowed to propagate.
            continue
        # Cache only values that fit in 48 bits.  Anything else would be
        # remembered for the rest of the process and later be rejected by
        # the UUID constructor.
        if candidate is not None and 0 <= candidate < (1 << 48):
            _node = candidate
            return _node
# Timestamp used by the most recent pure-Python uuid1() call, so that two
# calls never reuse the same timestamp.
_last_timestamp = None

def uuid1(node=None, clock_seq=None):
    """Generate a UUID from a host ID, sequence number, and the current time.
    If 'node' is not given, getnode() is used to obtain the hardware
    address.  If 'clock_seq' is given, it is used as the sequence number;
    otherwise a random 14-bit sequence number is chosen."""
    global _last_timestamp

    # When the system provides a version-1 UUID generator, use it (but don't
    # use UuidCreate here because its UUIDs don't conform to RFC 4122).
    if _uuid_generate_time and node is clock_seq is None:
        _buffer = ctypes.create_string_buffer(16)
        _uuid_generate_time(_buffer)
        return UUID(bytes=bytes_(_buffer.raw))

    import time
    nanoseconds = int(time.time() * 1e9)
    # 0x01b21dd213814000 is the number of 100-ns intervals between the
    # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
    # Floor division keeps the arithmetic in exact integers; '/' would go
    # through a float, which cannot represent values this large exactly.
    timestamp = nanoseconds // 100 + 0x01b21dd213814000
    if _last_timestamp is not None and timestamp <= _last_timestamp:
        # The clock did not advance (or went backwards): step past the
        # previously issued timestamp instead of repeating it.
        timestamp = _last_timestamp + 1
    _last_timestamp = timestamp
    if clock_seq is None:
        import random
        clock_seq = random.randrange(1<<14) # instead of stable storage
    time_low = timestamp & 0xffffffff
    time_mid = (timestamp >> 32) & 0xffff
    time_hi_version = (timestamp >> 48) & 0x0fff
    clock_seq_low = clock_seq & 0xff
    clock_seq_hi_variant = (clock_seq >> 8) & 0x3f
    if node is None:
        node = getnode()
    return UUID(fields=(time_low, time_mid, time_hi_version,
                        clock_seq_hi_variant, clock_seq_low, node), version=1)
def uuid3(namespace, name):
    """Generate a UUID from the MD5 hash of a namespace UUID and a name.

    The hash input is the namespace's 16 bytes followed by 'name' encoded
    as UTF-8; the first 16 digest bytes become a version-3 UUID.
    """
    from hashlib import md5
    payload = namespace.bytes + bytes(name, "utf-8")
    digest = md5(payload).digest()
    return UUID(bytes=digest[:16], version=3)
def uuid4():
    """Generate a random UUID.

    Uses the system's uuid_generate_random routine when one was found at
    import time; otherwise builds a version-4 UUID from 16 random bytes.
    """

    # When the system provides a version-4 UUID generator, use it.
    if _uuid_generate_random:
        _buffer = ctypes.create_string_buffer(16)
        _uuid_generate_random(_buffer)
        return UUID(bytes=bytes_(_buffer.raw))

    # Otherwise, get randomness from urandom or the 'random' module.
    try:
        import os
        random_bytes = os.urandom(16)
    except Exception:
        # No usable OS randomness source: fall back to the 'random' module.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        import random
        random_bytes = bytes_(random.randrange(256) for i in range(16))
    return UUID(bytes=random_bytes, version=4)
def uuid5(namespace, name):
    """Generate a UUID from the SHA-1 hash of a namespace UUID and a name.

    The hash input is the namespace's 16 bytes followed by 'name' encoded
    as UTF-8; the first 16 digest bytes become a version-5 UUID.
    """
    from hashlib import sha1
    payload = namespace.bytes + bytes(name, "utf-8")
    digest = sha1(payload).digest()
    return UUID(bytes=digest[:16], version=5)
# The following standard UUIDs are for use with uuid3() or uuid5().
# Pass one of them as the 'namespace' argument.  The four values differ
# only in the last hex digit of their first field (0, 1, 2 and 4).

NAMESPACE_DNS = UUID('6ba7b810-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_URL = UUID('6ba7b811-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_OID = UUID('6ba7b812-9dad-11d1-80b4-00c04fd430c8')
NAMESPACE_X500 = UUID('6ba7b814-9dad-11d1-80b4-00c04fd430c8')