Issue #5768: Change to Unicode output logic and test case for same.
[python.git] / Modules / _hashopenssl.c
blob7b5a2e52bc899bb035804024b8eef4e9dfd045b3
/* Module that wraps all OpenSSL hash algorithms */

/*
 * Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org)
 * Licensed to PSF under a Contributor Agreement.
 *
 * Derived from a skeleton of shamodule.c containing work performed by:
 *
 * Andrew Kuchling (amk@amk.ca)
 * Greg Stein (gstein@lyra.org)
 */
14 #define PY_SSIZE_T_CLEAN
16 #include "Python.h"
17 #include "structmember.h"
18 #include "hashlib.h"
20 /* EVP is the preferred interface to hashing in OpenSSL */
21 #include <openssl/evp.h>
/* Upper bound on the byte count handed to a single EVP_DigestUpdate()
 * call; longer inputs are fed to OpenSSL in pieces of at most this size. */
#define MUNCH_SIZE INT_MAX

/* When non-zero, the HASH type gets a tp_init constructor and is added to
 * the module.  Defaults to 0 unless the build defines it. */
#if !defined(HASH_OBJ_CONSTRUCTOR)
#  define HASH_OBJ_CONSTRUCTOR 0
#endif
30 typedef struct {
31 PyObject_HEAD
32 PyObject *name; /* name of this hash algorithm */
33 EVP_MD_CTX ctx; /* OpenSSL message digest context */
35 * TODO investigate performance impact of including a lock for this object
36 * here and releasing the Python GIL while hash updates are in progress.
37 * (perhaps only release GIL if input length will take long to process?)
39 } EVPobject;
42 static PyTypeObject EVPtype;
45 #define DEFINE_CONSTS_FOR_NEW(Name) \
46 static PyObject *CONST_ ## Name ## _name_obj; \
47 static EVP_MD_CTX CONST_new_ ## Name ## _ctx; \
48 static EVP_MD_CTX *CONST_new_ ## Name ## _ctx_p = NULL;
50 DEFINE_CONSTS_FOR_NEW(md5)
51 DEFINE_CONSTS_FOR_NEW(sha1)
52 DEFINE_CONSTS_FOR_NEW(sha224)
53 DEFINE_CONSTS_FOR_NEW(sha256)
54 DEFINE_CONSTS_FOR_NEW(sha384)
55 DEFINE_CONSTS_FOR_NEW(sha512)
58 static EVPobject *
59 newEVPobject(PyObject *name)
61 EVPobject *retval = (EVPobject *)PyObject_New(EVPobject, &EVPtype);
63 /* save the name for .name to return */
64 if (retval != NULL) {
65 Py_INCREF(name);
66 retval->name = name;
69 return retval;
72 /* Internal methods for a hash object */
74 static void
75 EVP_dealloc(PyObject *ptr)
77 EVP_MD_CTX_cleanup(&((EVPobject *)ptr)->ctx);
78 Py_XDECREF(((EVPobject *)ptr)->name);
79 PyObject_Del(ptr);
83 /* External methods for a hash object */
85 PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object.");
87 static PyObject *
88 EVP_copy(EVPobject *self, PyObject *unused)
90 EVPobject *newobj;
92 if ( (newobj = newEVPobject(self->name))==NULL)
93 return NULL;
95 EVP_MD_CTX_copy(&newobj->ctx, &self->ctx);
96 return (PyObject *)newobj;
99 PyDoc_STRVAR(EVP_digest__doc__,
100 "Return the digest value as a string of binary data.");
102 static PyObject *
103 EVP_digest(EVPobject *self, PyObject *unused)
105 unsigned char digest[EVP_MAX_MD_SIZE];
106 EVP_MD_CTX temp_ctx;
107 PyObject *retval;
108 unsigned int digest_size;
110 EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
111 digest_size = EVP_MD_CTX_size(&temp_ctx);
112 EVP_DigestFinal(&temp_ctx, digest, NULL);
114 retval = PyString_FromStringAndSize((const char *)digest, digest_size);
115 EVP_MD_CTX_cleanup(&temp_ctx);
116 return retval;
119 PyDoc_STRVAR(EVP_hexdigest__doc__,
120 "Return the digest value as a string of hexadecimal digits.");
122 static PyObject *
123 EVP_hexdigest(EVPobject *self, PyObject *unused)
125 unsigned char digest[EVP_MAX_MD_SIZE];
126 EVP_MD_CTX temp_ctx;
127 PyObject *retval;
128 char *hex_digest;
129 unsigned int i, j, digest_size;
131 /* Get the raw (binary) digest value */
132 EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
133 digest_size = EVP_MD_CTX_size(&temp_ctx);
134 EVP_DigestFinal(&temp_ctx, digest, NULL);
136 EVP_MD_CTX_cleanup(&temp_ctx);
138 /* Create a new string */
139 /* NOTE: not thread safe! modifying an already created string object */
140 /* (not a problem because we hold the GIL by default) */
141 retval = PyString_FromStringAndSize(NULL, digest_size * 2);
142 if (!retval)
143 return NULL;
144 hex_digest = PyString_AsString(retval);
145 if (!hex_digest) {
146 Py_DECREF(retval);
147 return NULL;
150 /* Make hex version of the digest */
151 for(i=j=0; i<digest_size; i++) {
152 char c;
153 c = (digest[i] >> 4) & 0xf;
154 c = (c>9) ? c+'a'-10 : c + '0';
155 hex_digest[j++] = c;
156 c = (digest[i] & 0xf);
157 c = (c>9) ? c+'a'-10 : c + '0';
158 hex_digest[j++] = c;
160 return retval;
163 PyDoc_STRVAR(EVP_update__doc__,
164 "Update this hash object's state with the provided string.");
166 static PyObject *
167 EVP_update(EVPobject *self, PyObject *args)
169 PyObject *obj;
170 Py_buffer view;
172 if (!PyArg_ParseTuple(args, "O:update", &obj))
173 return NULL;
175 GET_BUFFER_VIEW_OR_ERROUT(obj, &view, NULL);
177 if (view.len > 0 && view.len <= MUNCH_SIZE) {
178 EVP_DigestUpdate(&self->ctx, (unsigned char*)view.buf,
179 Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int));
180 } else {
181 Py_ssize_t len = view.len;
182 unsigned char *cp = (unsigned char *)view.buf;
183 while (len > 0) {
184 unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len;
185 EVP_DigestUpdate(&self->ctx, cp, process);
186 len -= process;
187 cp += process;
191 PyBuffer_Release(&view);
193 Py_INCREF(Py_None);
194 return Py_None;
197 static PyMethodDef EVP_methods[] = {
198 {"update", (PyCFunction)EVP_update, METH_VARARGS, EVP_update__doc__},
199 {"digest", (PyCFunction)EVP_digest, METH_NOARGS, EVP_digest__doc__},
200 {"hexdigest", (PyCFunction)EVP_hexdigest, METH_NOARGS, EVP_hexdigest__doc__},
201 {"copy", (PyCFunction)EVP_copy, METH_NOARGS, EVP_copy__doc__},
202 {NULL, NULL} /* sentinel */
205 static PyObject *
206 EVP_get_block_size(EVPobject *self, void *closure)
208 return PyInt_FromLong(EVP_MD_CTX_block_size(&((EVPobject *)self)->ctx));
211 static PyObject *
212 EVP_get_digest_size(EVPobject *self, void *closure)
214 return PyInt_FromLong(EVP_MD_CTX_size(&((EVPobject *)self)->ctx));
217 static PyMemberDef EVP_members[] = {
218 {"name", T_OBJECT, offsetof(EVPobject, name), READONLY, PyDoc_STR("algorithm name.")},
219 {NULL} /* Sentinel */
222 static PyGetSetDef EVP_getseters[] = {
223 {"digest_size",
224 (getter)EVP_get_digest_size, NULL,
225 NULL,
226 NULL},
227 {"block_size",
228 (getter)EVP_get_block_size, NULL,
229 NULL,
230 NULL},
231 /* the old md5 and sha modules support 'digest_size' as in PEP 247.
232 * the old sha module also supported 'digestsize'. ugh. */
233 {"digestsize",
234 (getter)EVP_get_digest_size, NULL,
235 NULL,
236 NULL},
237 {NULL} /* Sentinel */
241 static PyObject *
242 EVP_repr(PyObject *self)
244 char buf[100];
245 PyOS_snprintf(buf, sizeof(buf), "<%s HASH object @ %p>",
246 PyString_AsString(((EVPobject *)self)->name), self);
247 return PyString_FromString(buf);
#if HASH_OBJ_CONSTRUCTOR
/*
 * tp_init slot, compiled only when HASH_OBJ_CONSTRUCTOR is non-zero.
 * Accepts (name, string=None-like optional data): looks the digest up by
 * name, initialises self->ctx, stores a new reference to the name object
 * and hashes the optional initial data.
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"name", "string", NULL};
    PyObject *name_obj = NULL;
    PyObject *data_obj = NULL;
    Py_buffer view;
    char *nameStr;
    const EVP_MD *digest;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:HASH", kwlist,
                                     &name_obj, &data_obj)) {
        return -1;
    }

    /* The view is only acquired (and later released) when data was given. */
    if (data_obj) {
        GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view, -1);
    }

    if (!PyArg_Parse(name_obj, "s", &nameStr)) {
        PyErr_SetString(PyExc_TypeError, "name must be a string");
        goto fail;
    }

    digest = EVP_get_digestbyname(nameStr);
    if (digest == NULL) {
        PyErr_SetString(PyExc_ValueError, "unknown hash function");
        goto fail;
    }
    EVP_DigestInit(&self->ctx, digest);

    self->name = name_obj;
    Py_INCREF(self->name);

    if (data_obj) {
        /* Feed the initial data in pieces of at most MUNCH_SIZE bytes. */
        Py_ssize_t remaining = view.len;
        unsigned char *cursor = (unsigned char *)view.buf;

        while (remaining > 0) {
            unsigned int chunk;

            if (remaining > MUNCH_SIZE)
                chunk = MUNCH_SIZE;
            else
                chunk = (unsigned int)remaining;
            EVP_DigestUpdate(&self->ctx, cursor, chunk);
            remaining -= chunk;
            cursor += chunk;
        }
        PyBuffer_Release(&view);
    }

    return 0;

fail:
    if (data_obj)
        PyBuffer_Release(&view);
    return -1;
}
#endif
310 PyDoc_STRVAR(hashtype_doc,
311 "A hash represents the object used to calculate a checksum of a\n\
312 string of information.\n\
314 Methods:\n\
316 update() -- updates the current digest with an additional string\n\
317 digest() -- return the current digest value\n\
318 hexdigest() -- return the current digest as a string of hexadecimal digits\n\
319 copy() -- return a copy of the current hash object\n\
321 Attributes:\n\
323 name -- the hash algorithm being used by this object\n\
324 digest_size -- number of bytes in this hashes output\n");
326 static PyTypeObject EVPtype = {
327 PyVarObject_HEAD_INIT(NULL, 0)
328 "_hashlib.HASH", /*tp_name*/
329 sizeof(EVPobject), /*tp_basicsize*/
330 0, /*tp_itemsize*/
331 /* methods */
332 EVP_dealloc, /*tp_dealloc*/
333 0, /*tp_print*/
334 0, /*tp_getattr*/
335 0, /*tp_setattr*/
336 0, /*tp_compare*/
337 EVP_repr, /*tp_repr*/
338 0, /*tp_as_number*/
339 0, /*tp_as_sequence*/
340 0, /*tp_as_mapping*/
341 0, /*tp_hash*/
342 0, /*tp_call*/
343 0, /*tp_str*/
344 0, /*tp_getattro*/
345 0, /*tp_setattro*/
346 0, /*tp_as_buffer*/
347 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
348 hashtype_doc, /*tp_doc*/
349 0, /*tp_traverse*/
350 0, /*tp_clear*/
351 0, /*tp_richcompare*/
352 0, /*tp_weaklistoffset*/
353 0, /*tp_iter*/
354 0, /*tp_iternext*/
355 EVP_methods, /* tp_methods */
356 EVP_members, /* tp_members */
357 EVP_getseters, /* tp_getset */
358 #if 1
359 0, /* tp_base */
360 0, /* tp_dict */
361 0, /* tp_descr_get */
362 0, /* tp_descr_set */
363 0, /* tp_dictoffset */
364 #endif
365 #if HASH_OBJ_CONSTRUCTOR
366 (initproc)EVP_tp_init, /* tp_init */
367 #endif
370 static PyObject *
371 EVPnew(PyObject *name_obj,
372 const EVP_MD *digest, const EVP_MD_CTX *initial_ctx,
373 const unsigned char *cp, Py_ssize_t len)
375 EVPobject *self;
377 if (!digest && !initial_ctx) {
378 PyErr_SetString(PyExc_ValueError, "unsupported hash type");
379 return NULL;
382 if ((self = newEVPobject(name_obj)) == NULL)
383 return NULL;
385 if (initial_ctx) {
386 EVP_MD_CTX_copy(&self->ctx, initial_ctx);
387 } else {
388 EVP_DigestInit(&self->ctx, digest);
391 if (cp && len) {
392 if (len > 0 && len <= MUNCH_SIZE) {
393 EVP_DigestUpdate(&self->ctx, cp, Py_SAFE_DOWNCAST(len, Py_ssize_t,
394 unsigned int));
395 } else {
396 Py_ssize_t offset = 0;
397 while (len > 0) {
398 unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len;
399 EVP_DigestUpdate(&self->ctx, cp + offset, process);
400 len -= process;
401 offset += process;
406 return (PyObject *)self;
410 /* The module-level function: new() */
412 PyDoc_STRVAR(EVP_new__doc__,
413 "Return a new hash object using the named algorithm.\n\
414 An optional string argument may be provided and will be\n\
415 automatically hashed.\n\
417 The MD5 and SHA1 algorithms are always supported.\n");
419 static PyObject *
420 EVP_new(PyObject *self, PyObject *args, PyObject *kwdict)
422 static char *kwlist[] = {"name", "string", NULL};
423 PyObject *name_obj = NULL;
424 PyObject *data_obj = NULL;
425 Py_buffer view = { 0 };
426 PyObject *ret_obj;
427 char *name;
428 const EVP_MD *digest;
430 if (!PyArg_ParseTupleAndKeywords(args, kwdict, "O|O:new", kwlist,
431 &name_obj, &data_obj)) {
432 return NULL;
435 if (!PyArg_Parse(name_obj, "s", &name)) {
436 PyErr_SetString(PyExc_TypeError, "name must be a string");
437 return NULL;
440 if (data_obj)
441 GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view, NULL);
443 digest = EVP_get_digestbyname(name);
445 ret_obj = EVPnew(name_obj, digest, NULL, (unsigned char*)view.buf,
446 Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int));
448 if (data_obj)
449 PyBuffer_Release(&view);
450 return ret_obj;
/*
 *  This macro generates constructor function definitions for specific
 *  hash algorithms.  These constructors are much faster than calling
 *  the generic one passing it a python string and are noticeably
 *  faster than calling a python new() wrapper.  That's important for
 *  code that wants to make hashes of a bunch of small strings.
 *
 *  Each generated EVP_new_<NAME>(self, args) accepts one optional data
 *  object and calls EVPnew() with the per-algorithm name object and
 *  context pointer.  The buffer length is passed on as a full Py_ssize_t:
 *  EVPnew() does its own MUNCH_SIZE chunking, and narrowing the length to
 *  unsigned int would drop the high bits for inputs larger than UINT_MAX
 *  bytes.
 */
#define GEN_CONSTRUCTOR(NAME)  \
    static PyObject * \
    EVP_new_ ## NAME (PyObject *self, PyObject *args) \
    { \
        PyObject *data_obj = NULL; \
        Py_buffer view = { 0 }; \
        PyObject *ret_obj; \
     \
        if (!PyArg_ParseTuple(args, "|O:" #NAME , &data_obj)) { \
            return NULL; \
        } \
     \
        if (data_obj) { \
            GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view, NULL); \
        } \
     \
        ret_obj = EVPnew( \
                    CONST_ ## NAME ## _name_obj, \
                    NULL, \
                    CONST_new_ ## NAME ## _ctx_p, \
                    (unsigned char*)view.buf, \
                    view.len); \
     \
        if (data_obj) \
            PyBuffer_Release(&view); \
        return ret_obj; \
    }
/* Expands to the PyMethodDef entry that exposes EVP_new_<NAME> under the
 * Python-level name "openssl_<NAME>". */
#define CONSTRUCTOR_METH_DEF(NAME)  \
    { \
        "openssl_" #NAME, \
        (PyCFunction)EVP_new_ ## NAME, \
        METH_VARARGS, \
        PyDoc_STR("Returns a " #NAME \
                  " hash object; optionally initialized with a string") \
    }
/*
 * Used in the init function to set up a constructor: create the name
 * object for NAME and, when OpenSSL knows the digest, initialise the
 * static context and publish it through CONST_new_<NAME>_ctx_p.  When the
 * digest is unknown the pointer is left as it was.
 *
 * The expansion deliberately has no trailing semicolon so that
 * `INIT_CONSTRUCTOR_CONSTANTS(x);` is exactly one statement and remains
 * safe inside an unbraced if/else.  The digest is looked up only once.
 */
#define INIT_CONSTRUCTOR_CONSTANTS(NAME)  do { \
    const EVP_MD *evp_md_; \
    CONST_ ## NAME ## _name_obj = PyString_FromString(#NAME); \
    evp_md_ = EVP_get_digestbyname(#NAME); \
    if (evp_md_ != NULL) { \
        CONST_new_ ## NAME ## _ctx_p = &CONST_new_ ## NAME ## _ctx; \
        EVP_DigestInit(CONST_new_ ## NAME ## _ctx_p, evp_md_); \
    } \
} while (0)
503 GEN_CONSTRUCTOR(md5)
504 GEN_CONSTRUCTOR(sha1)
505 GEN_CONSTRUCTOR(sha224)
506 GEN_CONSTRUCTOR(sha256)
507 GEN_CONSTRUCTOR(sha384)
508 GEN_CONSTRUCTOR(sha512)
510 /* List of functions exported by this module */
512 static struct PyMethodDef EVP_functions[] = {
513 {"new", (PyCFunction)EVP_new, METH_VARARGS|METH_KEYWORDS, EVP_new__doc__},
514 CONSTRUCTOR_METH_DEF(md5),
515 CONSTRUCTOR_METH_DEF(sha1),
516 CONSTRUCTOR_METH_DEF(sha224),
517 CONSTRUCTOR_METH_DEF(sha256),
518 CONSTRUCTOR_METH_DEF(sha384),
519 CONSTRUCTOR_METH_DEF(sha512),
520 {NULL, NULL} /* Sentinel */
524 /* Initialize this module. */
526 PyMODINIT_FUNC
527 init_hashlib(void)
529 PyObject *m;
531 OpenSSL_add_all_digests();
533 /* TODO build EVP_functions openssl_* entries dynamically based
534 * on what hashes are supported rather than listing many
535 * but having some be unsupported. Only init appropriate
536 * constants. */
538 Py_TYPE(&EVPtype) = &PyType_Type;
539 if (PyType_Ready(&EVPtype) < 0)
540 return;
542 m = Py_InitModule("_hashlib", EVP_functions);
543 if (m == NULL)
544 return;
546 #if HASH_OBJ_CONSTRUCTOR
547 Py_INCREF(&EVPtype);
548 PyModule_AddObject(m, "HASH", (PyObject *)&EVPtype);
549 #endif
551 /* these constants are used by the convenience constructors */
552 INIT_CONSTRUCTOR_CONSTANTS(md5);
553 INIT_CONSTRUCTOR_CONSTANTS(sha1);
554 INIT_CONSTRUCTOR_CONSTANTS(sha224);
555 INIT_CONSTRUCTOR_CONSTANTS(sha256);
556 INIT_CONSTRUCTOR_CONSTANTS(sha384);
557 INIT_CONSTRUCTOR_CONSTANTS(sha512);