[AWLUtilities] Undeclared globals in force_utf8.
[awl.git] / inc / AWLUtilities.php
blob96d61600be68266f6555a9586eefa28823d52a6e
1 <?php
2 /**
3 * Utility functions of a general nature which are used by
4 * most AWL library classes.
6 * @package awl
7 * @subpackage Utilities
8 * @author Andrew McMillan <andrew@mcmillan.net.nz>
9 * @copyright Catalyst IT Ltd, Morphoss Ltd <http://www.morphoss.com/>
10 * @license http://www.gnu.org/licenses/lgpl-3.0.txt GNU LGPL version 3 or later
if ( !function_exists('dbg_error_log') ) {
  /**
  * Writes a debug message into the error log using printf syntax.
  *
  * The first argument selects whether anything is logged at all:
  *  - "LOG:component" is always logged (type "LOG", the "LOG:" prefix is stripped),
  *  - "ERROR" is always logged (type "***"),
  *  - otherwise it is a component name, which is logged when $c->dbg["ALL"] is
  *    set, or when $c->dbg[<lower-cased component>] is set.
  *
  * The line written is "<$c->sysabbr>: <type>: <component>:<message>".
  *
  * @param string $component The component to identify itself, or "ERROR", or "LOG:component"
  * @param string $format A format string for the log message (optional)
  * @param [string $parameter ...] Parameters for the format string.
  */
  function dbg_error_log() {
    global $c;
    $args = func_get_args();
    $type = "DBG";
    $component = array_shift($args);
    if ( substr( $component, 0, 3) == "LOG" ) {
      // Special escape case for stuff that always gets logged.
      $type = 'LOG';
      $component = substr($component,4);
    }
    else if ( $component == "ERROR" ) {
      $type = "***";
    }
    else if ( isset($c->dbg["ALL"]) ) {
      $type = "ALL";
    }
    else if ( !isset($c->dbg[strtolower($component)]) ) return;

    if ( count($args) > 0 ) {
      $format = array_shift($args);
      $message = vsprintf( $format, $args );
    }
    else {
      // No format was supplied.  Feeding "%s" with no arguments to vsprintf()
      // is an error (an exception on PHP 8), so just log an empty message.
      $message = '';
    }

    // Tolerate a configuration object that has no system abbreviation set.
    $sysabbr = ( isset($c->sysabbr) ? $c->sysabbr : '' );
    @error_log( $sysabbr.": $type: $component:". $message );
  }
}
if ( !function_exists('apache_request_headers') ) {
  /**
  * Compatibility so we can use the apache function name and still work with CGI.
  *
  * Rebuilds the request headers from the HTTP_* entries of $_SERVER, turning
  * a key such as HTTP_USER_AGENT into "User-Agent".
  *
  * Declared conditionally rather than through eval(): a function declared
  * inside an if block is only defined when that block executes.
  *
  * @package awl
  * @return array Header name => value; an empty array when no HTTP_* keys exist.
  */
  function apache_request_headers() {
    // Start from an empty array so that "no headers" is not an undefined variable.
    $out = array();
    foreach( $_SERVER as $key => $value ) {
      if ( substr($key,0,5) == "HTTP_" ) {
        $key = str_replace(" ","-",ucwords(strtolower(str_replace("_"," ",substr($key,5)))));
        $out[$key] = $value;
      }
    }
    return $out;
  }
}
if ( !function_exists('dbg_log_array') ) {
  /**
  * Dump the entries of an array (or the properties of an object) to the
  * error log through dbg_error_log(), one line per entry.
  *
  * Nested arrays/objects are logged by type name only, unless $recursive is
  * true, in which case they are dumped in turn.  Object-valued entries whose
  * key is "self" or "parent" are never recursed into.
  *
  * @param string $component Which component should this log message identify itself from
  * @param string $name What name should this array dump identify itself as
  * @param array $arr The array to be dumped.
  * @param boolean $recursive Should the dump recurse into arrays/objects in the array
  */
  function dbg_log_array( $component, $name, $arr, $recursive = false ) {
    $container_types = array('array', 'object');

    if ( !isset($arr) || !in_array(gettype($arr), $container_types, true) ) {
      dbg_error_log( $component, "%s: array is not set, or is not an array!", $name);
      return;
    }

    foreach ($arr as $key => $value) {
      $kind = gettype($value);
      $is_container = in_array($kind, $container_types, true);

      dbg_error_log( $component, "%s: >>%s<< = >>%s<<", $name, $key, ($is_container ? $kind : $value) );

      if ( !$recursive || !$is_container ) continue;
      if ( $kind == 'object' && ("$key" == 'self' || "$key" == 'parent') ) continue;

      dbg_log_array( $component, "$name"."[$key]", $value, $recursive );
    }
  }
}
if ( !function_exists("session_simple_md5") ) {
  /**
  * Make a plain (unsalted) MD5 hash of a string, prefixed with a marker that
  * identifies the type of hash it is.
  *
  * When $c->dbg['password'] is set the input and its hash are written to the
  * debug log.
  *
  * @param string $instr The string to be MD5'd
  * @return string The literal "*MD5*" followed by the MD5 of the string
  */
  function session_simple_md5( $instr ) {
    global $c;

    $digest = md5($instr);
    if ( isset($c->dbg['password']) ) {
      dbg_error_log( "Login", "Making plain MD5: instr=$instr, md5($instr)=".$digest );
    }
    return '*MD5*' . $digest;
  }
}
if ( !function_exists("session_salted_md5") ) {
  /**
  * Make a salted MD5 string, given a string and (possibly) a salt.
  *
  * If no salt is supplied an eight character one is generated from the MD5
  * of a random number.  When $c->dbg['password'] is set the salt, the input
  * and the resulting hash are written to the debug log.
  *
  * @param string $instr The string to be salted and MD5'd
  * @param string $salt Some salt to sprinkle into the string to be MD5'd so we don't get the same PW always hashing to the same value.
  * @return string A *, the salt, a * and the MD5 of salt+string, as in *SALT*SALTEDHASH
  */
  function session_salted_md5( $instr, $salt = "" ) {
    global $c;

    if ( $salt == "" ) {
      $salt = substr( md5(rand(100000,999999)), 2, 8);
    }

    $salted_hash = md5($salt . $instr);
    if ( isset($c->dbg['password']) ) {
      dbg_error_log( "Login", "Making salted MD5: salt=$salt, instr=$instr, md5($salt$instr)=".$salted_hash );
    }
    return '*' . $salt . '*' . $salted_hash;
  }
}
if ( !function_exists("session_salted_sha1") ) {
  /**
  * Make a salted SHA1 string, given a string and (possibly) a salt.
  *
  * The hash is the base64 encoding of the raw SHA1 of string+salt with the
  * salt appended, prefixed by "{SSHA}".  The original author notes this is
  * compatible with OpenLDAP, so the part following the second '*' (i.e. the
  * '{SSHA}....' part) should be usable there.
  *
  * If no salt is supplied a nine character one is generated from the base64
  * encoded SHA1 of a random number.  When $c->dbg['password'] is set the
  * salt, the input and the encoded result are written to the debug log.
  *
  * @param string $instr The string to be salted and SHA1'd
  * @param string $salt Some salt to sprinkle into the string to be SHA1'd so we don't get the same PW always hashing to the same value.
  * @return string A *, the salt, a * and the SHA1 of the salted string, as in *SALT*SALTEDHASH
  */
  function session_salted_sha1( $instr, $salt = "" ) {
    global $c;

    if ( $salt == "" ) {
      $seed_digest = base64_encode( sha1( rand(100000,9999999), true ) );
      $salt = substr( str_replace('*', '', $seed_digest), 2, 9 );
    }

    $encoded = base64_encode( sha1($instr . $salt, true) . $salt );
    if ( isset($c->dbg['password']) ) {
      dbg_error_log( "Login", "Making salted SHA1: salt=$salt, instr=$instr, encoded($instr$salt)=".$encoded );
    }
    return '*' . $salt . '*{SSHA}' . $encoded;
  }
}
if ( !function_exists("session_validate_password") ) {
  /**
  * Checks what a user entered against the actual password on their account.
  *
  * The stored value is recognised by its prefix, tried in this order:
  *  - "**plaintext"               the "forced" plain text style,
  *  - "*<salt>*{SCHEME}<hash>"    salted SHA1, via session_salted_sha1(),
  *  - "*MD5*<md5>"                unsalted MD5, via session_simple_md5(),
  *  - "*<salt>*<salted_md5>"      salted MD5, via session_salted_md5().
  * Anything else never validates.
  *
  * If the stored value is a salted SHA1 but session_salted_sha1() is not
  * available, an error is logged, an HTML error page is output and the
  * script exits.
  *
  * @param string $they_sent What the user entered.
  * @param string $we_have What we have in the database as their password. Which may (or may not) be a salted MD5.
  * @return boolean Whether or not the users attempt matches what is already on file.
  */
  function session_validate_password( $they_sent, $we_have ) {
    // The "forced" style of "**plaintext" to allow easier admin setting
    if ( preg_match('/^\*\*.+$/', $we_have ) ) {
      return ( "**$they_sent" == $we_have );
    }

    // A nicely salted sha1sum like "*<salt>*{SSHA}<salted_sha1>"
    if ( preg_match('/^\*(.+)\*{[A-Z]+}.+$/', $we_have, $regs ) ) {
      if ( !function_exists("session_salted_sha1") ) {
        dbg_error_log( "ERROR", "Password is salted SHA-1 but you are using PHP4!" );
        echo <<<EOERRMSG
<html>
<head>
<title>Salted SHA1 Password format not supported with PHP4</title>
</head>
<body>
<h1>Salted SHA1 Password format not supported with PHP4</h1>
<p>At some point you have used PHP5 to set the password for this user and now you are
using PHP4. You will need to assign a new password to this user using PHP4, or ensure
you use PHP5 everywhere (recommended).</p>
<p>AWL has now switched to using salted SHA-1 passwords by preference in a format
compatible with OpenLDAP.</p>
</body>
</html>
EOERRMSG;
        exit;
      }
      return ( session_salted_sha1( $they_sent, $regs[1] ) == $we_have );
    }

    // A crappy unsalted md5sum like "*MD5*<md5>"
    if ( preg_match('/^\*MD5\*.+$/', $we_have, $regs ) ) {
      return ( session_simple_md5( $they_sent ) == $we_have );
    }

    // A nicely salted md5sum like "*<salt>*<salted_md5>"
    if ( preg_match('/^\*(.+)\*.+$/', $we_have, $regs ) ) {
      return ( session_salted_md5( $they_sent, $regs[1] ) == $we_have );
    }

    // Anything else is bad
    return false;
  }
}
if ( !function_exists("replace_uri_params") ) {
  /**
  * Given a URL (presumably the current one) and a set of parameters, replace the
  * value of each parameter, extending the URL as necessary if the parameter is
  * not already there.
  *
  * The returned URI has every bare "&" written as "&amp;" (existing "&amp;"
  * sequences are left alone).  Values are inserted as given; they are not
  * URL-encoded here.
  *
  * @param string $uri The URI we will be replacing parameters in.
  * @param array $replacements An array of replacement pairs array( "replace_this" => "with this" )
  * @return string The URI with the replacements done.
  */
  function replace_uri_params( $uri, $replacements ) {
    $replaced = $uri;
    foreach( $replacements AS $param => $new_value ) {
      // Quote the whole name: parameters may be arrays ("f[0]") or contain other regex metacharacters.
      $rxp = preg_quote( $param, '/' );
      // "*" rather than "+" so that an existing empty value ("a=") is replaced instead of duplicated.
      $regex = "/([&?])($rxp)=([^&]*)/";
      dbg_error_log("core", "Looking for [%s] to replace with [%s] regex is %s and searching [%s]", $param, $new_value, $regex, $replaced );
      if ( preg_match( $regex, $replaced ) ) {
        // Escape "\" and "$" so nothing in the name or value is read as a back-reference,
        // and use ${1} so a name starting with a digit cannot extend the group number.
        $literal = addcslashes( "$param=$new_value", '\\$' );
        $replaced = preg_replace( $regex, '${1}' . $literal, $replaced);
      }
      else {
        $replaced .= "&$param=$new_value";
      }
    }
    if ( ! preg_match( '/\?/', $replaced ) ) {
      // No query string yet: the first appended "&" becomes the "?".
      $replaced = preg_replace("/&(.+)$/", "?\$1", $replaced);
    }
    // Protect existing entities, escape the bare ampersands, then restore.
    $replaced = str_replace("&amp;", "--AmPeRsAnD--", $replaced);
    $replaced = str_replace("&", "&amp;", $replaced);
    $replaced = str_replace("--AmPeRsAnD--", "&amp;", $replaced);
    dbg_error_log("core", "URI <<$uri>> morphed to <<$replaced>>");
    return $replaced;
  }
}
if ( !function_exists("uuid") ) {
  /**
  * Generates a Universally Unique IDentifier, version 4.
  *
  * RFC 4122 (http://www.ietf.org/rfc/rfc4122.txt) section 4.4 describes UUIDs
  * built from random or pseudo-random numbers.  All 128 bits are random except:
  *  (a) the four most significant bits of "time_hi_and_version" hold the
  *      version number, 0100, so the third group always starts with "4";
  *  (b) the two most significant bits of "clock_seq_hi_and_reserved" are 10,
  *      so the fourth group always starts with 8, 9, a or b.
  *
  * Numbers come from the Mersenne Twister (mt_rand) and each call is limited
  * to at most 16 bits, so that every generated bit is random even where the
  * generator's maximum is a 31 bit value.
  *
  * NOTE(review): mt_rand() is not a cryptographic generator; these UUIDs
  * should not be relied on as unguessable secrets.
  *
  * Based on a function which is Copyright (c) CFD Labs, 2006, by
  * David Holmes <dholmes@cfdsoftware.net>, which may be used freely for any
  * purpose and is distributed without any form of warranty whatsoever.
  *
  * @return string A UUID, made up of 32 hex digits and 4 hyphens.
  */
  function uuid() {
    // The field names refer to RFC 4122 section 4.1.2
    return sprintf('%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
      mt_rand(0, 0xffff), mt_rand(0, 0xffff),   // 32 bits for "time_low"
      mt_rand(0, 0xffff),                       // 16 bits for "time_mid"
      mt_rand(0, 0x0fff) | 0x4000,              // version 4 in the top nibble, 12 random bits below it
      mt_rand(0, 0x3fff) | 0x8000,              // variant bits 10, then 14 random bits for the clock sequence
      mt_rand(0, 0xffff), mt_rand(0, 0xffff), mt_rand(0, 0xffff)  // 48 bits for "node"
    );
  }
}
// Load the translation support only when nothing has defined translate() already.
if ( ! function_exists('translate') ) {
  require 'Translation.php';
}
/**
* PHP5 changed assignment so that $a = $b makes $a refer to the same object
* as $b, and provides clone for taking a copy.  On PHP4 we emulate clone()
* with a function that simply returns its argument.
*
* The definition is wrapped in eval(), presumably because a function named
* "clone" cannot be declared directly in a file that PHP5 also has to parse.
*/
$_awl_running_php4 = ( version_compare(phpversion(), '5.0') < 0 );
if ( $_awl_running_php4 && !function_exists("clone") ) {
  eval( 'function clone($object) { return $object; }' );
}
unset($_awl_running_php4);
if ( !function_exists("quoted_printable_encode") ) {
  /**
  * Process a string to fit the requirements of RFC2045 section 6.7.
  *
  * The string is URL-encoded and every "%XX" is turned into "=XX", which
  * encodes more characters than the minimum set.  For readability, spaces
  * are left as spaces rather than encoded as =20.  A soft line break
  * ("=" CRLF) is then inserted after each run of 75 characters which holds
  * no CR or LF and whose last two characters are not "=".
  *
  * @param string $string The text to be encoded
  * @return string The quoted-printable encoded text
  */
  function quoted_printable_encode($string) {
    $encoded = rawurlencode($string);
    $encoded = str_replace("%20", " ", $encoded);
    $encoded = str_replace("%", "=", $encoded);
    return preg_replace('/[^\r\n]{73}[^=\r\n]{2}/', "$0=\r\n", $encoded);
  }
}
if ( !function_exists("clean_by_regex") ) {
  /**
  * Clean a value by applying a regex to it.
  *
  * A null stays null and an object is returned untouched.  An array has the
  * cleaning applied to each of its elements, recursively.  Any other value
  * is replaced by the part of it which matched the regex, or by an empty
  * string if nothing matched.
  *
  * @param mixed $val The value to clean
  * @param string $regex The regex to apply, or 'int' as shorthand for '#^\d+$#'
  * @return mixed The cleaned value
  */
  function clean_by_regex( $val, $regex ) {
    if ( is_null($val) ) return null;

    // Named shorthand for a common pattern
    if ( $regex == 'int' ) {
      $regex = '#^\d+$#';
    }

    if ( is_object($val) ) return $val;

    if ( is_array($val) ) {
      foreach( array_keys($val) AS $key ) {
        $val[$key] = clean_by_regex( $val[$key], $regex );
      }
      return $val;
    }

    return ( preg_match( $regex, $val, $matches ) ? $matches[0] : '' );
  }
}
if ( !function_exists("param_to_global") ) {
  /**
  * Convert a request parameter to a global variable.
  *
  * The global named by the first argument is first reset to null.  Each
  * alias is then tried in the order given, followed by the variable's own
  * name; for every name $_POST is examined before $_GET, and the first
  * value found is used.  When a regex was supplied the value is passed
  * through clean_by_regex() before being stored in the global.
  *
  * @param string $varname The name of the global variable to put the answer in
  * @param string $match_regex The part of the parameter matching this regex will be returned
  * @param string $alias1 An alias for the name that we should look for first.
  * @param " ... More aliases, in the order which they should be examined. $varname will be appended to the end.
  * @return mixed The value found (also stored in the global), or null if no name was present.
  */
  function param_to_global( ) {
    $params = func_get_args();

    $varname = array_shift($params);
    $GLOBALS[$varname] = null;

    $match_regex = null;
    if ( count($params) > 0 ) {
      $match_regex = array_shift($params);
    }

    // Whatever is left are aliases; the real name is the last resort.
    $candidates = $params;
    $candidates[] = $varname;

    $found = false;
    $result = null;
    foreach( $candidates AS $name ) {
      foreach( array($_POST, $_GET) AS $source ) {
        if ( isset($source[$name]) ) {
          $result = $source[$name];
          $found = true;
          break 2;
        }
      }
    }
    if ( !$found ) return null;

    if ( isset($match_regex) ) {
      $result = clean_by_regex( $result, $match_regex );
    }

    $GLOBALS[$varname] = $result;
    return $result;
  }
}
if ( !function_exists("get_fields") ) {
  /**
  * @var array $_AWL_field_cache is a cache of the field names for a table
  */
  $_AWL_field_cache = array();

  /**
  * Get the names and types of the fields for a particular table.
  *
  * The first request for a table queries the PostgreSQL catalog
  * (pg_attribute, pg_class and pg_type) through PgQuery; the answer is kept
  * in $_AWL_field_cache and later requests are served from there.
  *
  * @param string $tablename The name of the table.
  * @return array Field name => type name, with "(<atttypmod>)" appended to
  *               the type whenever atttypmod is not -1.
  */
  function get_fields( $tablename ) {
    global $_AWL_field_cache;

    if ( isset($_AWL_field_cache[$tablename]) ) {
      return $_AWL_field_cache[$tablename];
    }

    dbg_error_log( "DataUpdate", ":get_fields: Loaded fields for table '$tablename'" );
    $sql = implode( ' ', array(
      "SELECT f.attname, t.typname, f.atttypmod FROM pg_attribute f",
      "JOIN pg_class c ON ( f.attrelid = c.oid )",
      "JOIN pg_type t ON ( f.atttypid = t.oid )",
      "WHERE relname = ? AND attnum >= 0 order by f.attnum;"
    ) );
    $qry = new PgQuery( $sql, $tablename );
    $qry->Exec("DataUpdate");

    $fields = array();
    while( $row = $qry->Fetch() ) {
      $type = $row->typname;
      if ( $row->atttypmod != -1 ) {
        $type .= sprintf('(%d)',$row->atttypmod);
      }
      $fields["$row->attname"] = $type;
    }

    $_AWL_field_cache[$tablename] = $fields;
    return $fields;
  }
}
if ( !function_exists("force_utf8") ) {
  /**
  * Set up the globals used when repairing non-UTF-8 text.
  *
  * $byte_map maps a single byte to its UTF-8 encoding: 0x80-0x9F as
  * Windows-1252 (for the bytes that code page defines) and 0xA0-0xFF as
  * ISO-8859-1.  $nibble_good_chars is a regex which splits a string into a
  * leading well-formed chunk (a run of ASCII, or one multi-byte sequence)
  * and the rest; force_utf8() no longer uses it, but it is still defined
  * here in case other code refers to it.
  */
  function define_byte_mappings() {
    global $byte_map, $nibble_good_chars;

    // Needed for using Grant McLean's byte mappings code
    $ascii_char = '[\x00-\x7F]';
    $cont_byte  = '[\x80-\xBF]';

    $utf8_2 = '[\xC0-\xDF]' . $cont_byte;
    $utf8_3 = '[\xE0-\xEF]' . $cont_byte . '{2}';
    $utf8_4 = '[\xF0-\xF7]' . $cont_byte . '{3}';
    $utf8_5 = '[\xF8-\xFB]' . $cont_byte . '{4}';

    $nibble_good_chars = "/^($ascii_char+|$utf8_2|$utf8_3|$utf8_4|$utf8_5)(.*)$/s";

    // From http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
    $byte_map = array(
      "\x80" => "\xE2\x82\xAC",  // EURO SIGN
      "\x82" => "\xE2\x80\x9A",  // SINGLE LOW-9 QUOTATION MARK
      "\x83" => "\xC6\x92",      // LATIN SMALL LETTER F WITH HOOK
      "\x84" => "\xE2\x80\x9E",  // DOUBLE LOW-9 QUOTATION MARK
      "\x85" => "\xE2\x80\xA6",  // HORIZONTAL ELLIPSIS
      "\x86" => "\xE2\x80\xA0",  // DAGGER
      "\x87" => "\xE2\x80\xA1",  // DOUBLE DAGGER
      "\x88" => "\xCB\x86",      // MODIFIER LETTER CIRCUMFLEX ACCENT
      "\x89" => "\xE2\x80\xB0",  // PER MILLE SIGN
      "\x8A" => "\xC5\xA0",      // LATIN CAPITAL LETTER S WITH CARON
      "\x8B" => "\xE2\x80\xB9",  // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
      "\x8C" => "\xC5\x92",      // LATIN CAPITAL LIGATURE OE
      "\x8E" => "\xC5\xBD",      // LATIN CAPITAL LETTER Z WITH CARON
      "\x91" => "\xE2\x80\x98",  // LEFT SINGLE QUOTATION MARK
      "\x92" => "\xE2\x80\x99",  // RIGHT SINGLE QUOTATION MARK
      "\x93" => "\xE2\x80\x9C",  // LEFT DOUBLE QUOTATION MARK
      "\x94" => "\xE2\x80\x9D",  // RIGHT DOUBLE QUOTATION MARK
      "\x95" => "\xE2\x80\xA2",  // BULLET
      "\x96" => "\xE2\x80\x93",  // EN DASH
      "\x97" => "\xE2\x80\x94",  // EM DASH
      "\x98" => "\xCB\x9C",      // SMALL TILDE
      "\x99" => "\xE2\x84\xA2",  // TRADE MARK SIGN
      "\x9A" => "\xC5\xA1",      // LATIN SMALL LETTER S WITH CARON
      "\x9B" => "\xE2\x80\xBA",  // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
      "\x9C" => "\xC5\x93",      // LATIN SMALL LIGATURE OE
      "\x9E" => "\xC5\xBE",      // LATIN SMALL LETTER Z WITH CARON
      "\x9F" => "\xC5\xB8"       // LATIN CAPITAL LETTER Y WITH DIAERESIS
    );

    // ISO-8859-1 bytes are the code points U+00A0..U+00FF, so their two byte
    // UTF-8 form can be computed directly without needing the iconv extension.
    for( $i=160; $i < 256; $i++ ) {
      $byte_map[chr($i)] = chr(0xC0 | ($i >> 6)) . chr(0x80 | ($i & 0x3F));
    }
  }
  define_byte_mappings();

  /**
  * Convert a string of uncertain encoding into UTF-8.
  *
  * The input is scanned left to right.  Runs of ASCII and well-formed
  * multi-byte sequences (a lead byte followed by the right number of
  * continuation bytes) are copied through unchanged.  Any other byte is
  * treated as a single Windows-1252 / ISO-8859-1 character and replaced by
  * its UTF-8 encoding from $byte_map.  The five bytes Windows-1252 leaves
  * undefined (0x81, 0x8D, 0x8F, 0x90, 0x9D) are encoded as the control
  * characters U+0081 etc. rather than being passed through as raw, invalid
  * bytes.
  *
  * The scan works with an offset into the input, so the remainder of the
  * string is not copied again for every character.
  *
  * @param string $input Text which may contain a mixture of UTF-8 and single byte characters
  * @return string The repaired text
  */
  function force_utf8( $input ) {
    global $byte_map;

    if ( !isset($byte_map) ) define_byte_mappings();

    // The same alternatives as $nibble_good_chars, anchored at the scan offset by \G.
    $good_chunk = '/\G(?:[\x00-\x7F]+'
                . '|[\xC0-\xDF][\x80-\xBF]'
                . '|[\xE0-\xEF][\x80-\xBF]{2}'
                . '|[\xF0-\xF7][\x80-\xBF]{3}'
                . '|[\xF8-\xFB][\x80-\xBF]{4})/';

    $input  = (string) $input;
    $length = strlen($input);
    $output = '';
    $pos    = 0;
    while( $pos < $length ) {
      if ( preg_match( $good_chunk, $input, $matches, 0, $pos ) ) {
        $output .= $matches[0];
        $pos += strlen($matches[0]);
        continue;
      }

      $char = $input[$pos];
      $pos++;
      $byte = ord($char);
      if ( isset($byte_map[$char]) ) {
        $output .= $byte_map[$char];
      }
      else if ( $byte >= 0x80 ) {
        // No mapping defined for this byte: encode the code point with the same number.
        $output .= chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
      }
      else {
        // Only reachable if the regex itself failed; ASCII is safe to copy.
        $output .= $char;
      }
    }
    return $output;
  }
}
/**
* Try and extract something like "Pacific/Auckland" or "America/Indiana/Indianapolis"
* from a timezone string, if possible.
*
* A string which is already one of PHP's timezone identifiers is returned
* unchanged.  Otherwise the end of the string is examined for
* "<Region>/<Place>" or "<Region>/<Area>/<Place>", where Region is one of the
* continent / ocean names listed in the pattern below; what is found is returned
* without checking that it names a real timezone.  Finally a few well-known
* non-Olson names are translated.
*
* Comparisons are strict, so non-string input (true, 0, arrays, ...) can
* never be mistaken for a timezone name.
*
* @param string $tzstring The timezone description to examine
* @return string The Olson name, or null if none could be identified.
*/
function olson_from_tzstring( $tzstring ) {
  if ( is_object($tzstring) && method_exists($tzstring, '__toString') ) {
    $tzstring = (string) $tzstring;
  }
  if ( !is_string($tzstring) ) return null;

  if ( in_array($tzstring, timezone_identifiers_list(), true) ) return $tzstring;

  if ( preg_match( '{((Antarctica|America|Africa|Arctic|Atlantic|Asia|Australia|Indian|Europe|Pacific)/(([^/]+)/)?[^/]+)$}', $tzstring, $matches ) ) {
    // dbg_error_log( 'INFO', 'Found timezone "%s" from string "%s"', $matches[1], $tzstring );
    return $matches[1];
  }

  if ( $tzstring === 'New Zealand Standard Time' ) return 'Pacific/Auckland';

  return null;
}