Added Canvas 1.1.0, originally not under SCM so no historical development records...
[canvas.git] / library / ext / Pluralize.php
blobf93ea0720a4df7d5193dea5610565bd58328d805
1 <?php
2 // @title Pluralize
3 // @role user-defined extension (used in helper)
4 // @author Matt Todd <matt@matttoddphoto.com>
5 // @created 2006-01-08
6 // @desc Handles pluralizing words passed to it.
7 // @refer_to "An Algorithmic Approach to English Pluralization":http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html
8 // @requires stdexception.php (StdException class)
10 // inflector (handles pluralization and singularization)
// @desc Converts English words between their singular and plural forms.
//       A word is run through a fixed sequence of stages ("the gamut"):
//       user-defined inflections, words that never inflect, irregular words,
//       irregular suffixes, classical suffixes, then the -es, -f, -y and -o
//       suffix families. If no stage applies, a plain -s/-es fallback is used.
// @note Every suffix table maps "-singular ending" => "-plural ending". The
//       singular side may start with regex context (a [..] class and/or .*)
//       which must precede the ending for the rule to apply but is itself
//       left untouched by the rewrite.
class Inflector {
	// words whose plural is identical to the singular; a bare entry must match
	// the whole word, an entry starting with "-" matches any word with that ending
	private static $words_that_do_not_inflect_in_the_plural = array("fish", "-ois", "sheep", "deer", "-pox", '[A-Z].*ese', "-itis");

	// "singular"=>"plural" pairs registered through define_inflections();
	// these are consulted before any built-in rule
	private static $user_defined_inflections = array(
		// "word"=>"inflection",
		"role"=>"roles",
		/* "comment"=>"comments",
		"user"=>"users",
		"word"=>"words",
		"category"=>"categories",
		"file"=>"files",
		"post"=>"posts",
		"tag"=>"tags",
		"role"=>"roles",
		"activity"=>"activities",
		"event"=>"events",
		"favorite"=>"favorites",
		"photo"=>"photos",
		"link"=>"links",
		"privilege"=>"privileges",
		*/
	);

	// whole words with an irregular plural
	private static $irregular_words = array(
		'beef'=>'beefs',
		'brother'=>'brothers',
		'child'=>'children',
		'person'=>'people',
		'cow'=>'cows',
		'ephemeris'=>'ephemerides',
		'genie'=>'genies',
		'money'=>'monies',
		'mongoose'=>'mongooses',
		'mythos'=>'mythoi',
		'octopus'=>'octopuses',
		'ox'=>'oxen',
		'soliloquy'=>'soliloquies',
		'trilby'=>'trilbys',
	);

	// irregular suffixes
	private static $irregular_inflections = array(
		'-man'=>'-men',
		'-[lm]ouse'=>'-ice',
		'-tooth'=>'-teeth',
		'-goose'=>'-geese',
		'-foot'=>'-feet',
		'-zoon'=>'-zoa',
		// '-[csx]is'=>'-es',
	);

	// classical (Latin/Greek) suffixes
	private static $classical_inflections = array(
		'-ex'=>'-ices',
		'-um'=>'-a',
		'-on'=>'-a',
		'-a'=>'-ae',
	);

	// sibilant endings that take -es
	private static $es = array(
		'-ch'=>'-ches',
		'-sh'=>'-shes',
		'-ss'=>'-sses',
	);

	// -f endings
	private static $f = array(
		'-f'=>'-ves',
	);

	// -y endings (vowel + y and capitalized names keep the y)
	private static $y = array(
		'-[aeiou]y'=>'-ys',
		'-[A-Z].*y'=>'-ys',
		'-y'=>'-ies',
	);

	// -o endings (vowel + o just takes -s)
	private static $o = array(
		'-[aeiou]o'=>'-os',
		'-o'=>'-oes',
	);

	// @desc returns the plural form of $word
	// @param $word string singular word (a known plural is returned unchanged)
	// @return string
	// @note static so that both Inflector::pluralize() and $obj->pluralize() work;
	//       the original instance declaration broke the static calls made in this class
	public static function pluralize($word) {
		// nothing to inflect
		if((string)$word === '') return $word;

		// run the gamut (plural direction only)
		$inflection = self::run_gamut($word, true);
		if($inflection !== false) return $inflection;

		// if it ends in -s, pluralize it with -es
		if(substr($word, -1) === 's') return "{$word}es";

		// otherwise, just add an -s to the word
		return "{$word}s";
	}

	// @desc returns the singular form of $word
	// @param $word string plural word (a known singular is returned unchanged)
	// @return string
	public static function singularize($word) {
		// nothing to inflect
		if((string)$word === '') return $word;

		// run the gamut (singular direction only)
		$inflection = self::run_gamut($word, false);
		if($inflection !== false) return $inflection;

		// only drop a whole -es after s, x or z ("buses", "boxes"); stripping -es
		// from every word would turn "files" into "fil"
		if(preg_match('/[sxz]es$/', $word) == 1) return substr($word, 0, -2);

		// otherwise, if the word ends in -s, remove it and return it
		if(substr($word, -1) === 's') return substr($word, 0, -1);

		return $word;
	}

	// @desc runs $word through every stage in order and returns the first result
	// @param $word string
	// @param $to_plural bool true to pluralize, false to singularize
	// @return string|false the inflected word, or false if no stage applied
	private static function run_gamut($word, $to_plural = true) {
		// user-defined inflections take precedence over everything else
		$inflection = self::lookup(self::$user_defined_inflections, $word, $to_plural);
		if($inflection !== false) return $inflection;

		// return the word if it's the same plural or singular
		if(self::does_not_inflect($word)) return $word;

		// normally we'd handle pronouns here, but I don't see any point in doing that for this,
		// but it could always be fleshed out in the future to include this functionality.

		// irregular whole words
		$inflection = self::lookup(self::$irregular_words, $word, $to_plural);
		if($inflection !== false) return $inflection;

		// suffix tables, in priority order
		$tables = array(
			self::$irregular_inflections,
			self::$classical_inflections,
			self::$es,
			self::$f,
			self::$y,
			self::$o,
		);
		foreach($tables as $rules) {
			$inflection = self::apply_rules($rules, $word, $to_plural);
			if($inflection !== false) return $inflection;
		}

		// none of these, so return false to signify no change
		return false;
	}

	// set user defined inflections
	// @desc alias for define_inflections($inflections)
	public static function define_inflection($inflection) {
		self::define_inflections($inflection);
	}

	// @desc defines numerous inflections
	// @format ["word"=>"inflection", ...]
	public static function define_inflections($inflections) {
		foreach($inflections as $word=>$inflection) self::$user_defined_inflections[$word] = $inflection;
	}

	// @desc looks a whole word up in a "singular"=>"plural" table
	// @return string|false the form for the requested direction; the word itself
	//         if it is already in that form; false if the table does not know it
	private static function lookup($table, $word, $to_plural) {
		$is_singular = array_key_exists($word, $table);
		$singular = array_search($word, $table, true); // key of the matching plural, or false

		if($to_plural) {
			if($is_singular) return $table[$word];
			return ($singular !== false) ? $word : false;
		}

		if($singular !== false) return (string)$singular;
		return $is_singular ? $word : false;
	}

	// @desc checks whether a word is the same in the singular and the plural
	// @return bool
	private static function does_not_inflect($word) {
		foreach(self::$words_that_do_not_inflect_in_the_plural as $noninflector) {
			// a leading "-" marks a suffix: allow anything in front of it
			if(substr($noninflector, 0, 1) == '-') $noninflector = '.*' . substr($noninflector, 1);

			// anchor both ends, otherwise "-ois" matches "noise" and "fish" matches "fisherman"
			if(preg_match("/^(?:{$noninflector})$/", $word) == 1) return true;
		}

		return false;
	}

	// @desc applies the first matching rule of a suffix table to $word
	// @param $rules array "-singular ending"=>"-plural ending" table
	// @param $word string
	// @param $to_plural bool true to rewrite singular=>plural, false for the reverse
	// @return string|false the rewritten word, or false if no rule matched
	private static function apply_rules($rules, $word, $to_plural) {
		foreach($rules as $singular=>$plural) {
			// split "-[lm]ouse" into its regex context "[lm]" and literal ending "ouse"
			if(preg_match('/^-((?:\[[^\]]*\]|\.\*)*)(.*)$/', $singular, $parts) != 1) continue;
			$context = $parts[1];
			$singular_ending = $parts[2];
			$plural_ending = substr($plural, 1);

			$from = $to_plural ? $singular_ending : $plural_ending;
			$to = $to_plural ? $plural_ending : $singular_ending;

			// capture the stem (including the context) and swap only the trailing
			// ending; str_replace() would also rewrite earlier occurrences
			// ("church" => "chesurches")
			$pattern = '/^(.*' . $context . ')' . preg_quote($from, '/') . '$/';
			if(preg_match($pattern, $word, $match) == 1) return $match[1] . $to;
		}

		return false;
	}

	// get pluralization
	// @desc property-style access: $inflector->user returns "users"
	// @note magic methods cannot be static (a compile error on PHP 8), so this is
	//       an instance method
	public function __get($word) {
		return self::pluralize($word);
	}

	// @desc returns the plural of $word when $number is greater than 1, otherwise $word
	// @note PHP discards this return value for the "$inflector->word = $n" syntax;
	//       it is only visible when the method is called explicitly
	public function __set($word, $number) {
		if($number > 1) return self::pluralize($word);
		return $word;
	}
}
// @desc Exception type declared for the Pluralize extension.
//       It adds no members of its own to StdException.
class PluralizeException extends StdException
{
}
353 // stolen from Rails' active_support
354 /*# The Inflector transforms words from singular to plural, class names to table names, modularized class names to ones without,
355 # and class names to foreign keys.
356 module Inflector
357 extend self
359 def pluralize(word)
360 result = word.to_s.dup
362 if uncountable_words.include?(result.downcase)
363 result
364 else
365 plural_rules.each { |(rule, replacement)| break if result.gsub!(rule, replacement) }
366 result
370 def singularize(word)
371 result = word.to_s.dup
373 if uncountable_words.include?(result.downcase)
374 result
375 else
376 singular_rules.each { |(rule, replacement)| break if result.gsub!(rule, replacement) }
377 result
381 def camelize(lower_case_and_underscored_word)
382 lower_case_and_underscored_word.to_s.gsub(/\/(.?)/) { "::" + $1.upcase }.gsub(/(^|_)(.)/) { $2.upcase }
385 def underscore(camel_cased_word)
386 camel_cased_word.to_s.gsub(/::/, '/').gsub(/([A-Z]+)([A-Z])/,'\1_\2').gsub(/([a-z\d])([A-Z])/,'\1_\2').downcase
389 def humanize(lower_case_and_underscored_word)
390 lower_case_and_underscored_word.to_s.gsub(/_/, " ").capitalize
393 def demodulize(class_name_in_module)
394 class_name_in_module.to_s.gsub(/^.*::/, '')
397 def tableize(class_name)
398 pluralize(underscore(class_name))
401 def classify(table_name)
402 camelize(singularize(table_name))
405 def foreign_key(class_name, separate_class_name_and_id_with_underscore = true)
406 Inflector.underscore(Inflector.demodulize(class_name)) +
407 (separate_class_name_and_id_with_underscore ? "_id" : "id")
410 def constantize(camel_cased_word)
411 camel_cased_word.split("::").inject(Object) do |final_type, part|
412 final_type = final_type.const_get(part)
416 private
417 def uncountable_words #:doc
418 %w( equipment information rice money species series fish )
421 def plural_rules #:doc:
423 [/^(ox)$/i, '\1\2en'], # ox
424 [/([m|l])ouse$/i, '\1ice'], # mouse, louse
425 [/(matr|vert)ix|ex$/i, '\1ices'], # matrix, vertex, index
426 [/(x|ch|ss|sh)$/i, '\1es'], # search, switch, fix, box, process, address
427 [/([^aeiouy]|qu)ies$/i, '\1y'],
428 [/([^aeiouy]|qu)y$/i, '\1ies'], # query, ability, agency
429 [/(hive)$/i, '\1s'], # archive, hive
430 [/(?:([^f])fe|([lr])f)$/i, '\1\2ves'], # half, safe, wife
431 [/sis$/i, 'ses'], # basis, diagnosis
432 [/([ti])um$/i, '\1a'], # datum, medium
433 [/(p)erson$/i, '\1eople'], # person, salesperson
434 [/(m)an$/i, '\1en'], # man, woman, spokesman
435 [/(c)hild$/i, '\1hildren'], # child
436 [/(buffal|tomat)o$/i, '\1\2oes'], # buffalo, tomato
437 [/(bu)s$/i, '\1\2ses'], # bus
438 [/(alias)/i, '\1es'], # alias
439 [/(octop|vir)us$/i, '\1i'], # octopus, virus - virus has no defined plural (according to Latin/dictionary.com), but viri is better than viruses/viruss
440 [/(ax|cri|test)is$/i, '\1es'], # axis, crisis
441 [/s$/i, 's'], # no change (compatibility)
442 [/$/, 's']
446 def singular_rules #:doc:
448 [/(matr)ices$/i, '\1ix'],
449 [/(vert)ices$/i, '\1ex'],
450 [/^(ox)en/i, '\1'],
451 [/(alias)es$/i, '\1'],
452 [/([octop|vir])i$/i, '\1us'],
453 [/(cris|ax|test)es$/i, '\1is'],
454 [/(shoe)s$/i, '\1'],
455 [/(o)es$/i, '\1'],
456 [/(bus)es$/i, '\1'],
457 [/([m|l])ice$/i, '\1ouse'],
458 [/(x|ch|ss|sh)es$/i, '\1'],
459 [/(m)ovies$/i, '\1\2ovie'],
460 [/(s)eries$/i, '\1\2eries'],
461 [/([^aeiouy]|qu)ies$/i, '\1y'],
462 [/([lr])ves$/i, '\1f'],
463 [/(tive)s$/i, '\1'],
464 [/(hive)s$/i, '\1'],
465 [/([^f])ves$/i, '\1fe'],
466 [/(^analy)ses$/i, '\1sis'],
467 [/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$/i, '\1\2sis'],
468 [/([ti])a$/i, '\1um'],
469 [/(p)eople$/i, '\1\2erson'],
470 [/(m)en$/i, '\1an'],
471 [/(s)tatus$/i, '\1\2tatus'],
472 [/(c)hildren$/i, '\1\2hild'],
473 [/(n)ews$/i, '\1\2ews'],
474 [/s$/i, '']
477 end*/