ウェブサービスを作っています。

Inflector

今日は、英単語の名詞の複数形、単数形を変換するクラスを作りました。
PHP4/5 用で。
Inflector::pluralize('city'); で cities が返ってきたり、Inflector::singularize('cities'); で city が返ってきたりします。
CakePHP のソースを参考にしたら、あっさり完成しました。
変換ルールとかの配列は、ほぼ CakePHP のまんまです。
実は意味ない要素がある気がするのですが、面倒なのでまんまです。

<?php
/**
 * Converts English nouns between singular and plural form.
 *
 * Usage: Inflector::pluralize('city') returns 'cities' and
 * Inflector::singularize('cities') returns 'city'.
 *
 * Lookup order for both directions:
 *   1. words that never inflect ($uninflected) are returned unchanged,
 *   2. irregular words are looked up case-insensitively in a fixed table,
 *   3. the first matching regex rule (in array order) is applied.
 *
 * Requires PHP 5 or later (uses visibility and static modifiers).
 */
class Inflector
{
    /**
     * Regex fragments for words whose singular and plural are identical.
     * Each fragment must match the WHOLE word (case-insensitively); entries
     * starting with '.*' therefore also cover prefixed words such as
     * 'reindeer' or 'Japanese'.
     */
    public $uninflected = array(
        '.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox',
        '.*sheep', 'Amoyese', 'bison', 'Borghese', 'bream', 'breeches',
        'britches', 'buffalo', 'cantus', 'carp', 'chassis', 'clippers',
        'cod', 'coitus', 'Congoese', 'contretemps', 'corps', 'debris',
        'diabetes', 'djinn', 'eland', 'elk', 'equipment', 'Faroese',
        'flounder', 'Foochowese', 'gallows', 'Genevese', 'Genoese', 'Gilbertese',
        'graffiti', 'headquarters', 'herpes', 'hijinks', 'Hottentotese', 'information',
        'innings', 'jackanapes', 'Kiplingese', 'Kongoese', 'Lucchese', 'mackerel',
        'Maltese', 'media', 'mews', 'moose', 'mumps', 'Nankingese',
        'news', 'nexus', 'Niasese', 'Pekingese', 'Piedmontese', 'pincers',
        'Pistoiese', 'pliers', 'Portuguese', 'proceedings', 'rabies', 'rice',
        'rhinoceros', 'salmon', 'Sarawakese', 'scissors', 'sea[- ]bass', 'series',
        'Shavese', 'shears', 'siemens', 'species', 'swine', 'testes',
        'trousers', 'trout', 'tuna', 'Vermontese', 'Wenchowese', 'whiting',
        'wildebeest', 'Yengeese',
    );

    /**
     * Pluralization rules as pattern => replacement. Order matters: the
     * first pattern that matches wins, and the final '/$/' rule matches
     * every word, appending 's'.
     */
    public $pluralRules = array(
        '/(s)tatus$/i' => '$1tatuses',
        '/(quiz)$/i' => '$1zes',
        '/^(ox)$/i' => '$1en',
        '/([ml])ouse$/i' => '$1ice',
        '/(matr|vert|ind)(ix|ex)$/i' => '$1ices',
        '/(x|ch|ss|sh)$/i' => '$1es',
        '/([^aeiouy]|qu)y$/i' => '$1ies',
        '/(hive)$/i' => '$1s',
        '/(?:([^f])fe|([lr])f)$/i' => '$1$2ves',
        '/sis$/i' => 'ses',
        '/([ti])um$/i' => '$1a',
        '/(p)erson$/i' => '$1eople',
        '/(m)an$/i' => '$1en',
        '/(c)hild$/i' => '$1hildren',
        '/(buffal|tomat)o$/i' => '$1oes',
        // Case-sensitive: only a lowercase "us" ending is handled here.
        '/us$/' => 'uses',
        '/(alias)$/i' => '$1es',
        // Lowercase "...us" words are already consumed by '/us$/' above.
        '/(octop|vir)us$/i' => '$1i',
        '/(ax|cri|test)is$/i' => '$1es',
        '/s$/' => 's',
        '/$/' => 's',
    );

    /**
     * Irregular singular => plural pairs. Keys are lowercase; the lookup
     * lowercases the input, and the stored (lowercase) plural is returned.
     */
    public $pluralIrregular = array(
        'atlas' => 'atlases', 'beef' => 'beefs', 'brother' => 'brothers',
        'child' => 'children', 'corpus' => 'corpuses', 'cow' => 'cows',
        'ganglion' => 'ganglions', 'genie' => 'genies', 'genus' => 'genera',
        'graffito' => 'graffiti', 'hoof' => 'hoofs', 'loaf' => 'loaves',
        'man' => 'men', 'money' => 'monies', 'mongoose' => 'mongooses',
        'move' => 'moves', 'mythos' => 'mythoi', 'numen' => 'numina',
        'occiput' => 'occiputs', 'octopus' => 'octopuses', 'opus' => 'opuses',
        'ox' => 'oxen', 'penis' => 'penises', 'person' => 'people',
        'sex' => 'sexes', 'soliloquy' => 'soliloquies', 'testis' => 'testes',
        'trilby' => 'trilbys', 'turf' => 'turfs',
    );

    /**
     * Singularization rules as pattern => replacement. Order matters: the
     * first pattern that matches wins. Unlike the plural rules there is no
     * catch-all, so a word matching none of these is left unresolved.
     */
    public $singularRules = array(
        '/(s)tatuses$/i' => '$1tatus',
        '/^(.*)(menu)s$/i' => '$1$2',
        '/(quiz)zes$/i' => '$1',
        '/(matr)ices$/i' => '$1ix',
        '/(vert|ind)ices$/i' => '$1ex',
        '/^(ox)en/i' => '$1',
        '/(alias)(es)*$/i' => '$1',
        // Explicit Latin stems. This must be an alternation, not a character
        // class, otherwise any word ending in e.g. "ti" or "ri" is rewritten
        // ("spaghetti" -> "spaghettus").
        '/(octop|vir|alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin)i$/i' => '$1us',
        '/(cris|ax|test)es$/i' => '$1is',
        '/(shoe)s$/i' => '$1',
        '/(o)es$/i' => '$1',
        '/ouses$/' => 'ouse',
        '/uses$/' => 'us',
        '/([ml])ice$/i' => '$1ouse',
        '/(x|ch|ss|sh)es$/i' => '$1',
        '/(m)ovies$/i' => '$1ovie',
        '/(s)eries$/i' => '$1eries',
        '/([^aeiouy]|qu)ies$/i' => '$1y',
        '/([lr])ves$/i' => '$1f',
        '/(tive)s$/i' => '$1',
        '/(hive)s$/i' => '$1',
        '/(drive)s$/i' => '$1',
        '/([^f])ves$/i' => '$1fe',
        // Single capture group so the stem is emitted exactly once
        // ("psychoanalyses" -> "psychoanalysis").
        '/(analy|ba|diagno|parenthe|progno|synop|the)ses$/i' => '$1sis',
        '/([ti])a$/i' => '$1um',
        '/(p)eople$/i' => '$1erson',
        '/(m)en$/i' => '$1an',
        '/(c)hildren$/i' => '$1hild',
        '/(n)ews$/i' => '$1ews',
        // Words ending in lowercase "us" are kept as they are.
        '/^(.*us)$/' => '$1',
        '/s$/i' => '',
    );

    /**
     * Irregular plural => singular pairs (the inverse of $pluralIrregular).
     */
    public $singularIrregular = array(
        'atlases' => 'atlas', 'beefs' => 'beef', 'brothers' => 'brother',
        'children' => 'child', 'corpuses' => 'corpus', 'cows' => 'cow',
        'ganglions' => 'ganglion', 'genies' => 'genie', 'genera' => 'genus',
        'graffiti' => 'graffito', 'hoofs' => 'hoof', 'loaves' => 'loaf',
        'men' => 'man', 'monies' => 'money', 'mongooses' => 'mongoose',
        'moves' => 'move', 'mythoi' => 'mythos', 'numina' => 'numen',
        'occiputs' => 'occiput', 'octopuses' => 'octopus', 'opuses' => 'opus',
        'oxen' => 'ox', 'penises' => 'penis', 'people' => 'person',
        'sexes' => 'sex', 'soliloquies' => 'soliloquy', 'testes' => 'testis',
        'trilbys' => 'trilby', 'turfs' => 'turf',
    );

    /**
     * Returns the plural form of an English noun.
     *
     * Declared static so the documented Inflector::pluralize() call works on
     * every PHP version from 5 up; calling it on an instance still works.
     *
     * @param string $word Singular noun.
     * @return string|false The plural form. Because the last plural rule
     *                      matches any word, false is not expected in practice.
     */
    public static function pluralize($word)
    {
        $tables = new self;

        return self::inflect(
            $word,
            $tables->uninflected,
            $tables->pluralIrregular,
            $tables->pluralRules
        );
    }

    /**
     * Returns the singular form of an English noun.
     *
     * @param string $word Plural noun.
     * @return string|false The singular form, or false when the word is not
     *                      uninflected, not irregular and matches no rule
     *                      (for example an already-singular word like "city").
     */
    public static function singularize($word)
    {
        $tables = new self;

        return self::inflect(
            $word,
            $tables->uninflected,
            $tables->singularIrregular,
            $tables->singularRules
        );
    }

    /**
     * Shared lookup used by pluralize() and singularize().
     *
     * @param string $word        Word to convert.
     * @param array  $uninflected Regex fragments of words returned unchanged.
     * @param array  $irregular   Lowercase word => converted word.
     * @param array  $rules       Ordered regex pattern => replacement.
     * @return string|false Converted word, or false when nothing applies.
     */
    private static function inflect($word, $uninflected, $irregular, $rules)
    {
        // Anchor at both ends so that e.g. "rice" does not swallow "price".
        $uninflectedPattern = '/^(?:' . implode('|', $uninflected) . ')$/i';
        if (preg_match($uninflectedPattern, $word)) {
            return $word;
        }

        // Table keys are lowercase, so one lowercase lookup replaces a scan.
        $key = strtolower($word);
        if (isset($irregular[$key])) {
            return $irregular[$key];
        }

        foreach ($rules as $pattern => $replacement) {
            if (preg_match($pattern, $word)) {
                return preg_replace($pattern, $replacement, $word);
            }
        }

        return false;
    }
}