* use \Spark\Text; * $foo = Text::asciify($bar); * */ class Text { /** * Transliterate a string or array in a given source character set to the * nearest ASCII equivalent (according to iconv()). * * @param $text string or array to convert * @param $source_charset string name of source character set * @return string */ public static function asciify ($text, $source_charset = 'UTF-8') { if (is_array($text)) $encoding = mb_detect_encoding($text[0]); else $encoding = mb_detect_encoding($text); if (! $encoding) $encoding = $source_charset; // LC_CTYPE cannot be C or POSIX // http://us3.php.net/manual/en/function.iconv.php#74101 setlocale(LC_CTYPE, 'en_US.UTF-8'); ini_set('mbstring.substitute_character', "none"); if (is_array($text)) { foreach ($text as &$t) { $t = mb_convert_encoding($t, $encoding, 'UTF-8'); $t = static::remove_accents($t); // in case iconv sucks $t = iconv('UTF-8', "ASCII//TRANSLIT", $t); } } else { $text = mb_convert_encoding($text, $encoding, 'UTF-8'); $text = static::remove_accents($text); // in case iconv sucks $text = iconv('UTF-8', "ASCII//TRANSLIT", $text); } return $text; } /** * https://gist.github.com/evaisse/169594 * Unaccent the input string string. An example string like `ÀØėÿᾜὨζὅБю` * will be translated to `AOeyIOzoBY`. More complete than : * strtr( (string)$str, * "ÀÁÂÃÄÅàáâãäåÒÓÔÕÖØòóôõöøÈÉÊËèéêëÇçÌÍÎÏìíîïÙÚÛÜùúûüÿÑñ", * "aaaaaaaaaaaaooooooooooooeeeeeeeecciiiiiiiiuuuuuuuuynn" ); * * @param $str input string * @param $utf8 if null, function will detect input string encoding * @return string input string without accent */ public static function remove_accents( $str, $utf8=true ) { $str = (string)$str; if( is_null($utf8) ) { if( !function_exists('mb_detect_encoding') ) { $utf8 = (strtolower( mb_detect_encoding($str) )=='utf-8'); } else { $length = strlen($str); $utf8 = true; for ($i=0; $i < $length; $i++) { $c = ord($str[$i]); if ($c < 0x80) $n = 0; # 0bbbbbbb elseif (($c & 0xE0) == 0xC0) $n=1; # 110bbbbb elseif (($c & 0xF0) == 0xE0) $n=2; # 1110bbbb elseif (($c & 0xF8) == 0xF0) $n=3; # 11110bbb elseif (($c & 0xFC) == 0xF8) $n=4; # 111110bb elseif (($c & 0xFE) == 0xFC) $n=5; # 1111110b else return false; # Does not match any model for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ? if ((++$i == $length) || ((ord($str[$i]) & 0xC0) != 0x80)) { $utf8 = false; break; } } } } } if(!$utf8) $str = utf8_encode($str); $transliteration = array( 'IJ' => 'I', 'Ö' => 'O','Œ' => 'O','Ü' => 'U','ä' => 'a','æ' => 'a', 'ij' => 'i','ö' => 'o','œ' => 'o','ü' => 'u','ß' => 's','ſ' => 's', 'À' => 'A','Á' => 'A','Â' => 'A','Ã' => 'A','Ä' => 'A','Å' => 'A', 'Æ' => 'A','Ā' => 'A','Ą' => 'A','Ă' => 'A','Ç' => 'C','Ć' => 'C', 'Č' => 'C','Ĉ' => 'C','Ċ' => 'C','Ď' => 'D','Đ' => 'D','È' => 'E', 'É' => 'E','Ê' => 'E','Ë' => 'E','Ē' => 'E','Ę' => 'E','Ě' => 'E', 'Ĕ' => 'E','Ė' => 'E','Ĝ' => 'G','Ğ' => 'G','Ġ' => 'G','Ģ' => 'G', 'Ĥ' => 'H','Ħ' => 'H','Ì' => 'I','Í' => 'I','Î' => 'I','Ï' => 'I', 'Ī' => 'I','Ĩ' => 'I','Ĭ' => 'I','Į' => 'I','İ' => 'I','Ĵ' => 'J', 'Ķ' => 'K','Ľ' => 'K','Ĺ' => 'K','Ļ' => 'K','Ŀ' => 'K','Ł' => 'L', 'Ñ' => 'N','Ń' => 'N','Ň' => 'N','Ņ' => 'N','Ŋ' => 'N','Ò' => 'O', 'Ó' => 'O','Ô' => 'O','Õ' => 'O','Ø' => 'O','Ō' => 'O','Ő' => 'O', 'Ŏ' => 'O','Ŕ' => 'R','Ř' => 'R','Ŗ' => 'R','Ś' => 'S','Ş' => 'S', 'Ŝ' => 'S','Ș' => 'S','Š' => 'S','Ť' => 'T','Ţ' => 'T','Ŧ' => 'T', 'Ț' => 'T','Ù' => 'U','Ú' => 'U','Û' => 'U','Ū' => 'U','Ů' => 'U', 'Ű' => 'U','Ŭ' => 'U','Ũ' => 'U','Ų' => 'U','Ŵ' => 'W','Ŷ' => 'Y', 'Ÿ' => 'Y','Ý' => 'Y','Ź' => 'Z','Ż' => 'Z','Ž' => 'Z','à' => 'a', 'á' => 'a','â' => 'a','ã' => 'a','ā' => 'a','ą' => 'a','ă' => 'a', 'å' => 'a','ç' => 'c','ć' => 'c','č' => 'c','ĉ' => 'c','ċ' => 'c', 'ď' => 'd','đ' => 'd','è' => 'e','é' => 'e','ê' => 'e','ë' => 'e', 'ē' => 'e','ę' => 'e','ě' => 'e','ĕ' => 'e','ė' => 'e','ƒ' => 'f', 'ĝ' => 'g','ğ' => 'g','ġ' => 'g','ģ' => 'g','ĥ' => 'h','ħ' => 'h', 'ì' => 'i','í' => 'i','î' => 'i','ï' => 'i','ī' => 'i','ĩ' => 'i', 'ĭ' => 'i','į' => 'i','ı' => 'i','ĵ' => 'j','ķ' => 'k','ĸ' => 'k', 'ł' => 'l','ľ' => 'l','ĺ' => 'l','ļ' => 'l','ŀ' => 'l','ñ' => 'n', 'ń' => 'n','ň' => 'n','ņ' => 'n','ʼn' => 'n','ŋ' => 'n','ò' => 'o', 'ó' => 'o','ô' => 'o','õ' => 'o','ø' => 'o','ō' => 'o','ő' => 'o', 'ŏ' => 'o','ŕ' => 'r','ř' => 'r','ŗ' => 'r','ś' => 's','š' => 's', 'ť' => 't','ù' => 'u','ú' => 'u','û' => 'u','ū' => 'u','ů' => 'u', 'ű' => 'u','ŭ' => 'u','ũ' => 'u','ų' => 'u','ŵ' => 'w','ÿ' => 'y', 'ý' => 'y','ŷ' => 'y','ż' => 'z','ź' => 'z','ž' => 'z','Α' => 'A', 'Ά' => 'A','Ἀ' => 'A','Ἁ' => 'A','Ἂ' => 'A','Ἃ' => 'A','Ἄ' => 'A', 'Ἅ' => 'A','Ἆ' => 'A','Ἇ' => 'A','ᾈ' => 'A','ᾉ' => 'A','ᾊ' => 'A', 'ᾋ' => 'A','ᾌ' => 'A','ᾍ' => 'A','ᾎ' => 'A','ᾏ' => 'A','Ᾰ' => 'A', 'Ᾱ' => 'A','Ὰ' => 'A','ᾼ' => 'A','Β' => 'B','Γ' => 'G','Δ' => 'D', 'Ε' => 'E','Έ' => 'E','Ἐ' => 'E','Ἑ' => 'E','Ἒ' => 'E','Ἓ' => 'E', 'Ἔ' => 'E','Ἕ' => 'E','Ὲ' => 'E','Ζ' => 'Z','Η' => 'I','Ή' => 'I', 'Ἠ' => 'I','Ἡ' => 'I','Ἢ' => 'I','Ἣ' => 'I','Ἤ' => 'I','Ἥ' => 'I', 'Ἦ' => 'I','Ἧ' => 'I','ᾘ' => 'I','ᾙ' => 'I','ᾚ' => 'I','ᾛ' => 'I', 'ᾜ' => 'I','ᾝ' => 'I','ᾞ' => 'I','ᾟ' => 'I','Ὴ' => 'I','ῌ' => 'I', 'Θ' => 'T','Ι' => 'I','Ί' => 'I','Ϊ' => 'I','Ἰ' => 'I','Ἱ' => 'I', 'Ἲ' => 'I','Ἳ' => 'I','Ἴ' => 'I','Ἵ' => 'I','Ἶ' => 'I','Ἷ' => 'I', 'Ῐ' => 'I','Ῑ' => 'I','Ὶ' => 'I','Κ' => 'K','Λ' => 'L','Μ' => 'M', 'Ν' => 'N','Ξ' => 'K','Ο' => 'O','Ό' => 'O','Ὀ' => 'O','Ὁ' => 'O', 'Ὂ' => 'O','Ὃ' => 'O','Ὄ' => 'O','Ὅ' => 'O','Ὸ' => 'O','Π' => 'P', 'Ρ' => 'R','Ῥ' => 'R','Σ' => 'S','Τ' => 'T','Υ' => 'Y','Ύ' => 'Y', 'Ϋ' => 'Y','Ὑ' => 'Y','Ὓ' => 'Y','Ὕ' => 'Y','Ὗ' => 'Y','Ῠ' => 'Y', 'Ῡ' => 'Y','Ὺ' => 'Y','Φ' => 'F','Χ' => 'X','Ψ' => 'P','Ω' => 'O', 'Ώ' => 'O','Ὠ' => 'O','Ὡ' => 'O','Ὢ' => 'O','Ὣ' => 'O','Ὤ' => 'O', 'Ὥ' => 'O','Ὦ' => 'O','Ὧ' => 'O','ᾨ' => 'O','ᾩ' => 'O','ᾪ' => 'O', 'ᾫ' => 'O','ᾬ' => 'O','ᾭ' => 'O','ᾮ' => 'O','ᾯ' => 'O','Ὼ' => 'O', 'ῼ' => 'O','α' => 'a','ά' => 'a','ἀ' => 'a','ἁ' => 'a','ἂ' => 'a', 'ἃ' => 'a','ἄ' => 'a','ἅ' => 'a','ἆ' => 'a','ἇ' => 'a','ᾀ' => 'a', 'ᾁ' => 'a','ᾂ' => 'a','ᾃ' => 'a','ᾄ' => 'a','ᾅ' => 'a','ᾆ' => 'a', 'ᾇ' => 'a','ὰ' => 'a','ᾰ' => 'a','ᾱ' => 'a','ᾲ' => 'a','ᾳ' => 'a', 'ᾴ' => 'a','ᾶ' => 'a','ᾷ' => 'a','β' => 'b','γ' => 'g','δ' => 'd', 'ε' => 'e','έ' => 'e','ἐ' => 'e','ἑ' => 'e','ἒ' => 'e','ἓ' => 'e', 'ἔ' => 'e','ἕ' => 'e','ὲ' => 'e','ζ' => 'z','η' => 'i','ή' => 'i', 'ἠ' => 'i','ἡ' => 'i','ἢ' => 'i','ἣ' => 'i','ἤ' => 'i','ἥ' => 'i', 'ἦ' => 'i','ἧ' => 'i','ᾐ' => 'i','ᾑ' => 'i','ᾒ' => 'i','ᾓ' => 'i', 'ᾔ' => 'i','ᾕ' => 'i','ᾖ' => 'i','ᾗ' => 'i','ὴ' => 'i','ῂ' => 'i', 'ῃ' => 'i','ῄ' => 'i','ῆ' => 'i','ῇ' => 'i','θ' => 't','ι' => 'i', 'ί' => 'i','ϊ' => 'i','ΐ' => 'i','ἰ' => 'i','ἱ' => 'i','ἲ' => 'i', 'ἳ' => 'i','ἴ' => 'i','ἵ' => 'i','ἶ' => 'i','ἷ' => 'i','ὶ' => 'i', 'ῐ' => 'i','ῑ' => 'i','ῒ' => 'i','ῖ' => 'i','ῗ' => 'i','κ' => 'k', 'λ' => 'l','μ' => 'm','ν' => 'n','ξ' => 'k','ο' => 'o','ό' => 'o', 'ὀ' => 'o','ὁ' => 'o','ὂ' => 'o','ὃ' => 'o','ὄ' => 'o','ὅ' => 'o', 'ὸ' => 'o','π' => 'p','ρ' => 'r','ῤ' => 'r','ῥ' => 'r','σ' => 's', 'ς' => 's','τ' => 't','υ' => 'y','ύ' => 'y','ϋ' => 'y','ΰ' => 'y', 'ὐ' => 'y','ὑ' => 'y','ὒ' => 'y','ὓ' => 'y','ὔ' => 'y','ὕ' => 'y', 'ὖ' => 'y','ὗ' => 'y','ὺ' => 'y','ῠ' => 'y','ῡ' => 'y','ῢ' => 'y', 'ῦ' => 'y','ῧ' => 'y','φ' => 'f','χ' => 'x','ψ' => 'p','ω' => 'o', 'ώ' => 'o','ὠ' => 'o','ὡ' => 'o','ὢ' => 'o','ὣ' => 'o','ὤ' => 'o', 'ὥ' => 'o','ὦ' => 'o','ὧ' => 'o','ᾠ' => 'o','ᾡ' => 'o','ᾢ' => 'o', 'ᾣ' => 'o','ᾤ' => 'o','ᾥ' => 'o','ᾦ' => 'o','ᾧ' => 'o','ὼ' => 'o', 'ῲ' => 'o','ῳ' => 'o','ῴ' => 'o','ῶ' => 'o','ῷ' => 'o','А' => 'A', 'Б' => 'B','В' => 'V','Г' => 'G','Д' => 'D','Е' => 'E','Ё' => 'E', 'Ж' => 'Z','З' => 'Z','И' => 'I','Й' => 'I','К' => 'K','Л' => 'L', 'М' => 'M','Н' => 'N','О' => 'O','П' => 'P','Р' => 'R','С' => 'S', 'Т' => 'T','У' => 'U','Ф' => 'F','Х' => 'K','Ц' => 'T','Ч' => 'C', 'Ш' => 'S','Щ' => 'S','Ы' => 'Y','Э' => 'E','Ю' => 'Y','Я' => 'Y', 'а' => 'A','б' => 'B','в' => 'V','г' => 'G','д' => 'D','е' => 'E', 'ё' => 'E','ж' => 'Z','з' => 'Z','и' => 'I','й' => 'I','к' => 'K', 'л' => 'L','м' => 'M','н' => 'N','о' => 'O','п' => 'P','р' => 'R', 'с' => 'S','т' => 'T','у' => 'U','ф' => 'F','х' => 'K','ц' => 'T', 'ч' => 'C','ш' => 'S','щ' => 'S','ы' => 'Y','э' => 'E','ю' => 'Y', 'я' => 'Y','ð' => 'd','Ð' => 'D','þ' => 't','Þ' => 'T','ა' => 'a', 'ბ' => 'b','გ' => 'g','დ' => 'd','ე' => 'e','ვ' => 'v','ზ' => 'z', 'თ' => 't','ი' => 'i','კ' => 'k','ლ' => 'l','მ' => 'm','ნ' => 'n', 'ო' => 'o','პ' => 'p','ჟ' => 'z','რ' => 'r','ს' => 's','ტ' => 't', 'უ' => 'u','ფ' => 'p','ქ' => 'k','ღ' => 'g','ყ' => 'q','შ' => 's', 'ჩ' => 'c','ც' => 't','ძ' => 'd','წ' => 't','ჭ' => 'c','ხ' => 'k', 'ჯ' => 'j','ჰ' => 'h' ); $str = str_replace( array_keys( $transliteration ), array_values( $transliteration ), $str); return $str; } /** * Run a variety of functions on a string or array, with the intent of making * shipping APIs happy. * * - Trim * - Transliterate (UTF-8 to ASCII) * - Remove non-ASCII characters * - Truncate */ public static function shipify ($str, $length = null) { // manually specify some $trans = [ "&" => "and", "'" => "" ]; if (is_array($str)) { foreach ($str as &$s) { $s = trim(strtr($s, $trans)); $s = static::asciify($s); if ($length) { $s = substr($s, 0, $length); } } } else { $str = trim(strtr($str, $trans)); $str = static::asciify($str); if ($length) { $str = substr($str, 0, $length); } } return $str; } public static function truncate ($str, $len = 25, $pad = 0) { if(strlen($str) > $len - $pad) $ret = substr($str, 0, $len) . '…'; else $ret = $str; return $ret; } public static function truncateWithTitle ($str, $len = 25, $pad = 0) { if(strlen($str) > $len - $pad) $ret = '' . substr($str, 0, $len) . '…'; else $ret = $str; return $ret; } public static function truncateToWord ($str, $len = 25) { $str = trim(preg_replace('/\s+/', ' ', $str)); if (strlen($str) > $len) { $ret = wordwrap($str, $len); $ret = substr($ret, 0, strpos($ret, "\n")); $ret .= '…'; } else $ret = $str; return $ret; } public static function depluralize($word) { return rtrim($word, 's'); // heh } public static function pluralize($word) { if (substr($word,-1)==='s') return $word; return $word . 's'; } // assumes we're coming from camelcase'd notation public static function underscore($string) { // yay php... $matches = null; preg_match_all('/[A-Z][^A-Z]*/',$string,$matches); $parts = array_map(function($s){return strtolower($s);},$matches[0]); return implode('_',$parts); } // assumes we're coming from underscored notation public static function camelcase($string) { $parts = explode('_', $string); foreach ($parts as &$part) $part = ucfirst($part); return implode ('', $parts); } // I didn't want to use size, file, or filesize for obvious reasons. open to better name... public static function prettyfilesize($bytes) { if ($bytes > 1048575) { $div = $bytes / 1048576; $size = round($div, 1)." MB"; } else { $div = $bytes / 1024; $size = round($div, 1)." KB"; } return $size; } }