A modest collection of PHP libraries used at SparkFun.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

312 lines
13 KiB

12 years ago
12 years ago
  1. <?php
  2. namespace SparkLib\Util;
  3. use \iconv;
  4. /**
  5. * A place to hang little text utility functions. It's only a class so the
  6. * autoloader can pick it up. Use like:
  7. *
  8. * <code>
  9. * use \Spark\Text;
  10. * $foo = Text::asciify($bar);
  11. * </code>
  12. */
  13. class Text {
  14. /**
  15. * Transliterate a string or array in a given source character set to the
  16. * nearest ASCII equivalent (according to iconv()).
  17. *
  18. * @param $text string or array to convert
  19. * @param $source_charset string name of source character set
  20. * @return string
  21. */
  22. public static function asciify ($text, $source_charset = 'UTF-8')
  23. {
  24. if (is_array($text))
  25. $encoding = mb_detect_encoding($text[0]);
  26. else
  27. $encoding = mb_detect_encoding($text);
  28. if (! $encoding)
  29. $encoding = $source_charset;
  30. // LC_CTYPE cannot be C or POSIX
  31. // http://us3.php.net/manual/en/function.iconv.php#74101
  32. setlocale(LC_CTYPE, 'en_US.UTF-8');
  33. ini_set('mbstring.substitute_character', "none");
  34. if (is_array($text)) {
  35. foreach ($text as &$t) {
  36. $t = mb_convert_encoding($t, $encoding, 'UTF-8');
  37. $t = static::remove_accents($t); // in case iconv sucks
  38. $t = iconv('UTF-8', "ASCII//TRANSLIT", $t);
  39. }
  40. } else {
  41. $text = mb_convert_encoding($text, $encoding, 'UTF-8');
  42. $text = static::remove_accents($text); // in case iconv sucks
  43. $text = iconv('UTF-8', "ASCII//TRANSLIT", $text);
  44. }
  45. return $text;
  46. }
  47. /**
  48. * https://gist.github.com/evaisse/169594
  49. * Unaccent the input string string. An example string like `ÀØėÿᾜὨζὅБю`
  50. * will be translated to `AOeyIOzoBY`. More complete than :
  51. * strtr( (string)$str,
  52. * "ÀÁÂÃÄÅàáâãäåÒÓÔÕÖØòóôõöøÈÉÊËèéêëÇçÌÍÎÏìíîïÙÚÛÜùúûüÿÑñ",
  53. * "aaaaaaaaaaaaooooooooooooeeeeeeeecciiiiiiiiuuuuuuuuynn" );
  54. *
  55. * @param $str input string
  56. * @param $utf8 if null, function will detect input string encoding
  57. * @return string input string without accent
  58. */
  59. public static function remove_accents( $str, $utf8=true )
  60. {
  61. $str = (string)$str;
  62. if( is_null($utf8) ) {
  63. if( !function_exists('mb_detect_encoding') ) {
  64. $utf8 = (strtolower( mb_detect_encoding($str) )=='utf-8');
  65. } else {
  66. $length = strlen($str);
  67. $utf8 = true;
  68. for ($i=0; $i < $length; $i++) {
  69. $c = ord($str[$i]);
  70. if ($c < 0x80) $n = 0; # 0bbbbbbb
  71. elseif (($c & 0xE0) == 0xC0) $n=1; # 110bbbbb
  72. elseif (($c & 0xF0) == 0xE0) $n=2; # 1110bbbb
  73. elseif (($c & 0xF8) == 0xF0) $n=3; # 11110bbb
  74. elseif (($c & 0xFC) == 0xF8) $n=4; # 111110bb
  75. elseif (($c & 0xFE) == 0xFC) $n=5; # 1111110b
  76. else return false; # Does not match any model
  77. for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
  78. if ((++$i == $length)
  79. || ((ord($str[$i]) & 0xC0) != 0x80)) {
  80. $utf8 = false;
  81. break;
  82. }
  83. }
  84. }
  85. }
  86. }
  87. if(!$utf8)
  88. $str = utf8_encode($str);
  89. $transliteration = array(
  90. 'IJ' => 'I', 'Ö' => 'O','Œ' => 'O','Ü' => 'U','ä' => 'a','æ' => 'a',
  91. 'ij' => 'i','ö' => 'o','œ' => 'o','ü' => 'u','ß' => 's','ſ' => 's',
  92. 'À' => 'A','Á' => 'A','Â' => 'A','Ã' => 'A','Ä' => 'A','Å' => 'A',
  93. 'Æ' => 'A','Ā' => 'A','Ą' => 'A','Ă' => 'A','Ç' => 'C','Ć' => 'C',
  94. 'Č' => 'C','Ĉ' => 'C','Ċ' => 'C','Ď' => 'D','Đ' => 'D','È' => 'E',
  95. 'É' => 'E','Ê' => 'E','Ë' => 'E','Ē' => 'E','Ę' => 'E','Ě' => 'E',
  96. 'Ĕ' => 'E','Ė' => 'E','Ĝ' => 'G','Ğ' => 'G','Ġ' => 'G','Ģ' => 'G',
  97. 'Ĥ' => 'H','Ħ' => 'H','Ì' => 'I','Í' => 'I','Î' => 'I','Ï' => 'I',
  98. 'Ī' => 'I','Ĩ' => 'I','Ĭ' => 'I','Į' => 'I','İ' => 'I','Ĵ' => 'J',
  99. 'Ķ' => 'K','Ľ' => 'K','Ĺ' => 'K','Ļ' => 'K','Ŀ' => 'K','Ł' => 'L',
  100. 'Ñ' => 'N','Ń' => 'N','Ň' => 'N','Ņ' => 'N','Ŋ' => 'N','Ò' => 'O',
  101. 'Ó' => 'O','Ô' => 'O','Õ' => 'O','Ø' => 'O','Ō' => 'O','Ő' => 'O',
  102. 'Ŏ' => 'O','Ŕ' => 'R','Ř' => 'R','Ŗ' => 'R','Ś' => 'S','Ş' => 'S',
  103. 'Ŝ' => 'S','Ș' => 'S','Š' => 'S','Ť' => 'T','Ţ' => 'T','Ŧ' => 'T',
  104. 'Ț' => 'T','Ù' => 'U','Ú' => 'U','Û' => 'U','Ū' => 'U','Ů' => 'U',
  105. 'Ű' => 'U','Ŭ' => 'U','Ũ' => 'U','Ų' => 'U','Ŵ' => 'W','Ŷ' => 'Y',
  106. 'Ÿ' => 'Y','Ý' => 'Y','Ź' => 'Z','Ż' => 'Z','Ž' => 'Z','à' => 'a',
  107. 'á' => 'a','â' => 'a','ã' => 'a','ā' => 'a','ą' => 'a','ă' => 'a',
  108. 'å' => 'a','ç' => 'c','ć' => 'c','č' => 'c','ĉ' => 'c','ċ' => 'c',
  109. 'ď' => 'd','đ' => 'd','è' => 'e','é' => 'e','ê' => 'e','ë' => 'e',
  110. 'ē' => 'e','ę' => 'e','ě' => 'e','ĕ' => 'e','ė' => 'e','ƒ' => 'f',
  111. 'ĝ' => 'g','ğ' => 'g','ġ' => 'g','ģ' => 'g','ĥ' => 'h','ħ' => 'h',
  112. 'ì' => 'i','í' => 'i','î' => 'i','ï' => 'i','ī' => 'i','ĩ' => 'i',
  113. 'ĭ' => 'i','į' => 'i','ı' => 'i','ĵ' => 'j','ķ' => 'k','ĸ' => 'k',
  114. 'ł' => 'l','ľ' => 'l','ĺ' => 'l','ļ' => 'l','ŀ' => 'l','ñ' => 'n',
  115. 'ń' => 'n','ň' => 'n','ņ' => 'n','ʼn' => 'n','ŋ' => 'n','ò' => 'o',
  116. 'ó' => 'o','ô' => 'o','õ' => 'o','ø' => 'o','ō' => 'o','ő' => 'o',
  117. 'ŏ' => 'o','ŕ' => 'r','ř' => 'r','ŗ' => 'r','ś' => 's','š' => 's',
  118. 'ť' => 't','ù' => 'u','ú' => 'u','û' => 'u','ū' => 'u','ů' => 'u',
  119. 'ű' => 'u','ŭ' => 'u','ũ' => 'u','ų' => 'u','ŵ' => 'w','ÿ' => 'y',
  120. 'ý' => 'y','ŷ' => 'y','ż' => 'z','ź' => 'z','ž' => 'z','Α' => 'A',
  121. 'Ά' => 'A','Ἀ' => 'A','Ἁ' => 'A','Ἂ' => 'A','Ἃ' => 'A','Ἄ' => 'A',
  122. 'Ἅ' => 'A','Ἆ' => 'A','Ἇ' => 'A','ᾈ' => 'A','ᾉ' => 'A','ᾊ' => 'A',
  123. 'ᾋ' => 'A','ᾌ' => 'A','ᾍ' => 'A','ᾎ' => 'A','ᾏ' => 'A','Ᾰ' => 'A',
  124. 'Ᾱ' => 'A','Ὰ' => 'A','ᾼ' => 'A','Β' => 'B','Γ' => 'G','Δ' => 'D',
  125. 'Ε' => 'E','Έ' => 'E','Ἐ' => 'E','Ἑ' => 'E','Ἒ' => 'E','Ἓ' => 'E',
  126. 'Ἔ' => 'E','Ἕ' => 'E','Ὲ' => 'E','Ζ' => 'Z','Η' => 'I','Ή' => 'I',
  127. 'Ἠ' => 'I','Ἡ' => 'I','Ἢ' => 'I','Ἣ' => 'I','Ἤ' => 'I','Ἥ' => 'I',
  128. 'Ἦ' => 'I','Ἧ' => 'I','ᾘ' => 'I','ᾙ' => 'I','ᾚ' => 'I','ᾛ' => 'I',
  129. 'ᾜ' => 'I','ᾝ' => 'I','ᾞ' => 'I','ᾟ' => 'I','Ὴ' => 'I','ῌ' => 'I',
  130. 'Θ' => 'T','Ι' => 'I','Ί' => 'I','Ϊ' => 'I','Ἰ' => 'I','Ἱ' => 'I',
  131. 'Ἲ' => 'I','Ἳ' => 'I','Ἴ' => 'I','Ἵ' => 'I','Ἶ' => 'I','Ἷ' => 'I',
  132. 'Ῐ' => 'I','Ῑ' => 'I','Ὶ' => 'I','Κ' => 'K','Λ' => 'L','Μ' => 'M',
  133. 'Ν' => 'N','Ξ' => 'K','Ο' => 'O','Ό' => 'O','Ὀ' => 'O','Ὁ' => 'O',
  134. 'Ὂ' => 'O','Ὃ' => 'O','Ὄ' => 'O','Ὅ' => 'O','Ὸ' => 'O','Π' => 'P',
  135. 'Ρ' => 'R','Ῥ' => 'R','Σ' => 'S','Τ' => 'T','Υ' => 'Y','Ύ' => 'Y',
  136. 'Ϋ' => 'Y','Ὑ' => 'Y','Ὓ' => 'Y','Ὕ' => 'Y','Ὗ' => 'Y','Ῠ' => 'Y',
  137. 'Ῡ' => 'Y','Ὺ' => 'Y','Φ' => 'F','Χ' => 'X','Ψ' => 'P','Ω' => 'O',
  138. 'Ώ' => 'O','Ὠ' => 'O','Ὡ' => 'O','Ὢ' => 'O','Ὣ' => 'O','Ὤ' => 'O',
  139. 'Ὥ' => 'O','Ὦ' => 'O','Ὧ' => 'O','ᾨ' => 'O','ᾩ' => 'O','ᾪ' => 'O',
  140. 'ᾫ' => 'O','ᾬ' => 'O','ᾭ' => 'O','ᾮ' => 'O','ᾯ' => 'O','Ὼ' => 'O',
  141. 'ῼ' => 'O','α' => 'a','ά' => 'a','ἀ' => 'a','ἁ' => 'a','ἂ' => 'a',
  142. 'ἃ' => 'a','ἄ' => 'a','ἅ' => 'a','ἆ' => 'a','ἇ' => 'a','ᾀ' => 'a',
  143. 'ᾁ' => 'a','ᾂ' => 'a','ᾃ' => 'a','ᾄ' => 'a','ᾅ' => 'a','ᾆ' => 'a',
  144. 'ᾇ' => 'a','ὰ' => 'a','ᾰ' => 'a','ᾱ' => 'a','ᾲ' => 'a','ᾳ' => 'a',
  145. 'ᾴ' => 'a','ᾶ' => 'a','ᾷ' => 'a','β' => 'b','γ' => 'g','δ' => 'd',
  146. 'ε' => 'e','έ' => 'e','ἐ' => 'e','ἑ' => 'e','ἒ' => 'e','ἓ' => 'e',
  147. 'ἔ' => 'e','ἕ' => 'e','ὲ' => 'e','ζ' => 'z','η' => 'i','ή' => 'i',
  148. 'ἠ' => 'i','ἡ' => 'i','ἢ' => 'i','ἣ' => 'i','ἤ' => 'i','ἥ' => 'i',
  149. 'ἦ' => 'i','ἧ' => 'i','ᾐ' => 'i','ᾑ' => 'i','ᾒ' => 'i','ᾓ' => 'i',
  150. 'ᾔ' => 'i','ᾕ' => 'i','ᾖ' => 'i','ᾗ' => 'i','ὴ' => 'i','ῂ' => 'i',
  151. 'ῃ' => 'i','ῄ' => 'i','ῆ' => 'i','ῇ' => 'i','θ' => 't','ι' => 'i',
  152. 'ί' => 'i','ϊ' => 'i','ΐ' => 'i','ἰ' => 'i','ἱ' => 'i','ἲ' => 'i',
  153. 'ἳ' => 'i','ἴ' => 'i','ἵ' => 'i','ἶ' => 'i','ἷ' => 'i','ὶ' => 'i',
  154. 'ῐ' => 'i','ῑ' => 'i','ῒ' => 'i','ῖ' => 'i','ῗ' => 'i','κ' => 'k',
  155. 'λ' => 'l','μ' => 'm','ν' => 'n','ξ' => 'k','ο' => 'o','ό' => 'o',
  156. 'ὀ' => 'o','ὁ' => 'o','ὂ' => 'o','ὃ' => 'o','ὄ' => 'o','ὅ' => 'o',
  157. 'ὸ' => 'o','π' => 'p','ρ' => 'r','ῤ' => 'r','ῥ' => 'r','σ' => 's',
  158. 'ς' => 's','τ' => 't','υ' => 'y','ύ' => 'y','ϋ' => 'y','ΰ' => 'y',
  159. 'ὐ' => 'y','ὑ' => 'y','ὒ' => 'y','ὓ' => 'y','ὔ' => 'y','ὕ' => 'y',
  160. 'ὖ' => 'y','ὗ' => 'y','ὺ' => 'y','ῠ' => 'y','ῡ' => 'y','ῢ' => 'y',
  161. 'ῦ' => 'y','ῧ' => 'y','φ' => 'f','χ' => 'x','ψ' => 'p','ω' => 'o',
  162. 'ώ' => 'o','ὠ' => 'o','ὡ' => 'o','ὢ' => 'o','ὣ' => 'o','ὤ' => 'o',
  163. 'ὥ' => 'o','ὦ' => 'o','ὧ' => 'o','ᾠ' => 'o','ᾡ' => 'o','ᾢ' => 'o',
  164. 'ᾣ' => 'o','ᾤ' => 'o','ᾥ' => 'o','ᾦ' => 'o','ᾧ' => 'o','ὼ' => 'o',
  165. 'ῲ' => 'o','ῳ' => 'o','ῴ' => 'o','ῶ' => 'o','ῷ' => 'o','А' => 'A',
  166. 'Б' => 'B','В' => 'V','Г' => 'G','Д' => 'D','Е' => 'E','Ё' => 'E',
  167. 'Ж' => 'Z','З' => 'Z','И' => 'I','Й' => 'I','К' => 'K','Л' => 'L',
  168. 'М' => 'M','Н' => 'N','О' => 'O','П' => 'P','Р' => 'R','С' => 'S',
  169. 'Т' => 'T','У' => 'U','Ф' => 'F','Х' => 'K','Ц' => 'T','Ч' => 'C',
  170. 'Ш' => 'S','Щ' => 'S','Ы' => 'Y','Э' => 'E','Ю' => 'Y','Я' => 'Y',
  171. 'а' => 'A','б' => 'B','в' => 'V','г' => 'G','д' => 'D','е' => 'E',
  172. 'ё' => 'E','ж' => 'Z','з' => 'Z','и' => 'I','й' => 'I','к' => 'K',
  173. 'л' => 'L','м' => 'M','н' => 'N','о' => 'O','п' => 'P','р' => 'R',
  174. 'с' => 'S','т' => 'T','у' => 'U','ф' => 'F','х' => 'K','ц' => 'T',
  175. 'ч' => 'C','ш' => 'S','щ' => 'S','ы' => 'Y','э' => 'E','ю' => 'Y',
  176. 'я' => 'Y','ð' => 'd','Ð' => 'D','þ' => 't','Þ' => 'T','ა' => 'a',
  177. 'ბ' => 'b','გ' => 'g','დ' => 'd','ე' => 'e','ვ' => 'v','ზ' => 'z',
  178. 'თ' => 't','ი' => 'i','კ' => 'k','ლ' => 'l','მ' => 'm','ნ' => 'n',
  179. 'ო' => 'o','პ' => 'p','ჟ' => 'z','რ' => 'r','ს' => 's','ტ' => 't',
  180. 'უ' => 'u','ფ' => 'p','ქ' => 'k','ღ' => 'g','ყ' => 'q','შ' => 's',
  181. 'ჩ' => 'c','ც' => 't','ძ' => 'd','წ' => 't','ჭ' => 'c','ხ' => 'k',
  182. 'ჯ' => 'j','ჰ' => 'h'
  183. );
  184. $str = str_replace( array_keys( $transliteration ),
  185. array_values( $transliteration ),
  186. $str);
  187. return $str;
  188. }
  189. /**
  190. * Run a variety of functions on a string or array, with the intent of making
  191. * shipping APIs happy.
  192. *
  193. * - Trim
  194. * - Transliterate (UTF-8 to ASCII)
  195. * - Remove non-ASCII characters
  196. * - Truncate
  197. */
  198. public static function shipify ($str, $length = null) {
  199. // manually specify some
  200. $trans = [
  201. "&" => "and",
  202. "'" => ""
  203. ];
  204. if (is_array($str)) {
  205. foreach ($str as &$s) {
  206. $s = trim(strtr($s, $trans));
  207. $s = static::asciify($s);
  208. if ($length) {
  209. $s = substr($s, 0, $length);
  210. }
  211. }
  212. } else {
  213. $str = trim(strtr($str, $trans));
  214. $str = static::asciify($str);
  215. if ($length) {
  216. $str = substr($str, 0, $length);
  217. }
  218. }
  219. return $str;
  220. }
  221. public static function truncate ($str, $len = 25, $pad = 0)
  222. {
  223. if(strlen($str) > $len - $pad)
  224. $ret = substr($str, 0, $len) . '&hellip;';
  225. else
  226. $ret = $str;
  227. return $ret;
  228. }
  229. public static function truncateWithTitle ($str, $len = 25, $pad = 0)
  230. {
  231. if(strlen($str) > $len - $pad)
  232. $ret = '<span title="' . htmlentities($str) . '">' . substr($str, 0, $len) . '&hellip;</span>';
  233. else
  234. $ret = $str;
  235. return $ret;
  236. }
  237. public static function truncateToWord ($str, $len = 25)
  238. {
  239. $str = trim(preg_replace('/\s+/', ' ', $str));
  240. if (strlen($str) > $len) {
  241. $ret = wordwrap($str, $len);
  242. $ret = substr($ret, 0, strpos($ret, "\n"));
  243. $ret .= '&hellip;';
  244. } else
  245. $ret = $str;
  246. return $ret;
  247. }
  248. public static function depluralize($word)
  249. {
  250. return rtrim($word, 's'); // heh
  251. }
  252. public static function pluralize($word)
  253. {
  254. if (substr($word,-1)==='s')
  255. return $word;
  256. return $word . 's';
  257. }
  258. // assumes we're coming from camelcase'd notation
  259. public static function underscore($string)
  260. {
  261. // yay php...
  262. $matches = null;
  263. preg_match_all('/[A-Z][^A-Z]*/',$string,$matches);
  264. $parts = array_map(function($s){return strtolower($s);},$matches[0]);
  265. return implode('_',$parts);
  266. }
  267. // assumes we're coming from underscored notation
  268. public static function camelcase($string)
  269. {
  270. $parts = explode('_', $string);
  271. foreach ($parts as &$part)
  272. $part = ucfirst($part);
  273. return implode ('', $parts);
  274. }
  275. // I didn't want to use size, file, or filesize for obvious reasons. open to better name...
  276. public static function prettyfilesize($bytes)
  277. {
  278. if ($bytes > 1048575) {
  279. $div = $bytes / 1048576;
  280. $size = round($div, 1)." MB";
  281. } else {
  282. $div = $bytes / 1024;
  283. $size = round($div, 1)." KB";
  284. }
  285. return $size;
  286. }
  287. }