jQuery RSS/ATOM feed parser plugin
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

467 lines
18 KiB

  1. #Pack (July 2005)
  2. # Based on "Pack.js" by Dean Edwards <http://dean.edwards.name/>
  3. # Ported to Perl by Rob Seiler, ELR Software Pty Ltd <http://www.elr.com.au>
  4. # Copyright 2005. License <http://creativecommons.org/licenses/LGPL/2.1/>
  5. package Pack;
  6. use strict;
  7. use Data::Dumper;
  8. use ParseMaster;
  9. # Package wide variable declarations
  10. use vars qw/$VERSION $PM_VERSION
  11. $_X_encodePrivate $_JSunpack $_JSdecode %baseLookup
  12. $_X_encode10 $_X_encode36 $_X_encode62 $_X_encode95
  13. $_JSencode10 $_JSencode36 $_JSencode62 $_JSencode95
  14. @_X_parsers
  15. $_X_script $_X_encoding $_X_fastDecode $_X_specialChars
  16. /;
  17. $VERSION = '024';
  18. $PM_VERSION = $ParseMaster::VERSION;
  19. # Package wide constants
  20. my $X_IGNORE = q{$1};
  21. my $X_ENCODE = q/\x24encode\(\x24count\)/; # NB: requires g modifier
  22. my $PERL = 'perl'; # Flag to indicate whether we need to use one of our "internal" Perl encoding functions
  23. my $JSCRIPT = 'jscript'; # or embed a pre-build JScript encoding function
  24. ########################################
  25. ##################
  26. sub pack($$$$) { # require 4 arguments
  27. ##################
  28. #print Dumper(@_);
  29. ($_X_script, $_X_encoding, $_X_fastDecode, $_X_specialChars) = @_;
  30. # validate parameters (sort of!)
  31. $_X_script .= "\n";
  32. $_X_encoding = ($_X_encoding > 95) ? 95 : $_X_encoding;
  33. @_X_parsers = (); # Reset parsers
  34. ####################
  35. sub _X_pack($) { # require 1 argument
  36. ####################
  37. # apply all parsing routines
  38. my $X_script = shift;
  39. for (my $i = 0; $i<scalar(@_X_parsers); $i++) {
  40. my $X_parse = $_X_parsers[$i];
  41. $X_script = &$X_parse($X_script);
  42. }
  43. return $X_script;
  44. };
  45. ######################
  46. sub _X_addParser { #
  47. ######################
  48. # keep a list of parsing functions, they'll be executed all at once
  49. my $X_parser = shift;
  50. push (@_X_parsers,$X_parser);
  51. }
  52. #############################
  53. sub _X_basicCompression { #
  54. #############################
  55. # zero encoding - just removal of white space and comments
  56. my $X_script = shift;
  57. my $parser = ParseMaster->new();
  58. # make safe
  59. $parser->escapeChar("\\");
  60. # protect strings
  61. $parser->add(q/'[^'\n\r]*'/, $X_IGNORE);
  62. $parser->add(q/"[^"\n\r]*"/, $X_IGNORE);
  63. # remove comments
  64. $parser->add(q/\/\/[^\n\r]*[\n\r]/);
  65. $parser->add(q/\/\*[^*]*\*+([^\/][^*]*\*+)*\//);
  66. # protect regular expressions
  67. $parser->add(q/\s+(\/[^\/\n\r\*][^\/\n\r]*\/g?i?)/, q{$2}); # IGNORE
  68. $parser->add(q/[^\w\x24\/'"*)\?:]\/[^\/\n\r\*][^\/\n\r]*\/g?i?/, $X_IGNORE);
  69. # remove: ;;; doSomething();
  70. $parser->add(q/;;[^\n\r]+[\n\r]/) if ($_X_specialChars);
  71. # remove redundant semi-colons
  72. $parser->add(q/;+\s*([};])/, q{$2});
  73. # remove white-space
  74. $parser->add(q/(\b|\x24)\s+(\b|\x24)/, q{$2 $3});
  75. $parser->add(q/([+\-])\s+([+\-])/, q{$2 $3});
  76. $parser->add(q/\s+/, '');
  77. # done
  78. return $parser->exec($X_script);
  79. }
  80. ###############################
  81. sub _X_encodeSpecialChars { #
  82. ###############################
  83. my $X_script = shift;
  84. my $parser = ParseMaster->new();
  85. # replace: $name -> n, $$name -> $$na
  86. $parser->add(q/((\x24+)([a-zA-Z\x24_]+))(\d*)/,
  87. sub {
  88. my $X_offset = pop;
  89. my @X_match = @_;
  90. my $X_length = length($X_match[$X_offset+2]);
  91. my $lengthnext = length($X_match[$X_offset+3]);
  92. my $X_start = $X_length - ((($X_length - $lengthnext) > 0) ? ($X_length - $lengthnext) : 0);
  93. my $str = $X_match[$X_offset+1];
  94. $str = substr($str,$X_start,$X_length) . $X_match[$X_offset+4];
  95. return "$str";
  96. });
  97. # replace: _name -> _0, double-underscore (__name) is ignored
  98. my $X_regexp = q/\b_[A-Za-z\d]\w*/;
  99. # build the word list
  100. my %X_keywords = &_X_analyze($X_script, $X_regexp, $_X_encodePrivate);
  101. #print Dumper(%X_keywords);
  102. # quick ref
  103. my $X_encoded = \$X_keywords{X_encoded}; # eg _private1 => '_0',_private2 => '_1';
  104. #print Dumper($X_encoded);
  105. $parser->add($X_regexp, sub {my $X_offset = pop; my @X_match = @_; return ${$X_encoded}->{$X_match[$X_offset]};});
  106. return $parser->exec($X_script);
  107. };
  108. ###########################
  109. sub _X_encodeKeywords { #
  110. ###########################
  111. my $X_script = shift;
  112. # escape high-ascii values already in the script (i.e. in strings)
  113. if ($_X_encoding > 62) {$X_script = &_X_escape95($X_script)};
  114. # create the parser
  115. my $parser = ParseMaster->new();
  116. my $X_encode = &_X_getEncoder($_X_encoding,$PERL);
  117. # for high-ascii, don't encode single character low-ascii
  118. my $X_regexp = ($_X_encoding > 62) ? q/\w\w+/ : q/\w+/;
  119. # build the word list
  120. my %X_keywords = &_X_analyze($X_script, $X_regexp, $X_encode);
  121. #print Dumper(%X_keywords);
  122. my $X_encoded = \$X_keywords{X_encoded}; # eg alert => 2, function => 10 etc
  123. # encode
  124. $parser->add($X_regexp, sub {my $X_offset = pop; my @X_match = @_; return ${$X_encoded}->{$X_match[$X_offset]};});
  125. # if encoded, wrap the script in a decoding function
  126. return $X_script && _X_bootStrap(\$parser->exec($X_script), \%X_keywords);
  127. }
  128. ####################
  129. sub _X_analyze { #
  130. ####################
  131. #print Dumper(@_);
  132. my ($X_script, $X_regexp, $X_encode) = @_;
  133. # analyse
  134. # retreive all words in the script
  135. my @X_all = $X_script =~ m/$X_regexp/g; # Save all captures in a list context
  136. my %XX_sorted = (); # list of words sorted by frequency
  137. my %XX_encoded = (); # dictionary of word->encoding
  138. my %XX_protected = (); # instances of "protected" words
  139. if (@X_all) {
  140. my @X_unsorted = (); # same list, not sorted
  141. my %X_protected = (); # "protected" words (dictionary of word->"word")
  142. my %X_values = (); # dictionary of charCode->encoding (eg. 256->ff)
  143. my %X_count = (); # word->count
  144. my $i = scalar(@X_all); my $j = 0; my $X_word = '';
  145. # count the occurrences - used for sorting later
  146. do {
  147. $X_word = '$' . $X_all[--$i];
  148. if (!exists($X_count{$X_word})) {
  149. $X_count{$X_word} = [0,$i]; # Store both the usage count and original array position (ie a secondary sort key)
  150. $X_unsorted[$j] = $X_word;
  151. # make a dictionary of all of the protected words in this script
  152. # these are words that might be mistaken for encoding
  153. $X_values{$j} = &$X_encode($j);
  154. my $v = '$'.$X_values{$j};
  155. $X_protected{$v} = $j++;
  156. }
  157. # increment the word counter
  158. $X_count{$X_word}[0]++;
  159. } while ($i);
  160. #print Dumper (%X_values);
  161. #print Dumper (@X_unsorted);
  162. #print Dumper (%X_protected);
  163. # prepare to sort the word list, first we must protect
  164. # words that are also used as codes. we assign them a code
  165. # equivalent to the word itself.
  166. # e.g. if "do" falls within our encoding range
  167. # then we store keywords["do"] = "do";
  168. # this avoids problems when decoding
  169. $i = scalar(@X_unsorted);
  170. do {
  171. $X_word = $X_unsorted[--$i];
  172. if (exists($X_protected{$X_word})) {
  173. $XX_sorted{$X_protected{$X_word}} = substr($X_word,1);
  174. $XX_protected{$X_protected{$X_word}} = 1; # true
  175. $X_count{$X_word}[0] = 0;
  176. }
  177. } while ($i);
  178. #print Dumper (%XX_protected);
  179. #print Dumper (%XX_sorted);
  180. #print Dumper (%X_count);
  181. # sort the words by frequency
  182. # Sort with count a primary key and original array order as secondary key - which is apparently the default in javascript!
  183. @X_unsorted = sort ({($X_count{$b}[0] - $X_count{$a}[0]) or ($X_count{$b}[1] <=> $X_count{$a}[1])} @X_unsorted);
  184. #print Dumper (@X_unsorted) . "\n";
  185. $j = 0;
  186. # because there are "protected" words in the list
  187. # we must add the sorted words around them
  188. do {
  189. if (!exists($XX_sorted{$i})) {$XX_sorted{$i} = substr($X_unsorted[$j++],1)}
  190. $XX_encoded{$XX_sorted{$i}} = $X_values{$i};
  191. } while (++$i < scalar(@X_unsorted));
  192. }
  193. #print Dumper(X_sorted => \%XX_sorted, X_encoded => \%XX_encoded, X_protected => \%XX_protected);
  194. return (X_sorted => \%XX_sorted, X_encoded => \%XX_encoded, X_protected => \%XX_protected);
  195. }
  196. ######################
  197. sub _X_bootStrap { #
  198. ######################
  199. # build the boot function used for loading and decoding
  200. my ($X_packed, $X_keywords) = @_; # Reference arguments!
  201. #print Dumper ($X_keywords) . "\n";
  202. # $packed: the packed script - dereference and escape
  203. $X_packed = "'" . &_X_escape($$X_packed) ."'";
  204. my %sorted = %{$$X_keywords{X_sorted}}; # Dereference to local variables
  205. my %protected = %{$$X_keywords{X_protected}}; # for simplicity
  206. my @sorted = ();
  207. foreach my $key (keys %sorted) {$sorted[$key] = $sorted{$key}}; # Convert hash to a standard list
  208. # ascii: base for encoding
  209. my $X_ascii = ((scalar(@sorted) > $_X_encoding) ? $_X_encoding : scalar(@sorted)) || 1;
  210. # count: number of (unique {RS}) words contained in the script
  211. my $X_count = scalar(@sorted); # Use $X_count for assigning $X_ascii
  212. # keywords: list of words contained in the script
  213. foreach my $i (keys %protected) {$sorted[$i] = ''}; # Blank out protected words
  214. #print Dumper(@sorted) . "\n";
  215. # convert from a string to an array - prepare keywords as a JScript string->array {RS}
  216. $X_keywords = "'" . join('|',@sorted) . "'.split('|')";
  217. # encode: encoding function (used for decoding the script)
  218. my $X_encode = $_X_encoding > 62 ? $_JSencode95 : &_X_getEncoder($X_ascii,$JSCRIPT); # This is a JScript function (as a string)
  219. $X_encode =~ s/_encoding/\x24ascii/g; $X_encode =~ s/arguments\.callee/\x24encode/g;
  220. my $X_inline = '$count' . ($X_ascii > 10 ? '.toString($ascii)' : '');
  221. # decode: code snippet to speed up decoding
  222. my $X_decode = '';
  223. if ($_X_fastDecode) {
  224. # create the decoder
  225. $X_decode = &_X_getFunctionBody($_JSdecode); # ie from the Javascript literal function
  226. if ($_X_encoding > 62) {$X_decode =~ s/\\\\w/[\\xa1-\\xff]/g}
  227. # perform the encoding inline for lower ascii values
  228. elsif ($X_ascii < 36) {$X_decode =~ s/$X_ENCODE/$X_inline/g}
  229. # special case: when $X_count==0 there ar no keywords. i want to keep
  230. # the basic shape of the unpacking funcion so i'll frig the code...
  231. if (!$X_count) {$X_decode =~ s/(\x24count)\s*=\s*1/$1=0/}
  232. }
  233. # boot function
  234. my $X_unpack = $_JSunpack;
  235. if ($_X_fastDecode) {
  236. # insert the decoder
  237. $X_unpack =~ s/\{/\{$X_decode;/;
  238. }
  239. $X_unpack =~ s/"/'/g;
  240. if ($_X_encoding > 62) { # high-ascii
  241. # get rid of the word-boundaries for regexp matches
  242. $X_unpack =~ s/'\\\\b'\s*\+|\+\s*'\\\\b'//g; # Not checked! {RS}
  243. }
  244. if ($X_ascii > 36 || $_X_encoding > 62 || $_X_fastDecode) {
  245. # insert the encode function
  246. $X_unpack =~ s/\{/\{\$encode=$X_encode;/;
  247. } else {
  248. # perform the encoding inline
  249. $X_unpack =~ s/$X_ENCODE/$X_inline/;
  250. }
  251. # arguments {RS} Do this before using &pack because &pack changes the pack parameters (eg $fastDecode) in Perl!!
  252. my $X_params = "$X_packed,$X_ascii,$X_count,$X_keywords"; # Interpolate to comma separated string
  253. if ($_X_fastDecode) {
  254. # insert placeholders for the decoder
  255. $X_params .= ',0,{}';
  256. }
  257. # pack the boot function too
  258. $X_unpack = &pack($X_unpack,0,0,1);
  259. # the whole thing
  260. return "eval(" . $X_unpack . "(" . $X_params . "))\n";
  261. };
  262. #######################
  263. sub _X_getEncoder { #
  264. #######################
  265. # mmm.. ..which one do i need ?? ({RS} Perl or JScript ??)
  266. my ($X_ascii,$language) = @_;
  267. my $perl_encoder = ($X_ascii > 10) ? ($X_ascii > 36) ? ($X_ascii > 62) ? $_X_encode95 : $_X_encode62 : $_X_encode36 : $_X_encode10;
  268. my $jscript_encoder = ($X_ascii > 10) ? ($X_ascii > 36) ? ($X_ascii > 62) ? $_JSencode95 : $_JSencode62 : $_JSencode36 : $_JSencode10;
  269. return ($language eq $JSCRIPT) ? $jscript_encoder : $perl_encoder;
  270. };
  271. #############################
  272. # Perl versions of encoders #
  273. #############################
  274. # base10 zero encoding - characters: 0123456789
  275. $_X_encode10 = sub {return &_encodeBase(shift,10)};
  276. # base36 - characters: 0123456789abcdefghijklmnopqrstuvwxyz
  277. $_X_encode36 = sub {return &_encodeBase(shift,36)};
  278. # base62 - characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
  279. $_X_encode62 = sub {return &_encodeBase(shift,62)};
  280. # high-ascii values - characters: ����������������������������������������������������������������������������������������������
  281. $_X_encode95 = sub {return &_encodeBase(shift,95)};
  282. # Lookup character sets for baseN encoding
  283. $baseLookup{10} = [(0..9)[0..9]]; # base 10
  284. $baseLookup{36} = [(0..9,'a'..'z')[0..35]]; # base 36
  285. $baseLookup{62} = [(0..9,'a'..'z','A'..'Z')[0..61]]; # base 62
  286. $baseLookup{95} = (); for (my $i=0; $i<95; $i++) {$baseLookup{95}[$i] = chr($i+161)}; # base95 (high ascii)
  287. #print Dumper(%baseLookup);
  288. #####################
  289. sub _encodeBase { #
  290. #####################
  291. # Generic base conversion function using defined lookup arrays (perl version only)
  292. my ($X_charCode, $base) = @_;
  293. my $X_encoded = '';
  294. # Do we know this encoding?
  295. if (exists ($baseLookup{$base})) {
  296. if ($X_charCode == 0) {$X_encoded = $baseLookup{$base}[0]}
  297. while($X_charCode > 0) {
  298. $X_encoded = $baseLookup{$base}[$X_charCode % $base] . $X_encoded;
  299. $X_charCode = int($X_charCode / $base);
  300. }
  301. }
  302. else {$X_encoded = "$X_charCode"} # default is to return unchanged (ie as for base 10) if no baselookup is available
  303. return $X_encoded;
  304. };
  305. #############################
  306. $_X_encodePrivate = sub { #
  307. #############################
  308. # special _chars
  309. my $X_charCode = shift;
  310. return '_' . $X_charCode;
  311. };
  312. ############################
  313. sub _X_escape($script) { #
  314. ############################
  315. # protect characters used by the parser
  316. my $X_script = shift;
  317. $X_script =~ s/([\\'])/\\$1/g;
  318. return $X_script;
  319. };
  320. #####################
  321. sub _X_escape95 { #
  322. #####################
  323. # protect high-ascii characters already in the script
  324. my $X_script = shift;
  325. $X_script =~ s/([\xa1-\xff])/sprintf("\\x%1x",ord($1))/eg;
  326. return $X_script;
  327. };
  328. ############################
  329. sub _X_getFunctionBody { #
  330. ############################
  331. # extract the body of a function (ie between opening/closing {}) - consistent with Dean Edwards approach
  332. my $X_function = shift;
  333. $X_function =~ m/^.*\{(.*)\}*$/sg; # Multiline, global (greedy)
  334. my $start = index($X_function,'{');
  335. my $end = rindex($X_function,'}');
  336. $X_function = substr($X_function,($start+1),($end-1-$start));
  337. return $X_function;
  338. };
  339. ######################
  340. sub _X_globalize { #
  341. ######################
  342. # set the global flag on a RegExp (you have to create a new one) !!! Unused in perl version
  343. # my $X_regexp = shift;
  344. };
  345. # build the parsing routine
  346. &_X_addParser(\&_X_basicCompression);
  347. &_X_addParser(\&_X_encodeSpecialChars) if ($_X_specialChars);
  348. &_X_addParser(\&_X_encodeKeywords) if ($_X_encoding);
  349. # go!
  350. return &_X_pack($_X_script);
  351. }
  352. ########################
  353. # Javascript Literals #
  354. ########################
  355. # JScript function "_unpack" - from DeanEdwards pack.js (NB: No ";" after final "}")
  356. ($_JSunpack) = <<'END_JSCRIPT_UNPACK';
  357. /* unpacking function - this is the boot strap function */
  358. /* data extracted from this packing routine is passed to */
  359. /* this function when decoded in the target */
  360. function($packed, $ascii, $count, $keywords, $encode, $decode) {
  361. while ($count--)
  362. if ($keywords[$count])
  363. $packed = $packed.replace(new RegExp('\\b' + $encode($count) + '\\b', 'g'), $keywords[$count]);
  364. /* RS_Debug = $packed; */ /* {RS} !!!!!!!!! */
  365. return $packed;
  366. }
  367. END_JSCRIPT_UNPACK
  368. # JScript function "_decode" - from DeanEdwards pack.js
  369. ($_JSdecode) = <<'END_JSCRIPT_DECODE';
  370. /* code-snippet inserted into the unpacker to speed up decoding */
  371. function() {
  372. /* does the browser support String.replace where the */
  373. /* replacement value is a function? */
  374. if (!''.replace(/^/, String)) {
  375. /* decode all the values we need */
  376. while ($count--) $decode[$encode($count)] = $keywords[$count] || $encode($count);
  377. /* global replacement function */
  378. $keywords = [function($encoded){return $decode[$encoded]}];
  379. /* generic match */
  380. $encode = function(){return'\\w+'};
  381. /* reset the loop counter - we are now doing a global replace */
  382. $count = 1;
  383. }
  384. };
  385. END_JSCRIPT_DECODE
  386. # JScript versions of encoders
  387. ($_JSencode10) = <<'END_JSCRIPT_ENCODE10';
  388. /* zero encoding */
  389. /* characters: 0123456789 */
  390. function($charCode) {
  391. return $charCode;
  392. };
  393. END_JSCRIPT_ENCODE10
  394. ($_JSencode36) = <<'END_JSCRIPT_ENCODE36';
  395. /* inherent base36 support */
  396. /* characters: 0123456789abcdefghijklmnopqrstuvwxyz */
  397. function($charCode) {
  398. return $charCode.toString(36);
  399. };
  400. END_JSCRIPT_ENCODE36
  401. ($_JSencode62) = <<'END_JSCRIPT_ENCODE62';
  402. /* hitch a ride on base36 and add the upper case alpha characters */
  403. /* characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ */
  404. function($charCode) {
  405. return ($charCode < _encoding ? '' : arguments.callee(parseInt($charCode / _encoding))) +
  406. (($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
  407. };
  408. END_JSCRIPT_ENCODE62
  409. ($_JSencode95) = <<'END_JSCRIPT_ENCODE95';
  410. /* use high-ascii values */
  411. /* characters: ���������������������������������������������������������������������������������������������� */
  412. function($charCode) {
  413. return ($charCode < _encoding ? '' : arguments.callee($charCode / _encoding)) +
  414. String.fromCharCode($charCode % _encoding + 161);
  415. };
  416. END_JSCRIPT_ENCODE95
  417. ###########
  418. # END #
  419. ###########
  420. 1; # Pack #
  421. ###########