brennen
/
jFeed

#ParseMaster (July 25 2005)#  Based on "ParseMaster.js" by Dean Edwards <http://dean.edwards.name/>#  Ported to Perl by Rob Seiler, ELR Software Pty Ltd <http://www.elr.com.au>#  Copyright 2005. License <http://creativecommons.org/licenses/LGPL/2.1/>
package ParseMaster;use strict;use Data::Dumper;
# Package wide variable declarationsuse vars qw/$VERSION            @_X_escaped @_X_patterns           /;
$VERSION    = '017';
# constantsmy $X_EXPRESSION  = 0;my $X_REPLACEMENT = 1;my $X_LENGTH      = 2;
# re's used to determine nesting levelsmy $X_GROUPS      = qr/\(/o;                # NB: Requires g modifier!my $X_SUB_REPLACE = qr/\$\d/o;my $X_INDEXED     = qr/^\$\d+$/o;my $XX_ESCAPE     = qr/\\./o;               # NB: Requires g modifier!my $XX_DELETED    = qr/\001[^\001]*\001/o;  # NB: Requires g modifier!my $DIGIT         = qr/[^\D]/o;             # Yep - this is a digit - contains no non-digits
# Constructorsub new {  my $class = shift;  my $self  = {};  @_X_escaped  = ();  # Re-initialize global for each instance  @_X_patterns = ();  # Re-initialize global for each instance  # Instance variables - access by similarly named set/get functions  $self->{_ignoreCase_} = 0;  $self->{_escapeChar_} = '';  bless ($self, $class);  return $self;}
sub ignoreCase {  my ($self, $value) = @_;  if (defined($value)) {    $self->{_ignoreCase_} = $value;  }  return $self->{_ignoreCase_};}
sub escapeChar{  my ($self, $value) = @_;  if (defined($value)) {    $self->{_escapeChar_} = $value;  }  return $self->{_escapeChar_};}
######################## Public Parsemaster functions
my $X_DELETE = sub(@$) {  my $X_offset = pop;  my @X_match = @_;  return (chr(001) . $X_match[$X_offset] . chr(001));}; # NB semicolon required for closure!
# create and add a new pattern to the patterns collectionsub add {  my ($self, $expression, $X_replacement) = @_;  if (!$X_replacement) {$X_replacement = $X_DELETE};
  # count the number of sub-expressions  my $temp = &_X_internalEscape($expression);  my $length  = 1; # Always at least one because each pattern is itself a sub-expression     $length += $temp =~ s/$X_GROUPS//g; # One way to count the left capturing parentheses in the regexp string
  # does the pattern deal with sub-expressions?  if ((ref($X_replacement) ne "CODE") && ($X_replacement =~ m/$X_SUB_REPLACE/)) {    if ($X_replacement =~ m/$X_INDEXED/) { # a simple lookup? (eg "$2")      # store the index (used for fast retrieval of matched strings)      $X_replacement = substr($X_replacement,1) - 1;    }    else { # a complicated lookup (eg "Hello $2 $1")      my $i = $length;      while ($i) { # Had difficulty getting Perl to do Dean's splitting and joining of strings containing $'s        my $str = '$a[$o+' . ($i-1) . ']'; # eg $a[$o+1]        $X_replacement =~ s/\$$i/$str/;      # eg $2 $3 -> $a[$o+1] $a[$o+2]        $i--;      }      # build a function to do the lookup - returns interpolated string of array lookups      $X_replacement = eval('sub {my $o=pop; my @a=@_; return "' . $X_replacement . '"};');    }  }  else {}  # pass the modified arguments  &_X_add($expression || q/^$/, $X_replacement, $length);}
# execute the global replacementsub exec {#print Dumper(@_X_patterns);  my ($self, $X_string) = @_;  my $escChar    = $self->escapeChar();  my $ignoreCase = $self->ignoreCase();  my ($regexp,$captures) = &_getPatterns();  # Concatenated and parenthesized regexp eg '(regex1)|(regex2)|(regex3)' etc  $X_string = &_X_escape($X_string, $escChar);  if ($ignoreCase) {$X_string =~ s/$regexp/{&_X_replacement(&_matchVars($captures,\$X_string))}/gie} # Pass $X_String as a    else           {$X_string =~ s/$regexp/{&_X_replacement(&_matchVars($captures,\$X_string))}/ge}  # reference for speed
  $X_string = &_X_unescape($X_string, $escChar);  $X_string =~ s/$XX_DELETED//g;  return $X_string;}
sub _X_add {  push (@_X_patterns, [@_]); # Save each argument set as is into an array of arrays}
# this is the global replace function (it's quite complicated)sub _X_replacement {  my (@arguments) = @_;#print Dumper (@arguments);  if ($arguments[0] le '') {return ''}  # Dereference last index (source String) here - faster than in _matchVars (maybe not needed at all?)  $arguments[$#arguments] = ${$arguments[$#arguments]};  my $i = 1;  # loop through the patterns  for (my $j=0; $j<scalar(@_X_patterns); $j++) { # Loop through global all @_X_patterns    my @X_pattern = @{$_X_patterns[$j]};    # do we have a result? NB: "if ($arguments[$i])" as in Dean's Javascript is false for the value 0!!!    if ((defined $arguments[$i]) && ($arguments[$i] gt '')) {      my $X_replacement = $X_pattern[$X_REPLACEMENT];      # switch on type of $replacement      if (ref($X_replacement) eq "CODE") {     # function        return &$X_replacement(@arguments,$i);      }      elsif ($X_replacement =~ m/$DIGIT/) {    # number (contains no non-digits)        return $arguments[$X_replacement + $i];      }      else { # default        return $X_replacement;                 # default      }    } # skip over references to sub-expressions    else {$i += $X_pattern[$X_LENGTH]}  }}
######################## Private functions#######################
# encode escaped characterssub _X_escape {  my ($X_string, $X_escapeChar) = @_;  if ($X_escapeChar) {    my $re = '\\'.$X_escapeChar.'(.)';    $X_string =~ s/$re/{push(@_X_escaped,$1); $X_escapeChar}/ge;  }  return $X_string;}
# decode escaped characterssub _X_unescape {  my ($X_string, $X_escapeChar) = @_;  if ($X_escapeChar) { # We'll only do this if there is an $X_escapeChar!    my $re = '\\'.$X_escapeChar;    $X_string =~ s/$re/{$X_escapeChar . (shift(@_X_escaped))}/ge; # Don't use Dean Edwards as below 'or' here - because zero will return ''!  # $X_string =~ s/$re/{$X_escapeChar . (shift(@_X_escaped) || '')}/ge;  }  return $X_string;}
sub _X_internalEscape {  my ($string) = shift;  $string =~ s/$XX_ESCAPE//g;  return $string;}
# Builds an array of match variables to (approximately) emulate that available in Javascript String.replace()sub _matchVars {  my ($m,$sref) = @_;  my @args = (1..$m);                # establish the number potential memory variables  my @mv = map {eval("\$$_")} @args; # matchvarv[1..m] = the memory variables $1 .. $m  unshift (@mv, $&);                 # matchvar[0]     = the substring that matched  push    (@mv, length($`));         # matchvar[m+1]   =  offset within the source string where the match occurred (= length of prematch string)  push    (@mv, $sref);              # matchvar[m+2]   = reference to full source string (dereference in caller if/when needed)#print Dumper (@mv);  return @mv;}
sub _getPatterns {  my @Patterns = ();  my $lcp = 0;  for (my $i=0; $i<scalar(@_X_patterns); $i++) {       # Loop through global all @_patterns    push (@Patterns, $_X_patterns[$i][$X_EXPRESSION]); # accumulate the expressions    $lcp += $_X_patterns[$i][$X_LENGTH];               # sum the left capturing parenthesis counts  }  my $str = "(" . join(')|(',@Patterns). ")";          # enclose each pattern in () separated by "|"  return ($str, $lcp);}
################### END            ###################1; # ParseMaster ###################