kareila: (Default)
kareila ([personal profile] kareila) wrote in [site community profile] changelog, 2010-11-19 06:53 pm

[dw-free] regexp cleanup

[commit: http://hg.dwscoalition.org/dw-free/rev/5c264686320f]

http://bugs.dwscoalition.org/show_bug.cgi?id=2792

New LJ::Entry->summarize method, with more efficient regexp.

Patch by [personal profile] kareila.

Files modified:
  • cgi-bin/LJ/Entry.pm
  • cgi-bin/ljfeed.pl
--------------------------------------------------------------------------------
diff -r a7f7253b1f43 -r 5c264686320f cgi-bin/LJ/Entry.pm
--- a/cgi-bin/LJ/Entry.pm	Thu Nov 18 13:28:59 2010 +0800
+++ b/cgi-bin/LJ/Entry.pm	Fri Nov 19 12:53:08 2010 -0600
@@ -769,14 +769,20 @@ sub event_summary {
     my $url = $self->url;
     my $readmore = "<b>(<a href=\"$url\">Read more ...</a>)</b>";
 
-    my $event = $self->event_html;
+    return LJ::Entry->summarize( $self->event_html, $readmore );
+}
+
+# class method for truncation
+sub summarize {
+    my ( $class, $event, $readmore ) = @_;
+    return '' unless defined $event;
 
     # assume the first paragraph is terminated by two <br> or a </p>
     # valid XML tags should be handled, even though it makes an uglier regex
-    if ($event =~ m!((<br\s*/?\>(</br\s*>)?\s*){2})|(</p\s*>)!i) {
+    if ( $event =~ m!(.*?(?:(?:<br\s*/?>(?:</br\s*>)?\s*){2}|</p\s*>))!i ) {
         # everything before the matched tag + the tag itself
         # + a link to read more
-        $event = $` . $& . $readmore;
+        $event = $1 . $readmore;
     }
     return $event;
 }
diff -r a7f7253b1f43 -r 5c264686320f cgi-bin/ljfeed.pl
--- a/cgi-bin/ljfeed.pl	Thu Nov 18 13:28:59 2010 +0800
+++ b/cgi-bin/ljfeed.pl	Fri Nov 19 12:53:08 2010 -0600
@@ -231,14 +231,7 @@ sub make_feed
             # do this after clean so we don't have to about know whether or not
             # the event is preformatted
             if ($u->{'opt_synlevel'} eq 'summary') {
-
-                # assume the first paragraph is terminated by two <br> or a </p>
-                # valid XML tags should be handled, even though it makes an uglier regex
-                if ($event =~ m!((<br\s*/?\>(</br\s*>)?\s*){2})|(</p\s*>)!i) {
-                    # everything before the matched tag + the tag itself
-                    # + a link to read more
-                    $event = $` . $& . $readmore;
-                }
+                $event = LJ::Entry->summarize( $event, $readmore );
             }
 
             while ($event =~ /<(?:lj-)?poll-(\d+)>/g) {
--------------------------------------------------------------------------------