kareila: (Default)
kareila ([personal profile] kareila) wrote in [site community profile] changelog, 2010-11-19 06:53 pm

[dw-free] regexp cleanup

[commit: http://hg.dwscoalition.org/dw-free/rev/5c264686320f]

http://bugs.dwscoalition.org/show_bug.cgi?id=2792

New LJ::Entry->summarize method, with more efficient regexp.

Patch by [personal profile] kareila.

Files modified:
  • cgi-bin/LJ/Entry.pm
  • cgi-bin/ljfeed.pl
--------------------------------------------------------------------------------
diff -r a7f7253b1f43 -r 5c264686320f cgi-bin/LJ/Entry.pm
--- a/cgi-bin/LJ/Entry.pm	Thu Nov 18 13:28:59 2010 +0800
+++ b/cgi-bin/LJ/Entry.pm	Fri Nov 19 12:53:08 2010 -0600
@@ -769,14 +769,20 @@ sub event_summary {
     my $url = $self->url;
     my $readmore = "<b>(<a href=\"$url\">Read more ...</a>)</b>";
 
-    my $event = $self->event_html;
+    return LJ::Entry->summarize( $self->event_html, $readmore );
+}
+
+# class method for truncation
+sub summarize {
+    my ( $class, $event, $readmore ) = @_;
+    return '' unless defined $event;
 
     # assume the first paragraph is terminated by two <br> or a </p>
     # valid XML tags should be handled, even though it makes an uglier regex
-    if ($event =~ m!((<br\s*/?\>(</br\s*>)?\s*){2})|(</p\s*>)!i) {
+    if ( $event =~ m!(.*?(?:(?:<br\s*/?>(?:</br\s*>)?\s*){2}|</p\s*>))!i ) {
         # everything before the matched tag + the tag itself
         # + a link to read more
-        $event = $` . $& . $readmore;
+        $event = $1 . $readmore;
     }
     return $event;
 }
diff -r a7f7253b1f43 -r 5c264686320f cgi-bin/ljfeed.pl
--- a/cgi-bin/ljfeed.pl	Thu Nov 18 13:28:59 2010 +0800
+++ b/cgi-bin/ljfeed.pl	Fri Nov 19 12:53:08 2010 -0600
@@ -231,14 +231,7 @@ sub make_feed
             # do this after clean so we don't have to about know whether or not
             # the event is preformatted
             if ($u->{'opt_synlevel'} eq 'summary') {
-
-                # assume the first paragraph is terminated by two <br> or a </p>
-                # valid XML tags should be handled, even though it makes an uglier regex
-                if ($event =~ m!((<br\s*/?\>(</br\s*>)?\s*){2})|(</p\s*>)!i) {
-                    # everything before the matched tag + the tag itself
-                    # + a link to read more
-                    $event = $` . $& . $readmore;
-                }
+                $event = LJ::Entry->summarize( $event, $readmore );
             }
 
             while ($event =~ /<(?:lj-)?poll-(\d+)>/g) {
--------------------------------------------------------------------------------

Post a comment in response:

This account has disabled anonymous posting.
If you don't have an account you can create one now.
HTML doesn't work in the subject.
More info about formatting

If you are unable to use this captcha for any reason, please contact us by email at support@dreamwidth.org