mark: A photo of Mark kneeling on top of the Taal Volcano in the Philippines. It was a long hike. (Default)
Mark Smith ([staff profile] mark) wrote in [site community profile] changelog2009-04-14 07:31 am

[dw-free] synsuck silently dropping entries with certain characters

[commit: http://hg.dwscoalition.org/dw-free/rev/3f8131e94e28]

http://bugs.dwscoalition.org/show_bug.cgi?id=640

Perl 5.10 necessitates a change to the way we do UTF8 magic flag removal.
This patch fixes that, but still lets it work in older versions of Perl.

Patch by [personal profile] alierak.

Files modified:
  • cgi-bin/Apache/LiveJournal.pm
  • cgi-bin/LJ/SynSuck.pm
  • cgi-bin/cleanhtml.pl
  • cgi-bin/ljlib.pl
  • src/jbackup/jbackup.pl
--------------------------------------------------------------------------------
diff -r 558c2e746b2a -r 3f8131e94e28 cgi-bin/Apache/LiveJournal.pm
--- a/cgi-bin/Apache/LiveJournal.pm	Tue Apr 14 05:10:56 2009 +0000
+++ b/cgi-bin/Apache/LiveJournal.pm	Tue Apr 14 07:31:36 2009 +0000
@@ -1736,7 +1736,7 @@ sub xmlrpc_method {
     if (ref $req eq "HASH") {
         foreach my $key ('subject', 'event') {
             # get rid of the UTF8 flag in scalars
-            $req->{$key} = pack('C*', unpack('C*', $req->{$key}))
+            $req->{$key} = LJ::no_utf8_flag ( $req->{$key} )
                 if $req->{$key};
         }
     }
diff -r 558c2e746b2a -r 3f8131e94e28 cgi-bin/LJ/SynSuck.pm
--- a/cgi-bin/LJ/SynSuck.pm	Tue Apr 14 05:10:56 2009 +0000
+++ b/cgi-bin/LJ/SynSuck.pm	Tue Apr 14 07:31:36 2009 +0000
@@ -241,7 +241,7 @@ sub process_content {
         # for us behind our back in random places all over
         # http://zilla.livejournal.org/show_bug.cgi?id=1037
         foreach my $attr (qw(id subject text link)) {
-            $it->{$attr} = pack('C*', unpack('C*', $it->{$attr}));
+            $it->{$attr} = LJ::no_utf8_flag ( $it->{$attr} );
         }
 
         my $dig = LJ::md5_struct($it)->b64digest;
diff -r 558c2e746b2a -r 3f8131e94e28 cgi-bin/cleanhtml.pl
--- a/cgi-bin/cleanhtml.pl	Tue Apr 14 05:10:56 2009 +0000
+++ b/cgi-bin/cleanhtml.pl	Tue Apr 14 07:31:36 2009 +0000
@@ -440,7 +440,7 @@ sub clean
                     if ($attr->{'text'}) {
                         $text = $attr->{'text'};
                         if ($text =~ /[^\x01-\x7f]/) {
-                            $text = pack('C*', unpack('C*', $text));
+                            $text = LJ::no_utf8_flag ( $text );
                         }
                         $text =~ s/</&lt;/g;
                         $text =~ s/>/&gt;/g;
diff -r 558c2e746b2a -r 3f8131e94e28 cgi-bin/ljlib.pl
--- a/cgi-bin/ljlib.pl	Tue Apr 14 05:10:56 2009 +0000
+++ b/cgi-bin/ljlib.pl	Tue Apr 14 07:31:36 2009 +0000
@@ -2275,7 +2275,7 @@ sub md5_struct
         # see http://zilla.livejournal.org/show_bug.cgi?id=851
         eval { $md5->add($st); };
         if ($@) {
-            $st = pack('C*', unpack('C*', $st));
+            $st = LJ::no_utf8_flag ( $st );
             $md5->add($st);
         }
         return $md5;
@@ -2677,8 +2677,14 @@ sub assert_is {
                caller => [caller()])->throw;
 }
 
+# no_utf8_flag previously used pack('C*',unpack('C*', $_[0]))
+# but that stopped working in Perl 5.10; see
+# http://bugs.dwscoalition.org/show_bug.cgi?id=640
 sub no_utf8_flag {
-    return pack('C*', unpack('C*', $_[0]));
+    # tell Perl to ignore the SvUTF8 flag in this scope.
+    use bytes;
+    # make a copy of the input string that doesn't have the flag at all.
+    return substr($_[0], 0);
 }
 
 # return true if root caller is a test file
diff -r 558c2e746b2a -r 3f8131e94e28 src/jbackup/jbackup.pl
--- a/src/jbackup/jbackup.pl	Tue Apr 14 05:10:56 2009 +0000
+++ b/src/jbackup/jbackup.pl	Tue Apr 14 07:31:36 2009 +0000
@@ -419,7 +419,8 @@ sub save_event {
     # DO NOT SET REALTIME HERE.  It is set by syncitems.
     foreach (qw(subject anum event eventtime security allowmask poster)) {
         next unless $data->{$_};
-        my $tmp = pack('C*', unpack('C*', $data->{$_}));
+        use bytes;
+        my $tmp = substr($data->{$_}, 0);
         $bak{"event:$_:$id"} = $tmp;
     }
     my @props;
@@ -463,7 +464,8 @@ sub save_comment {
         next unless $data->{$_};
         # GDBM doesn't deal with UTF-8, it only wants a string of bytes, so let's do that
         # by clearing the UTF-8 flag on our input scalars.
-        my $tmp = pack('C*', unpack('C*', $data->{$_}));
+        use bytes;
+        my $tmp = substr($data->{$_}, 0);
         $bak{"comment:$_:$data->{id}"} = $tmp;
     }
 }
--------------------------------------------------------------------------------

Post a comment in response:

This account has disabled anonymous posting.
If you don't have an account you can create one now.
HTML doesn't work in the subject.
More info about formatting

If you are unable to use this captcha for any reason, please contact us by email at support@dreamwidth.org