mark: A photo of Mark kneeling on top of the Taal Volcano in the Philippines. It was a long hike. (Default)
Mark Smith ([staff profile] mark) wrote in [site community profile] changelog2009-04-14 07:31 am

[dw-free] synsuck silently dropping entries with certain characters

[commit: http://hg.dwscoalition.org/dw-free/rev/3f8131e94e28]

http://bugs.dwscoalition.org/show_bug.cgi?id=640

Perl 5.10 necessitates a change to the way we do UTF8 magic flag removal.
This patch fixes that, but still lets it work in older versions of Perl.

Patch by [personal profile] alierak.

Files modified:
  • cgi-bin/Apache/LiveJournal.pm
  • cgi-bin/LJ/SynSuck.pm
  • cgi-bin/cleanhtml.pl
  • cgi-bin/ljlib.pl
  • src/jbackup/jbackup.pl
--------------------------------------------------------------------------------
diff -r 558c2e746b2a -r 3f8131e94e28 cgi-bin/Apache/LiveJournal.pm
--- a/cgi-bin/Apache/LiveJournal.pm	Tue Apr 14 05:10:56 2009 +0000
+++ b/cgi-bin/Apache/LiveJournal.pm	Tue Apr 14 07:31:36 2009 +0000
@@ -1736,7 +1736,7 @@ sub xmlrpc_method {
     if (ref $req eq "HASH") {
         foreach my $key ('subject', 'event') {
             # get rid of the UTF8 flag in scalars
-            $req->{$key} = pack('C*', unpack('C*', $req->{$key}))
+            $req->{$key} = LJ::no_utf8_flag ( $req->{$key} )
                 if $req->{$key};
         }
     }
diff -r 558c2e746b2a -r 3f8131e94e28 cgi-bin/LJ/SynSuck.pm
--- a/cgi-bin/LJ/SynSuck.pm	Tue Apr 14 05:10:56 2009 +0000
+++ b/cgi-bin/LJ/SynSuck.pm	Tue Apr 14 07:31:36 2009 +0000
@@ -241,7 +241,7 @@ sub process_content {
         # for us behind our back in random places all over
         # http://zilla.livejournal.org/show_bug.cgi?id=1037
         foreach my $attr (qw(id subject text link)) {
-            $it->{$attr} = pack('C*', unpack('C*', $it->{$attr}));
+            $it->{$attr} = LJ::no_utf8_flag ( $it->{$attr} );
         }
 
         my $dig = LJ::md5_struct($it)->b64digest;
diff -r 558c2e746b2a -r 3f8131e94e28 cgi-bin/cleanhtml.pl
--- a/cgi-bin/cleanhtml.pl	Tue Apr 14 05:10:56 2009 +0000
+++ b/cgi-bin/cleanhtml.pl	Tue Apr 14 07:31:36 2009 +0000
@@ -440,7 +440,7 @@ sub clean
                     if ($attr->{'text'}) {
                         $text = $attr->{'text'};
                         if ($text =~ /[^\x01-\x7f]/) {
-                            $text = pack('C*', unpack('C*', $text));
+                            $text = LJ::no_utf8_flag ( $text );
                         }
                         $text =~ s/</&lt;/g;
                         $text =~ s/>/&gt;/g;
diff -r 558c2e746b2a -r 3f8131e94e28 cgi-bin/ljlib.pl
--- a/cgi-bin/ljlib.pl	Tue Apr 14 05:10:56 2009 +0000
+++ b/cgi-bin/ljlib.pl	Tue Apr 14 07:31:36 2009 +0000
@@ -2275,7 +2275,7 @@ sub md5_struct
         # see http://zilla.livejournal.org/show_bug.cgi?id=851
         eval { $md5->add($st); };
         if ($@) {
-            $st = pack('C*', unpack('C*', $st));
+            $st = LJ::no_utf8_flag ( $st );
             $md5->add($st);
         }
         return $md5;
@@ -2677,8 +2677,14 @@ sub assert_is {
                caller => [caller()])->throw;
 }
 
+# no_utf8_flag previously used pack('C*',unpack('C*', $_[0]))
+# but that stopped working in Perl 5.10; see
+# http://bugs.dwscoalition.org/show_bug.cgi?id=640
 sub no_utf8_flag {
-    return pack('C*', unpack('C*', $_[0]));
+    # tell Perl to ignore the SvUTF8 flag in this scope.
+    use bytes;
+    # make a copy of the input string that doesn't have the flag at all.
+    return substr($_[0], 0);
 }
 
 # return true if root caller is a test file
diff -r 558c2e746b2a -r 3f8131e94e28 src/jbackup/jbackup.pl
--- a/src/jbackup/jbackup.pl	Tue Apr 14 05:10:56 2009 +0000
+++ b/src/jbackup/jbackup.pl	Tue Apr 14 07:31:36 2009 +0000
@@ -419,7 +419,8 @@ sub save_event {
     # DO NOT SET REALTIME HERE.  It is set by syncitems.
     foreach (qw(subject anum event eventtime security allowmask poster)) {
         next unless $data->{$_};
-        my $tmp = pack('C*', unpack('C*', $data->{$_}));
+        use bytes;
+        my $tmp = substr($data->{$_}, 0);
         $bak{"event:$_:$id"} = $tmp;
     }
     my @props;
@@ -463,7 +464,8 @@ sub save_comment {
         next unless $data->{$_};
         # GDBM doesn't deal with UTF-8, it only wants a string of bytes, so let's do that
         # by clearing the UTF-8 flag on our input scalars.
-        my $tmp = pack('C*', unpack('C*', $data->{$_}));
+        use bytes;
+        my $tmp = substr($data->{$_}, 0);
         $bak{"comment:$_:$data->{id}"} = $tmp;
     }
 }
--------------------------------------------------------------------------------