[dw-free] synsuck silently dropping entries with certain characters
[commit: http://hg.dwscoalition.org/dw-free/rev/3f8131e94e28]
http://bugs.dwscoalition.org/show_bug.cgi?id=640
Perl 5.10 necessitates a change to the way we do UTF8 magic flag removal.
This patch fixes that, but still lets it work in older versions of Perl.
Patch by
alierak.
Files modified:
http://bugs.dwscoalition.org/show_bug.cgi?id=640
Perl 5.10 necessitates a change to the way we do UTF8 magic flag removal.
This patch fixes that, but still lets it work in older versions of Perl.
Patch by
![[personal profile]](https://www.dreamwidth.org/img/silk/identity/user.png)
Files modified:
- cgi-bin/Apache/LiveJournal.pm
- cgi-bin/LJ/SynSuck.pm
- cgi-bin/cleanhtml.pl
- cgi-bin/ljlib.pl
- src/jbackup/jbackup.pl
-------------------------------------------------------------------------------- diff -r 558c2e746b2a -r 3f8131e94e28 cgi-bin/Apache/LiveJournal.pm --- a/cgi-bin/Apache/LiveJournal.pm Tue Apr 14 05:10:56 2009 +0000 +++ b/cgi-bin/Apache/LiveJournal.pm Tue Apr 14 07:31:36 2009 +0000 @@ -1736,7 +1736,7 @@ sub xmlrpc_method { if (ref $req eq "HASH") { foreach my $key ('subject', 'event') { # get rid of the UTF8 flag in scalars - $req->{$key} = pack('C*', unpack('C*', $req->{$key})) + $req->{$key} = LJ::no_utf8_flag ( $req->{$key} ) if $req->{$key}; } } diff -r 558c2e746b2a -r 3f8131e94e28 cgi-bin/LJ/SynSuck.pm --- a/cgi-bin/LJ/SynSuck.pm Tue Apr 14 05:10:56 2009 +0000 +++ b/cgi-bin/LJ/SynSuck.pm Tue Apr 14 07:31:36 2009 +0000 @@ -241,7 +241,7 @@ sub process_content { # for us behind our back in random places all over # http://zilla.livejournal.org/show_bug.cgi?id=1037 foreach my $attr (qw(id subject text link)) { - $it->{$attr} = pack('C*', unpack('C*', $it->{$attr})); + $it->{$attr} = LJ::no_utf8_flag ( $it->{$attr} ); } my $dig = LJ::md5_struct($it)->b64digest; diff -r 558c2e746b2a -r 3f8131e94e28 cgi-bin/cleanhtml.pl --- a/cgi-bin/cleanhtml.pl Tue Apr 14 05:10:56 2009 +0000 +++ b/cgi-bin/cleanhtml.pl Tue Apr 14 07:31:36 2009 +0000 @@ -440,7 +440,7 @@ sub clean if ($attr->{'text'}) { $text = $attr->{'text'}; if ($text =~ /[^\x01-\x7f]/) { - $text = pack('C*', unpack('C*', $text)); + $text = LJ::no_utf8_flag ( $text ); } $text =~ s/</</g; $text =~ s/>/>/g; diff -r 558c2e746b2a -r 3f8131e94e28 cgi-bin/ljlib.pl --- a/cgi-bin/ljlib.pl Tue Apr 14 05:10:56 2009 +0000 +++ b/cgi-bin/ljlib.pl Tue Apr 14 07:31:36 2009 +0000 @@ -2275,7 +2275,7 @@ sub md5_struct # see http://zilla.livejournal.org/show_bug.cgi?id=851 eval { $md5->add($st); }; if ($@) { - $st = pack('C*', unpack('C*', $st)); + $st = LJ::no_utf8_flag ( $st ); $md5->add($st); } return $md5; @@ -2677,8 +2677,14 @@ sub assert_is { caller => [caller()])->throw; } +# no_utf8_flag previously used pack('C*',unpack('C*', $_[0])) +# but that stopped working in Perl 5.10; see +# http://bugs.dwscoalition.org/show_bug.cgi?id=640 sub no_utf8_flag { - return pack('C*', unpack('C*', $_[0])); + # tell Perl to ignore the SvUTF8 flag in this scope. + use bytes; + # make a copy of the input string that doesn't have the flag at all. + return substr($_[0], 0); } # return true if root caller is a test file diff -r 558c2e746b2a -r 3f8131e94e28 src/jbackup/jbackup.pl --- a/src/jbackup/jbackup.pl Tue Apr 14 05:10:56 2009 +0000 +++ b/src/jbackup/jbackup.pl Tue Apr 14 07:31:36 2009 +0000 @@ -419,7 +419,8 @@ sub save_event { # DO NOT SET REALTIME HERE. It is set by syncitems. foreach (qw(subject anum event eventtime security allowmask poster)) { next unless $data->{$_}; - my $tmp = pack('C*', unpack('C*', $data->{$_})); + use bytes; + my $tmp = substr($data->{$_}, 0); $bak{"event:$_:$id"} = $tmp; } my @props; @@ -463,7 +464,8 @@ sub save_comment { next unless $data->{$_}; # GDBM doesn't deal with UTF-8, it only wants a string of bytes, so let's do that # by clearing the UTF-8 flag on our input scalars. - my $tmp = pack('C*', unpack('C*', $data->{$_})); + use bytes; + my $tmp = substr($data->{$_}, 0); $bak{"comment:$_:$data->{id}"} = $tmp; } } --------------------------------------------------------------------------------