fu: Close-up of Fu, bringing a scoop of water to her mouth (Default)
fu ([personal profile] fu) wrote in [site community profile] changelog2011-02-16 11:19 am

[dw-free] Duplicate xmlns attributes in atom feeds

[commit: http://hg.dwscoalition.org/dw-free/rev/f1844ce3fd56]

http://bugs.dwscoalition.org/show_bug.cgi?id=3511

Remove the normalize_ns sub (not actually effective, and causes this issue0.
Append namespace by adding an attribute instead to the feed element. Use
$LJ::SITEABBREV, rather than hardcoding lj: for the namespace prefix. Fix
the namespace for category.

Patch by [personal profile] fu.

Files modified:
  • cgi-bin/ljfeed.pl
  • t/feed-atom.t
--------------------------------------------------------------------------------
diff -r bfecc8c7ffb6 -r f1844ce3fd56 cgi-bin/ljfeed.pl
--- a/cgi-bin/ljfeed.pl	Wed Feb 16 16:38:51 2011 +0800
+++ b/cgi-bin/ljfeed.pl	Wed Feb 16 19:19:03 2011 +0800
@@ -291,6 +291,13 @@ sub make_feed
     return $viewfunc->{handler}->($journalinfo, $u, $opts, \@cleanitems, \@entries);
 }
 
+# helper method to add a namespace to the root of a feed
+sub _add_feed_namespace {
+    my ( $feed, $ns_prefix, $namespace ) = @_;
+    my $doc = $feed->elem->ownerDocument->getDocumentElement;
+    $doc->setAttribute( "xmlns:$ns_prefix", $namespace );
+}
+
 # helper method for create_view_rss and create_view_comments
 sub _init_talkview {
     my ( $journalinfo, $u, $opts, $talkview ) = @_;
@@ -402,19 +409,10 @@ sub create_view_atom
 sub create_view_atom
 {
     my ( $j, $u, $opts, $cleanitems, $entrylist ) = @_;
-    my ( $feed, $xml, $ns );
+    my ( $feed, $xml, $ns, $site_ns_prefix );
 
+    $site_ns_prefix = lc $LJ::SITENAMEABBREV;
     $ns = "http://www.w3.org/2005/Atom";
-
-    # Strip namespace from child tags. Set default namespace, let
-    # child tags inherit from it. Do it manually; LibXML can't on its own.
-    my $normalize_ns = sub {
-        my $str = shift;
-        $str =~ s/(<\w+)\s+xmlns="\Q$ns\E"/$1/og;
-        $str =~ s/<feed\b/<feed xmlns="$ns" xmlns:lj="$LJ::SITEROOT"/;
-        $str =~ s/<entry>/<entry xmlns="$ns" xmlns:lj="$LJ::SITEROOT">/ if $opts->{'single_entry'};
-        return $str;
-    };
 
     # AtomAPI interface path
     my $api = $opts->{'apilinks'} ? $u->atom_service_document :
@@ -441,7 +439,7 @@ sub create_view_atom
         $xml  = $feed->elem->ownerDocument;
 
         if ($u->should_block_robots) {
-            $xml->getDocumentElement->setAttribute( "xmlns:idx", "urn:atom-extension:indexing" );
+            _add_feed_namespace( $feed, "idx", "urn:atom-extension:indexing" );
             $xml->getDocumentElement->setAttribute( "idx:index", "no" );
         }
 
@@ -466,7 +464,7 @@ sub create_view_atom
         );
         $feed->updated( LJ::time_to_w3c($j->{'modtime'}, 'Z') );
 
-        my $ljinfo = $xml->createElement( 'lj:journal' );
+        my $ljinfo = $xml->createElement( "$site_ns_prefix:journal" );
         $ljinfo->setAttribute( 'username', LJ::exml($u->user) );
         $ljinfo->setAttribute( 'type', LJ::exml($u->journaltype_readable) );
         $xml->getDocumentElement->appendChild( $ljinfo );
@@ -502,7 +500,7 @@ sub create_view_atom
             $entry->author( $author );
 
             # and the lj-specific stuff
-            my $postauthor = $entry_xml->createElement( 'lj:poster' );
+            my $postauthor = $entry_xml->createElement( "$site_ns_prefix:poster" );
             $postauthor->setAttribute( 'user', LJ::exml($poster->user));
             $entry_xml->getDocumentElement->appendChild( $postauthor );
         }
@@ -536,7 +534,7 @@ sub create_view_atom
         $entry->updated(   LJ::time_to_w3c($it->{modtime},    "Z") );
 
         foreach my $tag ( @{$it->{tags} || []} ) {
-            my $category = XML::Atom::Category->new;
+            my $category = XML::Atom::Category->new( Version => 1 );
             $category->term( $tag );
             $entry->add_category( $category );
         }
@@ -550,7 +548,7 @@ sub create_view_atom
         foreach ( @currents ) {
             my ( $key, $val ) = @$_;
             if ( defined $val ) {
-                my $elem = $entry_xml->createElement( "lj:$key" );
+                my $elem = $entry_xml->createElement( "$site_ns_prefix:$key" );
                 $elem->appendTextNode( $val );
                 $entry_xml->getDocumentElement->appendChild( $elem );
             }
@@ -582,14 +580,16 @@ sub create_view_atom
         }
 
         if ( $opts->{'single_entry'} ) {
-            return $normalize_ns->( $entry->as_xml() );
+            _add_feed_namespace( $entry, $site_ns_prefix, $LJ::SITEROOT );
+            return $entry->as_xml;
         }
         else {
             $feed->add_entry( $entry );
         }
     }
 
-    return $normalize_ns->( $feed->as_xml() );
+    _add_feed_namespace( $feed, $site_ns_prefix, $LJ::SITEROOT );
+    return $feed->as_xml;
 }
 
 # create a FOAF page for a user
diff -r bfecc8c7ffb6 -r f1844ce3fd56 t/feed-atom.t
--- a/t/feed-atom.t	Wed Feb 16 16:38:51 2011 +0800
+++ b/t/feed-atom.t	Wed Feb 16 19:19:03 2011 +0800
@@ -1,7 +1,7 @@
 # check the atom feeds that we generate
 
 use strict;
-use Test::More tests => 4;
+use Test::More tests => 10;
 
 use lib "$ENV{LJHOME}/cgi-bin";
 require 'ljlib.pl';
@@ -18,29 +18,40 @@ my $r = DW::Request::Standard->new(
 
 note( "Empty feed" );
 {
-    my $feed = LJ::ParseFeed::parse_feed(
+    my ( $feed, $error ) = LJ::ParseFeed::parse_feed(
                 LJ::Feed::make_feed( $r, $u, $remote, { pathextra => "/atom" } ),
                 "atom" );
-    is_deeply( $feed, [], "Empty, but parseable, feed" );
+    is( $feed->{link}, $u->journal_base . "/" );
+    is( $feed->{type}, "atom" );
+    is_deeply( $feed->{items}, [], "Empty, but parseable, feed" );
 }
 
+my $e1 = $u->t_post_fake_entry( subject => "test post in feed (subject)", event => "test post in feed (body)" );
+my $e2 = $u->t_post_fake_entry;
 {
     note( "Posted entry: entire feed" );
-    my $e1 = $u->t_post_fake_entry( subject => "test post in feed (subject)", event => "test post in feed (body)" );
-    my $e2 = $u->t_post_fake_entry;
+    my $feed_xml = LJ::Feed::make_feed( $r, $u, $remote, { pathextra => "/atom" } );
 
-    my $feed = LJ::ParseFeed::parse_feed(
-                LJ::Feed::make_feed( $r, $u, $remote, { pathextra => "/atom" } ),
-                "atom" );
+    my $parser = new XML::Parser( Style => 'Objects' );
+    my $parsed = $parser->parse( $feed_xml );
+    $parsed = $parsed->[0];
+    delete $parsed->{Kids};
+    is_deeply( $parsed,
+        {
+            'xmlns'     => "http://www.w3.org/2005/Atom",
+            'xmlns:dw'  => $LJ::SITEROOT
+        }, "Check namespaces for feed" );
+
+    my ( $feed, $error ) = LJ::ParseFeed::parse_feed( $feed_xml, "atom" );
 
     my $userid = $u->userid;
     my $e1id = $e1->ditemid;
 
-    my $feed_entryid = delete $feed->[1]->{id};
-    delete $feed->[0]->{id};
+    my $feed_entryid = delete $feed->{items}->[1]->{id};
+    delete $feed->{items}->[0]->{id};
     like( $feed_entryid, qr/tag:$LJ::DOMAIN,\d{4}-\d{2}-\d{2}:$userid:$e1id/, "Feed entry id" );
 
-    is_deeply( $feed, [{
+    is_deeply( $feed->{items}, [{
         link    => $e2->url,
         subject => $e2->subject_raw,
         text    => $e2->event_raw,
@@ -55,17 +66,79 @@ note( "Empty feed" );
 
     note( "Posted entry: individual item" );
     my $e2id = $e2->ditemid;
-    $r = DW::Request::Standard->new(
+    my $r2 = DW::Request::Standard->new(
             HTTP::Request->new( GET => $u->journal_base . "/data/atom?itemid=$e2id" ) );
 
-    $feed = LJ::ParseFeed::parse_feed(
-            LJ::Feed::make_feed( $r, $u, $remote, { pathextra => "/atom" } ),
-            "atom" );
-    delete $feed->[0]->{id};
-    is_deeply( $feed->[0], {
+    $feed_xml = LJ::Feed::make_feed( $r2, $u, $remote, { pathextra => "/atom" } );
+    ( $feed, $error ) = LJ::ParseFeed::parse_feed( $feed_xml, "atom" );
+    delete $feed->{items}->[0]->{id};
+    is_deeply( $feed->{items}->[0], {
         link    => $e2->url,
         subject => $e2->subject_raw,
         text    => $e2->event_raw,
         time    => substr( $e2->eventtime_mysql, 0, -3 ),
     }, "Check individual entry from feed" );
 }
+
+note( "Icon feed" );
+SKIP: {
+    my $num_tests = 1;
+
+    use FindBin qw($Bin);
+    chdir "$Bin/data/userpics" or skip "Failed to chdir to t/data/userpics", $num_tests;
+    open ( my $fh, 'good.png' ) or skip "No icon", $num_tests;
+
+    my $ICON = do { local $/; <$fh> };
+    my $icon = LJ::Userpic->create( $u, data => \$ICON );
+
+    my $icons_r = DW::Request::Standard->new(
+            HTTP::Request->new( GET => $u->journal_base . "/data/userpics" ) );
+
+    my $feed_xml = LJ::Feed::make_feed( $icons_r, $u, $remote, { pathextra => "/userpics" } );
+
+    my $parser = new XML::Parser( Style => 'Objects' );
+    my $parsed = $parser->parse( $feed_xml );
+    $parsed = $parsed->[0];
+    delete $parsed->{Kids};
+    is_deeply( $parsed,
+        {
+            'xmlns'     => "http://www.w3.org/2005/Atom",
+        }, "Check namespaces for feed" );
+
+}
+
+note( "No bot crawling" );
+{
+    # block robots from crawling, but normal feed readers
+    # should still be able to read the feed
+    $u->set_prop( "opt_blockrobots", 1 );
+
+    my $feed_xml = LJ::Feed::make_feed( $r, $u, $remote, { pathextra => "/atom" } );
+
+    my $parser = new XML::Parser( Style => 'Objects' );
+    my $parsed = $parser->parse( $feed_xml );
+    $parsed = $parsed->[0];
+    delete $parsed->{Kids};
+    is_deeply( $parsed,
+        {
+            'xmlns'     => "http://www.w3.org/2005/Atom",
+            'xmlns:dw'  => $LJ::SITEROOT,
+            'xmlns:idx' => 'urn:atom-extension:indexing',
+            'idx:index' => 'no',
+        }, "Atom indexing extension" );
+
+    my ( $feed, $error ) = LJ::ParseFeed::parse_feed( $feed_xml, "atom" );
+    delete $_->{id} foreach @{$feed->{items} || []};
+    is_deeply( $feed->{items}, [{
+        link    => $e2->url,
+        subject => $e2->subject_raw,
+        text    => $e2->event_raw,
+        time    => substr( $e2->eventtime_mysql, 0, -3 ),
+    }, {
+        link    => $e1->url,
+        subject => $e1->subject_raw,
+        text    => $e1->event_raw,
+        time    => substr( $e1->eventtime_mysql, 0, -3 ),
+    }], "Check entries from feed" );
+}
+
--------------------------------------------------------------------------------