fu: Close-up of Fu, bringing a scoop of water to her mouth (Default)
fu ([personal profile] fu) wrote in [site community profile] changelog 2011-01-30 12:49 pm

[dw-free] new feeds pulling in entries in reverse order

[commit: http://hg.dwscoalition.org/dw-free/rev/2a8cca4347d9]

http://bugs.dwscoalition.org/show_bug.cgi?id=2623

Refactor to create a helper function that takes the feed XML and returns an
ordered list of items from the feed. Add tests.

Patch by [personal profile] fu.

Files modified:
  • cgi-bin/LJ/SynSuck.pm
  • t/synsuck.t
--------------------------------------------------------------------------------
diff -r c3fd821250d8 -r 2a8cca4347d9 cgi-bin/LJ/SynSuck.pm
--- a/cgi-bin/LJ/SynSuck.pm	Wed Jan 26 14:16:24 2011 +0800
+++ b/cgi-bin/LJ/SynSuck.pm	Sun Jan 30 20:48:49 2011 +0800
@@ -122,14 +122,13 @@ sub get_content {
     return [$res, $content];
 }
 
-sub process_content {
-    my ($urow, $resp, $verbose) = @_;
-
-    my ($res, $content) = @$resp;
-    my ($user, $userid, $synurl, $lastmod, $etag, $readers) =
-        map { $urow->{$_} } qw(user userid synurl lastmod etag numreaders);
-
-    my $dbh = LJ::get_db_writer();
+# helper function which takes feed XML
+# and returns a list of $num items from the feed
+# in proper order
+sub parse_items_from_feed {
+    my ( $content, $num, $verbose ) = @_;
+    $num ||= 20;
+    return ( 0, { type => "noitems" } ) unless defined $content;
 
     # WARNING: blatant XML spec violation ahead...
     #
@@ -165,18 +164,52 @@ sub process_content {
     }
 
     # parsing time...
-    my ($feed, $error) = LJ::ParseFeed::parse_feed($content);
-    if ($error) {
-        # parse error!
-        print "Parse error! $error\n" if $verbose;
-        delay($userid, 3*60, "parseerror");
-        $error =~ s! at /.*!!;
-        $error =~ s/^\n//; # cleanup of newline at the beginning of the line
-        my $syn_u = LJ::load_user( $user );
-        $syn_u->set_prop( "rssparseerror", $error ) if $syn_u;
-        return;
+    my ( $feed, $error ) = LJ::ParseFeed::parse_feed( $content );
+    return ( 0, { type => "parseerror", message => $error } ) if $error;
+
+    # another sanity check
+    return ( 0, { type => "noitems" } ) unless ref $feed->{items} eq "ARRAY";
+
+    my @items = reverse @{$feed->{'items'}};
+
+    # take most recent 20
+    splice( @items, 0, @items - $num ) if @items > $num;
+
+    return ( 1, { items => \@items, feed => $feed } );
+}
+
+
+sub process_content {
+    my ($urow, $resp, $verbose) = @_;
+
+    my ($res, $content) = @$resp;
+    my ($user, $userid, $synurl, $lastmod, $etag, $readers) =
+        map { $urow->{$_} } qw(user userid synurl lastmod etag numreaders);
+
+    my $dbh = LJ::get_db_writer();
+
+    my ( $ok, $rv ) = parse_items_from_feed( $content, 20, $verbose );
+    unless ( $ok ) {
+        if ( $rv->{type} eq "parseerror" ) {
+            # parse error!
+            delay( $userid, 3*60, "parseerror" );
+            if ( my $error = $rv->{message} ) {
+                print "Parse error! $error\n" if $verbose;
+                $error =~ s! at /.*!!;
+                $error =~ s/^\n//; # cleanup of newline at the beginning of the line
+                my $syn_u = LJ::load_user( $user );
+                $syn_u->set_prop( "rssparseerror", $error ) if $syn_u;
+            }
+            return;
+        } elsif ( $rv->{type} eq "noitems" ) {
+            return delay( $userid, 3*60, "noitems" );
+        } else {
+            print "Unknown error type!\n" if $verbose;
+            return delay( $userid, 3*60, "unknown" );
+        }
     }
 
+    my $feed = $rv->{feed};
     # register feeds that can support hubbub.
     if ( LJ::is_enabled( 'hubbub' ) && $feed->{self} && $feed->{hub} ) {
         # this is a square operation.  register every "self" and the feed url along
@@ -191,15 +224,7 @@ sub process_content {
         }
     }
 
-    # another sanity check
-    unless (ref $feed->{'items'} eq "ARRAY") {
-        return delay($userid, 3*60, "noitems");
-    }
-
-    my @items = reverse @{$feed->{'items'}};
-
-    # take most recent 20
-    splice(@items, 0, @items-20) if @items > 20;
+    my @items = @{$rv->{items}};
 
     # delete existing items older than the age which can show on a
     # friends view.
diff -r c3fd821250d8 -r 2a8cca4347d9 t/synsuck.t
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/t/synsuck.t	Sun Jan 30 20:48:49 2011 +0800
@@ -0,0 +1,526 @@
+# -*-perl-*-
+use strict;
+use Test::More tests => 26;
+
+use lib "$ENV{LJHOME}/cgi-bin";
+require 'ljlib.pl';
+
+use LJ::SynSuck;
+
+
+sub err {
+    my ( $content, $type, $test ) = @_;
+
+    my ( $ok, $rv ) = LJ::SynSuck::parse_items_from_feed( $content );
+    ok( ! $ok, $test );
+    is( $rv->{type}, $type, $rv->{message} ? " $test - $rv->{message}" : $test );
+}
+
+sub success {
+    my ( $content, $test, %opts ) = @_;
+
+    my ( $ok, $rv ) = LJ::SynSuck::parse_items_from_feed( $content, $opts{num_items} );
+    ok( $ok, $test );
+    die $rv->{message} unless $ok;
+
+    return @{$rv->{items}};
+};
+
+
+note("Error");
+{
+    my $content = q{<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+        <channel>
+            <title>Blah
+        </channel>
+    </rss>
+    };
+
+    err( $content, "parseerror", "Mismatched tags" );
+}
+
+note("No items");
+{
+    my $content = q{<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <pubDate>Mon, 24 Jan 2011 00:00:00 GMT</pubDate>
+    </channel>
+    </rss>
+    };
+
+    err( $content, "noitems", "Empty feed" );
+}
+
+
+note("RSS pubDate - descending");
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+            <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+            <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+            <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate>
+        </item>
+
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Correct order from RSS pubDate (originally descending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in descending order)" );
+}
+
+note("RSS pubDate - ascending");
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+            <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+            <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+            <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate>
+        </item>
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Correct order from RSS pubDate (originally ascending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in ascending order)" );
+}
+
+note( "Atom - descending" );
+{
+    my $content = q{<?xml version="1.0" encoding="UTF-8"?>
+    <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
+        <title>Feed title</title>
+        <link rel="alternate" type="text/html" href="http://example.com/feed/atom" />
+        <id>example:atom:feed</id>
+        <updated>2011-01-23T17:38:49-08:00</updated>
+
+        <entry>
+            <title>Item 3</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" />
+            <id>3</id>
+            <published>2011-01-23T17:38:49-08:00</published>
+            <updated>2011-01-23T17:38:49-08:00</updated>
+            <author><name>someone</name></author>
+            <content type="html">baz</content>
+        </entry>
+
+        <entry>
+            <title>Item 2</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" />
+            <id>2</id>
+            <published>2011-01-23T13:59:55-08:00</published>
+            <updated>2011-01-23T13:59:55-08:00</updated>
+            <author><name>someone</name></author>
+            <content type="html">bar</content>
+        </entry>
+
+        <entry>
+            <title>Item 1</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" />
+            <id>1</id>
+            <published>2011-01-23T13:58:08-08:00</published>
+            <updated>2011-01-23T13:58:08-08:00</updated>
+            <author><name>someone</name></author>
+            <content type="html">foo</content>
+        </entry>
+    </feed>};
+
+    my @items = success( $content, "Correct order from Atom (originally descending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in descending order)" );
+}
+
+note( "Atom - ascending" );
+{
+    my $content = q{<?xml version="1.0" encoding="UTF-8"?>
+    <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
+        <title>Feed title</title>
+        <link rel="alternate" type="text/html" href="http://example.com/feed/atom" />
+        <id>example:atom:feed</id>
+        <updated>2011-01-23T17:38:49-08:00</updated>
+
+        <entry>
+            <title>Item 1</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" />
+            <id>1</id>
+            <published>2011-01-23T13:58:08-08:00</published>
+            <updated>2011-01-23T13:58:08-08:00</updated>
+            <author><name>someone</name></author>
+            <content type="html">foo</content>
+        </entry>
+
+        <entry>
+            <title>Item 2</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" />
+            <id>2</id>
+            <published>2011-01-23T13:59:55-08:00</published>
+            <updated>2011-01-23T13:59:55-08:00</updated>
+            <author><name>someone</name></author>
+            <content type="html">bar</content>
+        </entry>
+
+        <entry>
+            <title>Item 3</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" />
+            <id>3</id>
+            <published>2011-01-23T17:38:49-08:00</published>
+            <updated>2011-01-23T17:38:49-08:00</updated>
+            <author><name>someone</name></author>
+            <content type="html">baz</content>
+        </entry>
+
+    </feed>};
+
+    my @items = success( $content, "Correct order from Atom (originally ascending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in ascending order)" );
+}
+
+note("RSS dc:date - descending");
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <dc:date>2011-01-24T11:06:54Z</dc:date>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+            <dc:date>2011-01-24T03:00:00Z</dc:date>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+            <dc:date>2011-01-23T05:30:00Z</dc:date>
+        </item>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+            <dc:date>2011-01-17T20:00:00Z</dc:date>
+        </item>
+
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Correct order from RSS dc:date (originally descending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in descending order)" );
+}
+
+note("RSS dc:date - ascending");
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <dc:date>2011-01-24T11:06:54Z</dc:date>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+            <dc:date>2011-01-17T20:00:00Z</dc:date>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+            <dc:date>2011-01-23T05:30:00Z</dc:date>
+        </item>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+            <dc:date>2011-01-24T03:00:00Z</dc:date>
+        </item>
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Correct order from RSS dc:date (originally ascending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in ascending order)" );
+}
+
+note( "Without datestamp - descending" );
+{
+    my $content = q{<?xml version="1.0" encoding="UTF-8"?>
+    <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
+        <title>Feed title</title>
+        <link rel="alternate" type="text/html" href="http://example.com/feed/atom" />
+        <id>example:atom:feed</id>
+        <updated>2011-01-23T17:38:49-08:00</updated>
+
+        <entry>
+            <title>Item 3</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" />
+            <id>3</id>
+            <author><name>someone</name></author>
+            <content type="html">baz</content>
+        </entry>
+
+        <entry>
+            <title>Item 2</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" />
+            <id>2</id>
+            <author><name>someone</name></author>
+            <content type="html">bar</content>
+        </entry>
+
+        <entry>
+            <title>Item 1</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" />
+            <id>1</id>
+            <author><name>someone</name></author>
+            <content type="html">foo</content>
+        </entry>
+    </feed>};
+
+    my @items = success( $content, "Correct order without datestamps (originally descending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally without datestamps in descending order)" );
+}
+
+note( "Without datestamp - ascending" );
+{
+    my $content = q{<?xml version="1.0" encoding="UTF-8"?>
+    <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
+        <title>Feed title</title>
+        <link rel="alternate" type="text/html" href="http://example.com/feed/atom" />
+        <id>example:atom:feed</id>
+        <updated>2011-01-23T17:38:49-08:00</updated>
+
+        <entry>
+            <title>Item 1</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" />
+            <id>1</id>
+            <author><name>someone</name></author>
+            <content type="html">foo</content>
+        </entry>
+
+        <entry>
+            <title>Item 2</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" />
+            <id>2</id>
+            <author><name>someone</name></author>
+            <content type="html">bar</content>
+        </entry>
+
+        <entry>
+            <title>Item 3</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" />
+            <id>3</id>
+            <author><name>someone</name></author>
+            <content type="html">baz</content>
+        </entry>
+
+    </feed>};
+
+    my @items = success( $content, "Correct order without datestamps (originally ascending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 3, 2, 1 ], "Items from feed returned in what we guessed is the correct order (originally without datestamps in ascending order)" );
+}
+
+
+note( "Active feed - too many items - descending" );
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+            <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+            <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+            <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate>
+        </item>
+
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Latest two items in the feed", num_items => 2 );
+    is_deeply( [ map {$_->{id}} @items ], [ 2, 3 ], "Returned latest two items from feed (originally in descending order)" );
+}
+
+note( "Active feed - too many items - ascending" );
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+            <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+            <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+            <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate>
+        </item>
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Latest two items in the feed", num_items => 2 );
+    is_deeply( [ map {$_->{id}} @items ], [ 2, 3 ], "Returned latest two items from feed (originally in ascending order)" );
+}
+
+note( "Active feed - too many items - no datestamp ascending" );
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+        </item>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+        </item>
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Latest two items in the feed (guessed)", num_items => 2 );
+    is_deeply( [ map {$_->{id}} @items ], [ 2, 1 ], "Returned what we guessed are the latest two items from feed (originally without datestamps in ascending order)" );
+}
--------------------------------------------------------------------------------