fu: Close-up of Fu, bringing a scoop of water to her mouth (Default)
fu ([personal profile] fu) wrote in [site community profile] changelog, 2011-01-30 12:49 pm

[dw-free] new feeds pulling in entries in reverse order

[commit: http://hg.dwscoalition.org/dw-free/rev/2a8cca4347d9]

http://bugs.dwscoalition.org/show_bug.cgi?id=2623

Refactor to create a helper function that takes the feed XML and returns an
ordered list of items from the feed. Add tests.

Patch by [personal profile] fu.

Files modified:
  • cgi-bin/LJ/SynSuck.pm
  • t/synsuck.t
--------------------------------------------------------------------------------
diff -r c3fd821250d8 -r 2a8cca4347d9 cgi-bin/LJ/SynSuck.pm
--- a/cgi-bin/LJ/SynSuck.pm	Wed Jan 26 14:16:24 2011 +0800
+++ b/cgi-bin/LJ/SynSuck.pm	Sun Jan 30 20:48:49 2011 +0800
@@ -122,14 +122,13 @@ sub get_content {
     return [$res, $content];
 }
 
-sub process_content {
-    my ($urow, $resp, $verbose) = @_;
-
-    my ($res, $content) = @$resp;
-    my ($user, $userid, $synurl, $lastmod, $etag, $readers) =
-        map { $urow->{$_} } qw(user userid synurl lastmod etag numreaders);
-
-    my $dbh = LJ::get_db_writer();
+# helper function which takes feed XML
+# and returns a list of $num items from the feed
+# in proper order
+sub parse_items_from_feed {
+    my ( $content, $num, $verbose ) = @_;
+    $num ||= 20;
+    return ( 0, { type => "noitems" } ) unless defined $content;
 
     # WARNING: blatant XML spec violation ahead...
     #
@@ -165,18 +164,52 @@ sub process_content {
     }
 
     # parsing time...
-    my ($feed, $error) = LJ::ParseFeed::parse_feed($content);
-    if ($error) {
-        # parse error!
-        print "Parse error! $error\n" if $verbose;
-        delay($userid, 3*60, "parseerror");
-        $error =~ s! at /.*!!;
-        $error =~ s/^\n//; # cleanup of newline at the beginning of the line
-        my $syn_u = LJ::load_user( $user );
-        $syn_u->set_prop( "rssparseerror", $error ) if $syn_u;
-        return;
+    my ( $feed, $error ) = LJ::ParseFeed::parse_feed( $content );
+    return ( 0, { type => "parseerror", message => $error } ) if $error;
+
+    # another sanity check
+    return ( 0, { type => "noitems" } ) unless ref $feed->{items} eq "ARRAY";
+
+    my @items = reverse @{$feed->{'items'}};
+
+    # take most recent $num
+    splice( @items, 0, @items - $num ) if @items > $num;
+
+    return ( 1, { items => \@items, feed => $feed } );
+}
+
+
+sub process_content {
+    my ($urow, $resp, $verbose) = @_;
+
+    my ($res, $content) = @$resp;
+    my ($user, $userid, $synurl, $lastmod, $etag, $readers) =
+        map { $urow->{$_} } qw(user userid synurl lastmod etag numreaders);
+
+    my $dbh = LJ::get_db_writer();
+
+    my ( $ok, $rv ) = parse_items_from_feed( $content, 20, $verbose );
+    unless ( $ok ) {
+        if ( $rv->{type} eq "parseerror" ) {
+            # parse error!
+            delay( $userid, 3*60, "parseerror" );
+            if ( my $error = $rv->{message} ) {
+                print "Parse error! $error\n" if $verbose;
+                $error =~ s! at /.*!!;
+                $error =~ s/^\n//; # cleanup of newline at the beginning of the line
+                my $syn_u = LJ::load_user( $user );
+                $syn_u->set_prop( "rssparseerror", $error ) if $syn_u;
+            }
+            return;
+        } elsif ( $rv->{type} eq "noitems" ) {
+            return delay( $userid, 3*60, "noitems" );
+        } else {
+            print "Unknown error type!\n" if $verbose;
+            return delay( $userid, 3*60, "unknown" );
+        }
     }
 
+    my $feed = $rv->{feed};
     # register feeds that can support hubbub.
     if ( LJ::is_enabled( 'hubbub' ) && $feed->{self} && $feed->{hub} ) {
         # this is a square operation.  register every "self" and the feed url along
@@ -191,15 +224,7 @@ sub process_content {
         }
     }
 
-    # another sanity check
-    unless (ref $feed->{'items'} eq "ARRAY") {
-        return delay($userid, 3*60, "noitems");
-    }
-
-    my @items = reverse @{$feed->{'items'}};
-
-    # take most recent 20
-    splice(@items, 0, @items-20) if @items > 20;
+    my @items = @{$rv->{items}};
 
     # delete existing items older than the age which can show on a
     # friends view.
diff -r c3fd821250d8 -r 2a8cca4347d9 t/synsuck.t
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/t/synsuck.t	Sun Jan 30 20:48:49 2011 +0800
@@ -0,0 +1,526 @@
+# -*-perl-*-
+use strict;
+use Test::More tests => 26;
+
+use lib "$ENV{LJHOME}/cgi-bin";
+require 'ljlib.pl';
+
+use LJ::SynSuck;
+
+
+sub err {
+    my ( $content, $type, $test ) = @_;
+
+    my ( $ok, $rv ) = LJ::SynSuck::parse_items_from_feed( $content );
+    ok( ! $ok, $test );
+    is( $rv->{type}, $type, $rv->{message} ? " $test - $rv->{message}" : $test );
+}
+
+sub success {
+    my ( $content, $test, %opts ) = @_;
+
+    my ( $ok, $rv ) = LJ::SynSuck::parse_items_from_feed( $content, $opts{num_items} );
+    ok( $ok, $test );
+    die $rv->{message} unless $ok;
+
+    return @{$rv->{items}};
+};
+
+
+note("Error");
+{
+    my $content = q{<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+        <channel>
+            <title>Blah
+        </channel>
+    </rss>
+    };
+
+    err( $content, "parseerror", "Mismatched tags" );
+}
+
+note("No items");
+{
+    my $content = q{<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <pubDate>Mon, 24 Jan 2011 00:00:00 GMT</pubDate>
+    </channel>
+    </rss>
+    };
+
+    err( $content, "noitems", "Empty feed" );
+}
+
+
+note("RSS pubDate - descending");
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+            <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+            <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+            <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate>
+        </item>
+
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Correct order from RSS pubDate (originally descending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in descending order)" );
+}
+
+note("RSS pubDate - ascending");
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+            <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+            <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+            <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate>
+        </item>
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Correct order from RSS pubDate (originally ascending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in ascending order)" );
+}
+
+note( "Atom - descending" );
+{
+    my $content = q{<?xml version="1.0" encoding="UTF-8"?>
+    <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
+        <title>Feed title</title>
+        <link rel="alternate" type="text/html" href="http://example.com/feed/atom" />
+        <id>example:atom:feed</id>
+        <updated>2011-01-23T17:38:49-08:00</updated>
+
+        <entry>
+            <title>Item 3</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" />
+            <id>3</id>
+            <published>2011-01-23T17:38:49-08:00</published>
+            <updated>2011-01-23T17:38:49-08:00</updated>
+            <author><name>someone</name></author>
+            <content type="html">baz</content>
+        </entry>
+
+        <entry>
+            <title>Item 2</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" />
+            <id>2</id>
+            <published>2011-01-23T13:59:55-08:00</published>
+            <updated>2011-01-23T13:59:55-08:00</updated>
+            <author><name>someone</name></author>
+            <content type="html">bar</content>
+        </entry>
+
+        <entry>
+            <title>Item 1</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" />
+            <id>1</id>
+            <published>2011-01-23T13:58:08-08:00</published>
+            <updated>2011-01-23T13:58:08-08:00</updated>
+            <author><name>someone</name></author>
+            <content type="html">foo</content>
+        </entry>
+    </feed>};
+
+    my @items = success( $content, "Correct order from Atom (originally descending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in descending order)" );
+}
+
+note( "Atom - ascending" );
+{
+    my $content = q{<?xml version="1.0" encoding="UTF-8"?>
+    <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
+        <title>Feed title</title>
+        <link rel="alternate" type="text/html" href="http://example.com/feed/atom" />
+        <id>example:atom:feed</id>
+        <updated>2011-01-23T17:38:49-08:00</updated>
+
+        <entry>
+            <title>Item 1</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" />
+            <id>1</id>
+            <published>2011-01-23T13:58:08-08:00</published>
+            <updated>2011-01-23T13:58:08-08:00</updated>
+            <author><name>someone</name></author>
+            <content type="html">foo</content>
+        </entry>
+
+        <entry>
+            <title>Item 2</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" />
+            <id>2</id>
+            <published>2011-01-23T13:59:55-08:00</published>
+            <updated>2011-01-23T13:59:55-08:00</updated>
+            <author><name>someone</name></author>
+            <content type="html">bar</content>
+        </entry>
+
+        <entry>
+            <title>Item 3</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" />
+            <id>3</id>
+            <published>2011-01-23T17:38:49-08:00</published>
+            <updated>2011-01-23T17:38:49-08:00</updated>
+            <author><name>someone</name></author>
+            <content type="html">baz</content>
+        </entry>
+
+    </feed>};
+
+    my @items = success( $content, "Correct order from Atom (originally ascending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in ascending order)" );
+}
+
+note("RSS dc:date - descending");
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <dc:date>2011-01-24T11:06:54Z</dc:date>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+            <dc:date>2011-01-24T03:00:00Z</dc:date>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+            <dc:date>2011-01-23T05:30:00Z</dc:date>
+        </item>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+            <dc:date>2011-01-17T20:00:00Z</dc:date>
+        </item>
+
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Correct order from RSS dc:date (originally descending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in descending order)" );
+}
+
+note("RSS dc:date - ascending");
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <dc:date>2011-01-24T11:06:54Z</dc:date>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+            <dc:date>2011-01-17T20:00:00Z</dc:date>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+            <dc:date>2011-01-23T05:30:00Z</dc:date>
+        </item>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+            <dc:date>2011-01-24T03:00:00Z</dc:date>
+        </item>
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Correct order from RSS dc:date (originally ascending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in ascending order)" );
+}
+
+note( "Without datestamp - descending" );
+{
+    my $content = q{<?xml version="1.0" encoding="UTF-8"?>
+    <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
+        <title>Feed title</title>
+        <link rel="alternate" type="text/html" href="http://example.com/feed/atom" />
+        <id>example:atom:feed</id>
+        <updated>2011-01-23T17:38:49-08:00</updated>
+
+        <entry>
+            <title>Item 3</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" />
+            <id>3</id>
+            <author><name>someone</name></author>
+            <content type="html">baz</content>
+        </entry>
+
+        <entry>
+            <title>Item 2</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" />
+            <id>2</id>
+            <author><name>someone</name></author>
+            <content type="html">bar</content>
+        </entry>
+
+        <entry>
+            <title>Item 1</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" />
+            <id>1</id>
+            <author><name>someone</name></author>
+            <content type="html">foo</content>
+        </entry>
+    </feed>};
+
+    my @items = success( $content, "Correct order without datestamps (originally descending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally without datestamps in descending order)" );
+}
+
+note( "Without datestamp - ascending" );
+{
+    my $content = q{<?xml version="1.0" encoding="UTF-8"?>
+    <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
+        <title>Feed title</title>
+        <link rel="alternate" type="text/html" href="http://example.com/feed/atom" />
+        <id>example:atom:feed</id>
+        <updated>2011-01-23T17:38:49-08:00</updated>
+
+        <entry>
+            <title>Item 1</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" />
+            <id>1</id>
+            <author><name>someone</name></author>
+            <content type="html">foo</content>
+        </entry>
+
+        <entry>
+            <title>Item 2</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" />
+            <id>2</id>
+            <author><name>someone</name></author>
+            <content type="html">bar</content>
+        </entry>
+
+        <entry>
+            <title>Item 3</title>
+            <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" />
+            <id>3</id>
+            <author><name>someone</name></author>
+            <content type="html">baz</content>
+        </entry>
+
+    </feed>};
+
+    my @items = success( $content, "Correct order without datestamps (originally ascending)" );
+    is_deeply( [ map {$_->{id}} @items ], [ 3, 2, 1 ], "Items from feed returned in what we guessed is the correct order (originally without datestamps in ascending order)" );
+}
+
+
+note( "Active feed - too many items - descending" );
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+            <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+            <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+            <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate>
+        </item>
+
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Latest two items in the feed", num_items => 2 );
+    is_deeply( [ map {$_->{id}} @items ], [ 2, 3 ], "Returned latest two items from feed (originally in descending order)" );
+}
+
+note( "Active feed - too many items - ascending" );
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+            <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+            <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate>
+        </item>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+            <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate>
+        </item>
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Latest two items in the feed", num_items => 2 );
+    is_deeply( [ map {$_->{id}} @items ], [ 2, 3 ], "Returned latest two items from feed (originally in ascending order)" );
+}
+
+note( "Active feed - too many items - no datestamp ascending" );
+{
+    my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+    <rss version="2.0">
+    <channel>
+        <title>Title</title>
+        <link>http://www.example.com/</link>
+        <description>Some Feed</description>
+        <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+        <item>
+            <title>Item 1</title>
+            <link>http://example.com/feed/1</link>
+            <description>foo</description>
+            <author>someone</author>
+            <guid isPermaLink="false">1</guid>
+        </item>
+
+        <item>
+            <title>Item 2</title>
+            <link>http://example.com/feed/2</link>
+            <description>bar</description>
+            <author>someone</author>
+            <guid isPermaLink="false">2</guid>
+        </item>
+
+        <item>
+            <title>Item 3</title>
+            <link>http://example.com/feed/3</link>
+            <description>baz</description>
+            <author>someone</author>
+            <guid isPermaLink="false">3</guid>
+        </item>
+    </channel>
+    </rss>};
+
+    my @items = success( $content, "Latest two items in the feed (guessed)", num_items => 2 );
+    is_deeply( [ map {$_->{id}} @items ], [ 2, 1 ], "Returned what we guessed are the latest two items from feed (originally without datestamps in ascending order)" );
+}
--------------------------------------------------------------------------------

Post a comment in response:

This account has disabled anonymous posting.
If you don't have an account you can create one now.
HTML doesn't work in the subject.
More info about formatting

If you are unable to use this captcha for any reason, please contact us by email at support@dreamwidth.org