[dw-free] new feeds pulling in entries in reverse order
[commit: http://hg.dwscoalition.org/dw-free/rev/2a8cca4347d9]
http://bugs.dwscoalition.org/show_bug.cgi?id=2623
Refactor to create a helper function that takes the feed XML and returns an
ordered list of items from the feed. Add tests.
Patch by
fu.
Files modified:
http://bugs.dwscoalition.org/show_bug.cgi?id=2623
Refactor to create a helper function that takes the feed XML and returns an
ordered list of items from the feed. Add tests.
Patch by fu.
Files modified:
- cgi-bin/LJ/SynSuck.pm
- t/synsuck.t
--------------------------------------------------------------------------------
diff -r c3fd821250d8 -r 2a8cca4347d9 cgi-bin/LJ/SynSuck.pm
--- a/cgi-bin/LJ/SynSuck.pm Wed Jan 26 14:16:24 2011 +0800
+++ b/cgi-bin/LJ/SynSuck.pm Sun Jan 30 20:48:49 2011 +0800
@@ -122,14 +122,13 @@ sub get_content {
return [$res, $content];
}
-sub process_content {
- my ($urow, $resp, $verbose) = @_;
-
- my ($res, $content) = @$resp;
- my ($user, $userid, $synurl, $lastmod, $etag, $readers) =
- map { $urow->{$_} } qw(user userid synurl lastmod etag numreaders);
-
- my $dbh = LJ::get_db_writer();
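+# helper function which takes feed XML and returns ( $ok, $rv ):
+# on success, $rv has the parsed feed and at most $num items in proper order;
+# on failure, $rv has the error type (and the message, for parse errors)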
+sub parse_items_from_feed {
+ my ( $content, $num, $verbose ) = @_;
+ $num ||= 20;
+ return ( 0, { type => "noitems" } ) unless defined $content;
# WARNING: blatant XML spec violation ahead...
#
@@ -165,18 +164,52 @@ sub process_content {
}
# parsing time...
- my ($feed, $error) = LJ::ParseFeed::parse_feed($content);
- if ($error) {
- # parse error!
- print "Parse error! $error\n" if $verbose;
- delay($userid, 3*60, "parseerror");
- $error =~ s! at /.*!!;
- $error =~ s/^\n//; # cleanup of newline at the beginning of the line
- my $syn_u = LJ::load_user( $user );
- $syn_u->set_prop( "rssparseerror", $error ) if $syn_u;
- return;
+ my ( $feed, $error ) = LJ::ParseFeed::parse_feed( $content );
+ return ( 0, { type => "parseerror", message => $error } ) if $error;
+
+ # another sanity check
+ return ( 0, { type => "noitems" } ) unless ref $feed->{items} eq "ARRAY";
+
+ my @items = reverse @{$feed->{'items'}};
+
+ # take most recent $num
+ splice( @items, 0, @items - $num ) if @items > $num;
+
+ return ( 1, { items => \@items, feed => $feed } );
+}
+
+
+sub process_content {
+ my ($urow, $resp, $verbose) = @_;
+
+ my ($res, $content) = @$resp;
+ my ($user, $userid, $synurl, $lastmod, $etag, $readers) =
+ map { $urow->{$_} } qw(user userid synurl lastmod etag numreaders);
+
+ my $dbh = LJ::get_db_writer();
+
+ my ( $ok, $rv ) = parse_items_from_feed( $content, 20, $verbose );
+ unless ( $ok ) {
+ if ( $rv->{type} eq "parseerror" ) {
+ # parse error!
+ delay( $userid, 3*60, "parseerror" );
+ if ( my $error = $rv->{message} ) {
+ print "Parse error! $error\n" if $verbose;
+ $error =~ s! at /.*!!;
+ $error =~ s/^\n//; # cleanup of newline at the beginning of the line
+ my $syn_u = LJ::load_user( $user );
+ $syn_u->set_prop( "rssparseerror", $error ) if $syn_u;
+ }
+ return;
+ } elsif ( $rv->{type} eq "noitems" ) {
+ return delay( $userid, 3*60, "noitems" );
+ } else {
+ print "Unknown error type!\n" if $verbose;
+ return delay( $userid, 3*60, "unknown" );
+ }
}
+ my $feed = $rv->{feed};
# register feeds that can support hubbub.
if ( LJ::is_enabled( 'hubbub' ) && $feed->{self} && $feed->{hub} ) {
# this is a square operation. register every "self" and the feed url along
@@ -191,15 +224,7 @@ sub process_content {
}
}
- # another sanity check
- unless (ref $feed->{'items'} eq "ARRAY") {
- return delay($userid, 3*60, "noitems");
- }
-
- my @items = reverse @{$feed->{'items'}};
-
- # take most recent 20
- splice(@items, 0, @items-20) if @items > 20;
+ my @items = @{$rv->{items}};
# delete existing items older than the age which can show on a
# friends view.
diff -r c3fd821250d8 -r 2a8cca4347d9 t/synsuck.t
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/t/synsuck.t Sun Jan 30 20:48:49 2011 +0800
@@ -0,0 +1,526 @@
+# -*-perl-*-
+use strict;
+use Test::More tests => 26;
+
+use lib "$ENV{LJHOME}/cgi-bin";
+require 'ljlib.pl';
+
+use LJ::SynSuck;
+
+
+sub err {
+ my ( $content, $type, $test ) = @_;
+
+ my ( $ok, $rv ) = LJ::SynSuck::parse_items_from_feed( $content );
+ ok( ! $ok, $test );
+ is( $rv->{type}, $type, $rv->{message} ? " $test - $rv->{message}" : $test );
+}
+
+sub success {
+ my ( $content, $test, %opts ) = @_;
+
+ my ( $ok, $rv ) = LJ::SynSuck::parse_items_from_feed( $content, $opts{num_items} );
+ ok( $ok, $test );
+ die $rv->{message} unless $ok;
+
+ return @{$rv->{items}};
+};
+
+
+note("Error");
+{
+ my $content = q{<?xml version="1.0" encoding="ISO-8859-1"?>
+ <rss version="2.0">
+ <channel>
+ <title>Blah
+ </channel>
+ </rss>
+ };
+
+ err( $content, "parseerror", "Mismatched tags" );
+}
+
+note("No items");
+{
+ my $content = q{<?xml version="1.0" encoding="ISO-8859-1"?>
+ <rss version="2.0">
+ <channel>
+ <title>Title</title>
+ <link>http://www.example.com/</link>
+ <description>Some Feed</description>
+ <pubDate>Mon, 24 Jan 2011 00:00:00 GMT</pubDate>
+ </channel>
+ </rss>
+ };
+
+ err( $content, "noitems", "Empty feed" );
+}
+
+
+note("RSS pubDate - descending");
+{
+ my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+ <rss version="2.0">
+ <channel>
+ <title>Title</title>
+ <link>http://www.example.com/</link>
+ <description>Some Feed</description>
+ <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+ <item>
+ <title>Item 3</title>
+ <link>http://example.com/feed/3</link>
+ <description>baz</description>
+ <author>someone</author>
+ <guid isPermaLink="false">3</guid>
+ <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate>
+ </item>
+
+ <item>
+ <title>Item 2</title>
+ <link>http://example.com/feed/2</link>
+ <description>bar</description>
+ <author>someone</author>
+ <guid isPermaLink="false">2</guid>
+ <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate>
+ </item>
+
+ <item>
+ <title>Item 1</title>
+ <link>http://example.com/feed/1</link>
+ <description>foo</description>
+ <author>someone</author>
+ <guid isPermaLink="false">1</guid>
+ <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate>
+ </item>
+
+ </channel>
+ </rss>};
+
+ my @items = success( $content, "Correct order from RSS pubDate (originally descending)" );
+ is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in descending order)" );
+}
+
+note("RSS pubDate - ascending");
+{
+ my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+ <rss version="2.0">
+ <channel>
+ <title>Title</title>
+ <link>http://www.example.com/</link>
+ <description>Some Feed</description>
+ <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+ <item>
+ <title>Item 1</title>
+ <link>http://example.com/feed/1</link>
+ <description>foo</description>
+ <author>someone</author>
+ <guid isPermaLink="false">1</guid>
+ <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate>
+ </item>
+
+ <item>
+ <title>Item 2</title>
+ <link>http://example.com/feed/2</link>
+ <description>bar</description>
+ <author>someone</author>
+ <guid isPermaLink="false">2</guid>
+ <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate>
+ </item>
+
+ <item>
+ <title>Item 3</title>
+ <link>http://example.com/feed/3</link>
+ <description>baz</description>
+ <author>someone</author>
+ <guid isPermaLink="false">3</guid>
+ <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate>
+ </item>
+ </channel>
+ </rss>};
+
+ my @items = success( $content, "Correct order from RSS pubDate (originally ascending)" );
+ is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in ascending order)" );
+}
+
+note( "Atom - descending" );
+{
+ my $content = q{<?xml version="1.0" encoding="UTF-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <title>Feed title</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom" />
+ <id>example:atom:feed</id>
+ <updated>2011-01-23T17:38:49-08:00</updated>
+
+ <entry>
+ <title>Item 3</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" />
+ <id>3</id>
+ <published>2011-01-23T17:38:49-08:00</published>
+ <updated>2011-01-23T17:38:49-08:00</updated>
+ <author><name>someone</name></author>
+ <content type="html">baz</content>
+ </entry>
+
+ <entry>
+ <title>Item 2</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" />
+ <id>2</id>
+ <published>2011-01-23T13:59:55-08:00</published>
+ <updated>2011-01-23T13:59:55-08:00</updated>
+ <author><name>someone</name></author>
+ <content type="html">bar</content>
+ </entry>
+
+ <entry>
+ <title>Item 1</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" />
+ <id>1</id>
+ <published>2011-01-23T13:58:08-08:00</published>
+ <updated>2011-01-23T13:58:08-08:00</updated>
+ <author><name>someone</name></author>
+ <content type="html">foo</content>
+ </entry>
+ </feed>};
+
+ my @items = success( $content, "Correct order from Atom (originally descending)" );
+ is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in descending order)" );
+}
+
+note( "Atom - ascending" );
+{
+ my $content = q{<?xml version="1.0" encoding="UTF-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <title>Feed title</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom" />
+ <id>example:atom:feed</id>
+ <updated>2011-01-23T17:38:49-08:00</updated>
+
+ <entry>
+ <title>Item 1</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" />
+ <id>1</id>
+ <published>2011-01-23T13:58:08-08:00</published>
+ <updated>2011-01-23T13:58:08-08:00</updated>
+ <author><name>someone</name></author>
+ <content type="html">foo</content>
+ </entry>
+
+ <entry>
+ <title>Item 2</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" />
+ <id>2</id>
+ <published>2011-01-23T13:59:55-08:00</published>
+ <updated>2011-01-23T13:59:55-08:00</updated>
+ <author><name>someone</name></author>
+ <content type="html">bar</content>
+ </entry>
+
+ <entry>
+ <title>Item 3</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" />
+ <id>3</id>
+ <published>2011-01-23T17:38:49-08:00</published>
+ <updated>2011-01-23T17:38:49-08:00</updated>
+ <author><name>someone</name></author>
+ <content type="html">baz</content>
+ </entry>
+
+ </feed>};
+
+ my @items = success( $content, "Correct order from Atom (originally ascending)" );
+ is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in ascending order)" );
+}
+
+note("RSS dc:date - descending");
+{
+ my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+ <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <channel>
+ <title>Title</title>
+ <link>http://www.example.com/</link>
+ <description>Some Feed</description>
+ <dc:date>2011-01-24T11:06:54Z</dc:date>
+
+ <item>
+ <title>Item 3</title>
+ <link>http://example.com/feed/3</link>
+ <description>baz</description>
+ <author>someone</author>
+ <guid isPermaLink="false">3</guid>
+ <dc:date>2011-01-24T03:00:00Z</dc:date>
+ </item>
+
+ <item>
+ <title>Item 2</title>
+ <link>http://example.com/feed/2</link>
+ <description>bar</description>
+ <author>someone</author>
+ <guid isPermaLink="false">2</guid>
+ <dc:date>2011-01-23T05:30:00Z</dc:date>
+ </item>
+
+ <item>
+ <title>Item 1</title>
+ <link>http://example.com/feed/1</link>
+ <description>foo</description>
+ <author>someone</author>
+ <guid isPermaLink="false">1</guid>
+ <dc:date>2011-01-17T20:00:00Z</dc:date>
+ </item>
+
+ </channel>
+ </rss>};
+
+ my @items = success( $content, "Correct order from RSS dc:date (originally descending)" );
+ is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in descending order)" );
+}
+
+note("RSS dc:date - ascending");
+{
+ my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+ <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <channel>
+ <title>Title</title>
+ <link>http://www.example.com/</link>
+ <description>Some Feed</description>
+ <dc:date>2011-01-24T11:06:54Z</dc:date>
+
+ <item>
+ <title>Item 1</title>
+ <link>http://example.com/feed/1</link>
+ <description>foo</description>
+ <author>someone</author>
+ <guid isPermaLink="false">1</guid>
+ <dc:date>2011-01-17T20:00:00Z</dc:date>
+ </item>
+
+ <item>
+ <title>Item 2</title>
+ <link>http://example.com/feed/2</link>
+ <description>bar</description>
+ <author>someone</author>
+ <guid isPermaLink="false">2</guid>
+ <dc:date>2011-01-23T05:30:00Z</dc:date>
+ </item>
+
+ <item>
+ <title>Item 3</title>
+ <link>http://example.com/feed/3</link>
+ <description>baz</description>
+ <author>someone</author>
+ <guid isPermaLink="false">3</guid>
+ <dc:date>2011-01-24T03:00:00Z</dc:date>
+ </item>
+ </channel>
+ </rss>};
+
+ my @items = success( $content, "Correct order from RSS dc:date (originally ascending)" );
+ is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in ascending order)" );
+}
+
+note( "Without datestamp - descending" );
+{
+ my $content = q{<?xml version="1.0" encoding="UTF-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <title>Feed title</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom" />
+ <id>example:atom:feed</id>
+ <updated>2011-01-23T17:38:49-08:00</updated>
+
+ <entry>
+ <title>Item 3</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" />
+ <id>3</id>
+ <author><name>someone</name></author>
+ <content type="html">baz</content>
+ </entry>
+
+ <entry>
+ <title>Item 2</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" />
+ <id>2</id>
+ <author><name>someone</name></author>
+ <content type="html">bar</content>
+ </entry>
+
+ <entry>
+ <title>Item 1</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" />
+ <id>1</id>
+ <author><name>someone</name></author>
+ <content type="html">foo</content>
+ </entry>
+ </feed>};
+
+ my @items = success( $content, "Correct order without datestamps (originally descending)" );
+ is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally without datestamps in descending order)" );
+}
+
+note( "Without datestamp - ascending" );
+{
+ my $content = q{<?xml version="1.0" encoding="UTF-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <title>Feed title</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom" />
+ <id>example:atom:feed</id>
+ <updated>2011-01-23T17:38:49-08:00</updated>
+
+ <entry>
+ <title>Item 1</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" />
+ <id>1</id>
+ <author><name>someone</name></author>
+ <content type="html">foo</content>
+ </entry>
+
+ <entry>
+ <title>Item 2</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" />
+ <id>2</id>
+ <author><name>someone</name></author>
+ <content type="html">bar</content>
+ </entry>
+
+ <entry>
+ <title>Item 3</title>
+ <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" />
+ <id>3</id>
+ <author><name>someone</name></author>
+ <content type="html">baz</content>
+ </entry>
+
+ </feed>};
+
+ my @items = success( $content, "Correct order without datestamps (originally ascending)" );
+ is_deeply( [ map {$_->{id}} @items ], [ 3, 2, 1 ], "Items from feed returned in what we guessed is the correct order (originally without datestamps in ascending order)" );
+}
+
+
+note( "Active feed - too many items - descending" );
+{
+ my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+ <rss version="2.0">
+ <channel>
+ <title>Title</title>
+ <link>http://www.example.com/</link>
+ <description>Some Feed</description>
+ <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+ <item>
+ <title>Item 3</title>
+ <link>http://example.com/feed/3</link>
+ <description>baz</description>
+ <author>someone</author>
+ <guid isPermaLink="false">3</guid>
+ <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate>
+ </item>
+
+ <item>
+ <title>Item 2</title>
+ <link>http://example.com/feed/2</link>
+ <description>bar</description>
+ <author>someone</author>
+ <guid isPermaLink="false">2</guid>
+ <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate>
+ </item>
+
+ <item>
+ <title>Item 1</title>
+ <link>http://example.com/feed/1</link>
+ <description>foo</description>
+ <author>someone</author>
+ <guid isPermaLink="false">1</guid>
+ <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate>
+ </item>
+
+ </channel>
+ </rss>};
+
+ my @items = success( $content, "Latest two items in the feed", num_items => 2 );
+ is_deeply( [ map {$_->{id}} @items ], [ 2, 3 ], "Returned latest two items from feed (originally in descending order)" );
+}
+
+note( "Active feed - too many items - ascending" );
+{
+ my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+ <rss version="2.0">
+ <channel>
+ <title>Title</title>
+ <link>http://www.example.com/</link>
+ <description>Some Feed</description>
+ <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+ <item>
+ <title>Item 1</title>
+ <link>http://example.com/feed/1</link>
+ <description>foo</description>
+ <author>someone</author>
+ <guid isPermaLink="false">1</guid>
+ <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate>
+ </item>
+
+ <item>
+ <title>Item 2</title>
+ <link>http://example.com/feed/2</link>
+ <description>bar</description>
+ <author>someone</author>
+ <guid isPermaLink="false">2</guid>
+ <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate>
+ </item>
+
+ <item>
+ <title>Item 3</title>
+ <link>http://example.com/feed/3</link>
+ <description>baz</description>
+ <author>someone</author>
+ <guid isPermaLink="false">3</guid>
+ <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate>
+ </item>
+ </channel>
+ </rss>};
+
+ my @items = success( $content, "Latest two items in the feed", num_items => 2 );
+ is_deeply( [ map {$_->{id}} @items ], [ 2, 3 ], "Returned latest two items from feed (originally in ascending order)" );
+}
+
+note( "Active feed - too many items - no datestamp ascending" );
+{
+ my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?>
+ <rss version="2.0">
+ <channel>
+ <title>Title</title>
+ <link>http://www.example.com/</link>
+ <description>Some Feed</description>
+ <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate>
+
+ <item>
+ <title>Item 1</title>
+ <link>http://example.com/feed/1</link>
+ <description>foo</description>
+ <author>someone</author>
+ <guid isPermaLink="false">1</guid>
+ </item>
+
+ <item>
+ <title>Item 2</title>
+ <link>http://example.com/feed/2</link>
+ <description>bar</description>
+ <author>someone</author>
+ <guid isPermaLink="false">2</guid>
+ </item>
+
+ <item>
+ <title>Item 3</title>
+ <link>http://example.com/feed/3</link>
+ <description>baz</description>
+ <author>someone</author>
+ <guid isPermaLink="false">3</guid>
+ </item>
+ </channel>
+ </rss>};
+
+ my @items = success( $content, "Latest two items in the feed (guessed)", num_items => 2 );
+ is_deeply( [ map {$_->{id}} @items ], [ 2, 1 ], "Returned what we guessed are the latest two items from feed (originally without datestamps in ascending order)" );
+}
--------------------------------------------------------------------------------
