[dw-free] new feeds pulling in entries in reverse order
[commit: http://hg.dwscoalition.org/dw-free/rev/2a8cca4347d9]
http://bugs.dwscoalition.org/show_bug.cgi?id=2623
Refactor to create a helper function that takes the feed XML and returns an
ordered list of items from the feed. Add tests.
Patch by fu.
Files modified:
http://bugs.dwscoalition.org/show_bug.cgi?id=2623
Refactor to create a helper function that takes the feed XML and returns an
ordered list of items from the feed. Add tests.
Patch by ![personal profile](https://www.dreamwidth.org/img/silk/identity/user.png) fu.
Files modified:
- cgi-bin/LJ/SynSuck.pm
- t/synsuck.t
-------------------------------------------------------------------------------- diff -r c3fd821250d8 -r 2a8cca4347d9 cgi-bin/LJ/SynSuck.pm --- a/cgi-bin/LJ/SynSuck.pm Wed Jan 26 14:16:24 2011 +0800 +++ b/cgi-bin/LJ/SynSuck.pm Sun Jan 30 20:48:49 2011 +0800 @@ -122,14 +122,13 @@ sub get_content { return [$res, $content]; } -sub process_content { - my ($urow, $resp, $verbose) = @_; - - my ($res, $content) = @$resp; - my ($user, $userid, $synurl, $lastmod, $etag, $readers) = - map { $urow->{$_} } qw(user userid synurl lastmod etag numreaders); - - my $dbh = LJ::get_db_writer(); +# helper function which takes feed XML +# and returns a list of $num items from the feed +# in proper order +sub parse_items_from_feed { + my ( $content, $num, $verbose ) = @_; + $num ||= 20; + return ( 0, { type => "noitems" } ) unless defined $content; # WARNING: blatant XML spec violation ahead... # @@ -165,18 +164,52 @@ sub process_content { } # parsing time... - my ($feed, $error) = LJ::ParseFeed::parse_feed($content); - if ($error) { - # parse error! - print "Parse error! $error\n" if $verbose; - delay($userid, 3*60, "parseerror"); - $error =~ s! 
at /.*!!; - $error =~ s/^\n//; # cleanup of newline at the beginning of the line - my $syn_u = LJ::load_user( $user ); - $syn_u->set_prop( "rssparseerror", $error ) if $syn_u; - return; + my ( $feed, $error ) = LJ::ParseFeed::parse_feed( $content ); + return ( 0, { type => "parseerror", message => $error } ) if $error; + + # another sanity check + return ( 0, { type => "noitems" } ) unless ref $feed->{items} eq "ARRAY"; + + my @items = reverse @{$feed->{'items'}}; + + # take most recent 20 + splice( @items, 0, @items - $num ) if @items > $num; + + return ( 1, { items => \@items, feed => $feed } ); +} + + +sub process_content { + my ($urow, $resp, $verbose) = @_; + + my ($res, $content) = @$resp; + my ($user, $userid, $synurl, $lastmod, $etag, $readers) = + map { $urow->{$_} } qw(user userid synurl lastmod etag numreaders); + + my $dbh = LJ::get_db_writer(); + + my ( $ok, $rv ) = parse_items_from_feed( $content, 20, $verbose ); + unless ( $ok ) { + if ( $rv->{type} eq "parseerror" ) { + # parse error! + delay( $userid, 3*60, "parseerror" ); + if ( my $error = $rv->{message} ) { + print "Parse error! $error\n" if $verbose; + $error =~ s! at /.*!!; + $error =~ s/^\n//; # cleanup of newline at the beginning of the line + my $syn_u = LJ::load_user( $user ); + $syn_u->set_prop( "rssparseerror", $error ) if $syn_u; + } + return; + } elsif ( $rv->{type} eq "noitems" ) { + return delay( $userid, 3*60, "noitems" ); + } else { + print "Unknown error type!\n" if $verbose; + return delay( $userid, 3*60, "unknown" ); + } } + my $feed = $rv->{feed}; # register feeds that can support hubbub. if ( LJ::is_enabled( 'hubbub' ) && $feed->{self} && $feed->{hub} ) { # this is a square operation. 
register every "self" and the feed url along @@ -191,15 +224,7 @@ sub process_content { } } - # another sanity check - unless (ref $feed->{'items'} eq "ARRAY") { - return delay($userid, 3*60, "noitems"); - } - - my @items = reverse @{$feed->{'items'}}; - - # take most recent 20 - splice(@items, 0, @items-20) if @items > 20; + my @items = @{$rv->{items}}; # delete existing items older than the age which can show on a # friends view. diff -r c3fd821250d8 -r 2a8cca4347d9 t/synsuck.t --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/t/synsuck.t Sun Jan 30 20:48:49 2011 +0800 @@ -0,0 +1,526 @@ +# -*-perl-*- +use strict; +use Test::More tests => 26; + +use lib "$ENV{LJHOME}/cgi-bin"; +require 'ljlib.pl'; + +use LJ::SynSuck; + + +sub err { + my ( $content, $type, $test ) = @_; + + my ( $ok, $rv ) = LJ::SynSuck::parse_items_from_feed( $content ); + ok( ! $ok, $test ); + is( $rv->{type}, $type, $rv->{message} ? " $test - $rv->{message}" : $test ); +} + +sub success { + my ( $content, $test, %opts ) = @_; + + my ( $ok, $rv ) = LJ::SynSuck::parse_items_from_feed( $content, $opts{num_items} ); + ok( $ok, $test ); + die $rv->{message} unless $ok; + + return @{$rv->{items}}; +}; + + +note("Error"); +{ + my $content = q{<?xml version="1.0" encoding="ISO-8859-1"?> + <rss version="2.0"> + <channel> + <title>Blah + </channel> + </rss> + }; + + err( $content, "parseerror", "Mismatched tags" ); +} + +note("No items"); +{ + my $content = q{<?xml version="1.0" encoding="ISO-8859-1"?> + <rss version="2.0"> + <channel> + <title>Title</title> + <link>http://www.example.com/</link> + <description>Some Feed</description> + <pubDate>Mon, 24 Jan 2011 00:00:00 GMT</pubDate> + </channel> + </rss> + }; + + err( $content, "noitems", "Empty feed" ); +} + + +note("RSS pubDate - descending"); +{ + my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?> + <rss version="2.0"> + <channel> + <title>Title</title> + <link>http://www.example.com/</link> + <description>Some Feed</description> + 
<pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate> + + <item> + <title>Item 3</title> + <link>http://example.com/feed/3</link> + <description>baz</description> + <author>someone</author> + <guid isPermaLink="false">3</guid> + <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate> + </item> + + <item> + <title>Item 2</title> + <link>http://example.com/feed/2</link> + <description>bar</description> + <author>someone</author> + <guid isPermaLink="false">2</guid> + <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate> + </item> + + <item> + <title>Item 1</title> + <link>http://example.com/feed/1</link> + <description>foo</description> + <author>someone</author> + <guid isPermaLink="false">1</guid> + <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate> + </item> + + </channel> + </rss>}; + + my @items = success( $content, "Correct order from RSS pubDate (originally descending)" ); + is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in descending order)" ); +} + +note("RSS pubDate - ascending"); +{ + my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?> + <rss version="2.0"> + <channel> + <title>Title</title> + <link>http://www.example.com/</link> + <description>Some Feed</description> + <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate> + + <item> + <title>Item 1</title> + <link>http://example.com/feed/1</link> + <description>foo</description> + <author>someone</author> + <guid isPermaLink="false">1</guid> + <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate> + </item> + + <item> + <title>Item 2</title> + <link>http://example.com/feed/2</link> + <description>bar</description> + <author>someone</author> + <guid isPermaLink="false">2</guid> + <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate> + </item> + + <item> + <title>Item 3</title> + <link>http://example.com/feed/3</link> + <description>baz</description> + <author>someone</author> + <guid isPermaLink="false">3</guid> + <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate> + </item> + 
</channel> + </rss>}; + + my @items = success( $content, "Correct order from RSS pubDate (originally ascending)" ); + is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in ascending order)" ); +} + +note( "Atom - descending" ); +{ + my $content = q{<?xml version="1.0" encoding="UTF-8"?> + <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/"> + <title>Feed title</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom" /> + <id>example:atom:feed</id> + <updated>2011-01-23T17:38:49-08:00</updated> + + <entry> + <title>Item 3</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" /> + <id>3</id> + <published>2011-01-23T17:38:49-08:00</published> + <updated>2011-01-23T17:38:49-08:00</updated> + <author><name>someone</name></author> + <content type="html">baz</content> + </entry> + + <entry> + <title>Item 2</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" /> + <id>2</id> + <published>2011-01-23T13:59:55-08:00</published> + <updated>2011-01-23T13:59:55-08:00</updated> + <author><name>someone</name></author> + <content type="html">bar</content> + </entry> + + <entry> + <title>Item 1</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" /> + <id>1</id> + <published>2011-01-23T13:58:08-08:00</published> + <updated>2011-01-23T13:58:08-08:00</updated> + <author><name>someone</name></author> + <content type="html">foo</content> + </entry> + </feed>}; + + my @items = success( $content, "Correct order from Atom (originally descending)" ); + is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in descending order)" ); +} + +note( "Atom - ascending" ); +{ + my $content = q{<?xml version="1.0" encoding="UTF-8"?> + <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/"> + <title>Feed 
title</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom" /> + <id>example:atom:feed</id> + <updated>2011-01-23T17:38:49-08:00</updated> + + <entry> + <title>Item 1</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" /> + <id>1</id> + <published>2011-01-23T13:58:08-08:00</published> + <updated>2011-01-23T13:58:08-08:00</updated> + <author><name>someone</name></author> + <content type="html">foo</content> + </entry> + + <entry> + <title>Item 2</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" /> + <id>2</id> + <published>2011-01-23T13:59:55-08:00</published> + <updated>2011-01-23T13:59:55-08:00</updated> + <author><name>someone</name></author> + <content type="html">bar</content> + </entry> + + <entry> + <title>Item 3</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" /> + <id>3</id> + <published>2011-01-23T17:38:49-08:00</published> + <updated>2011-01-23T17:38:49-08:00</updated> + <author><name>someone</name></author> + <content type="html">baz</content> + </entry> + + </feed>}; + + my @items = success( $content, "Correct order from Atom (originally ascending)" ); + is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in ascending order)" ); +} + +note("RSS dc:date - descending"); +{ + my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?> + <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/"> + <channel> + <title>Title</title> + <link>http://www.example.com/</link> + <description>Some Feed</description> + <dc:date>2011-01-24T11:06:54Z</dc:date> + + <item> + <title>Item 3</title> + <link>http://example.com/feed/3</link> + <description>baz</description> + <author>someone</author> + <guid isPermaLink="false">3</guid> + <dc:date>2011-01-24T03:00:00Z</dc:date> + </item> + + <item> + <title>Item 2</title> + <link>http://example.com/feed/2</link> + 
<description>bar</description> + <author>someone</author> + <guid isPermaLink="false">2</guid> + <dc:date>2011-01-23T05:30:00Z</dc:date> + </item> + + <item> + <title>Item 1</title> + <link>http://example.com/feed/1</link> + <description>foo</description> + <author>someone</author> + <guid isPermaLink="false">1</guid> + <dc:date>2011-01-17T20:00:00Z</dc:date> + </item> + + </channel> + </rss>}; + + my @items = success( $content, "Correct order from RSS dc:date (originally descending)" ); + is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in descending order)" ); +} + +note("RSS dc:date - ascending"); +{ + my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?> + <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/"> + <channel> + <title>Title</title> + <link>http://www.example.com/</link> + <description>Some Feed</description> + <dc:date>2011-01-24T11:06:54Z</dc:date> + + <item> + <title>Item 1</title> + <link>http://example.com/feed/1</link> + <description>foo</description> + <author>someone</author> + <guid isPermaLink="false">1</guid> + <dc:date>2011-01-17T20:00:00Z</dc:date> + </item> + + <item> + <title>Item 2</title> + <link>http://example.com/feed/2</link> + <description>bar</description> + <author>someone</author> + <guid isPermaLink="false">2</guid> + <dc:date>2011-01-23T05:30:00Z</dc:date> + </item> + + <item> + <title>Item 3</title> + <link>http://example.com/feed/3</link> + <description>baz</description> + <author>someone</author> + <guid isPermaLink="false">3</guid> + <dc:date>2011-01-24T03:00:00Z</dc:date> + </item> + </channel> + </rss>}; + + my @items = success( $content, "Correct order from RSS dc:date (originally ascending)" ); + is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally in ascending order)" ); +} + +note( "Without datestamp - descending" ); +{ + my $content = q{<?xml version="1.0" encoding="UTF-8"?> + <feed 
xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/"> + <title>Feed title</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom" /> + <id>example:atom:feed</id> + <updated>2011-01-23T17:38:49-08:00</updated> + + <entry> + <title>Item 3</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" /> + <id>3</id> + <author><name>someone</name></author> + <content type="html">baz</content> + </entry> + + <entry> + <title>Item 2</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" /> + <id>2</id> + <author><name>someone</name></author> + <content type="html">bar</content> + </entry> + + <entry> + <title>Item 1</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" /> + <id>1</id> + <author><name>someone</name></author> + <content type="html">foo</content> + </entry> + </feed>}; + + my @items = success( $content, "Correct order without datestamps (originally descending)" ); + is_deeply( [ map {$_->{id}} @items ], [ 1, 2, 3 ], "Items from feed returned in correct order (originally without datestamps in descending order)" ); +} + +note( "Without datestamp - ascending" ); +{ + my $content = q{<?xml version="1.0" encoding="UTF-8"?> + <feed xmlns="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/"> + <title>Feed title</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom" /> + <id>example:atom:feed</id> + <updated>2011-01-23T17:38:49-08:00</updated> + + <entry> + <title>Item 1</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom/1" /> + <id>1</id> + <author><name>someone</name></author> + <content type="html">foo</content> + </entry> + + <entry> + <title>Item 2</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom/2" /> + <id>2</id> + <author><name>someone</name></author> + <content type="html">bar</content> + 
</entry> + + <entry> + <title>Item 3</title> + <link rel="alternate" type="text/html" href="http://example.com/feed/atom/3" /> + <id>3</id> + <author><name>someone</name></author> + <content type="html">baz</content> + </entry> + + </feed>}; + + my @items = success( $content, "Correct order without datestamps (originally ascending)" ); + is_deeply( [ map {$_->{id}} @items ], [ 3, 2, 1 ], "Items from feed returned in what we guessed is the correct order (originally without datestamps in ascending order)" ); +} + + +note( "Active feed - too many items - descending" ); +{ + my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?> + <rss version="2.0"> + <channel> + <title>Title</title> + <link>http://www.example.com/</link> + <description>Some Feed</description> + <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate> + + <item> + <title>Item 3</title> + <link>http://example.com/feed/3</link> + <description>baz</description> + <author>someone</author> + <guid isPermaLink="false">3</guid> + <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate> + </item> + + <item> + <title>Item 2</title> + <link>http://example.com/feed/2</link> + <description>bar</description> + <author>someone</author> + <guid isPermaLink="false">2</guid> + <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate> + </item> + + <item> + <title>Item 1</title> + <link>http://example.com/feed/1</link> + <description>foo</description> + <author>someone</author> + <guid isPermaLink="false">1</guid> + <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate> + </item> + + </channel> + </rss>}; + + my @items = success( $content, "Latest two items in the feed", num_items => 2 ); + is_deeply( [ map {$_->{id}} @items ], [ 2, 3 ], "Returned latest two items from feed (originally in descending order)" ); +} + +note( "Active feed - too many items - ascending" ); +{ + my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?> + <rss version="2.0"> + <channel> + <title>Title</title> + <link>http://www.example.com/</link> + 
<description>Some Feed</description> + <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate> + + <item> + <title>Item 1</title> + <link>http://example.com/feed/1</link> + <description>foo</description> + <author>someone</author> + <guid isPermaLink="false">1</guid> + <pubDate>Mon, 17 Jan 2011 20:00:00 GMT</pubDate> + </item> + + <item> + <title>Item 2</title> + <link>http://example.com/feed/2</link> + <description>bar</description> + <author>someone</author> + <guid isPermaLink="false">2</guid> + <pubDate>Sun, 23 Jan 2011 05:30:00 GMT</pubDate> + </item> + + <item> + <title>Item 3</title> + <link>http://example.com/feed/3</link> + <description>baz</description> + <author>someone</author> + <guid isPermaLink="false">3</guid> + <pubDate>Mon, 24 Jan 2011 03:00:00 GMT</pubDate> + </item> + </channel> + </rss>}; + + my @items = success( $content, "Latest two items in the feed", num_items => 2 ); + is_deeply( [ map {$_->{id}} @items ], [ 2, 3 ], "Returned latest two items from feed (originally in ascending order)" ); +} + +note( "Active feed - too many items - no datestamp ascending" ); +{ + my $content = q {<?xml version="1.0" encoding="ISO-8859-1"?> + <rss version="2.0"> + <channel> + <title>Title</title> + <link>http://www.example.com/</link> + <description>Some Feed</description> + <pubDate>Mon, 24 Jan 2011 11:06:54 GMT</pubDate> + + <item> + <title>Item 1</title> + <link>http://example.com/feed/1</link> + <description>foo</description> + <author>someone</author> + <guid isPermaLink="false">1</guid> + </item> + + <item> + <title>Item 2</title> + <link>http://example.com/feed/2</link> + <description>bar</description> + <author>someone</author> + <guid isPermaLink="false">2</guid> + </item> + + <item> + <title>Item 3</title> + <link>http://example.com/feed/3</link> + <description>baz</description> + <author>someone</author> + <guid isPermaLink="false">3</guid> + </item> + </channel> + </rss>}; + + my @items = success( $content, "Latest two items in the feed (guessed)", 
num_items => 2 ); + is_deeply( [ map {$_->{id}} @items ], [ 2, 1 ], "Returned what we guessed are the latest two items from feed (originally without datestamps in ascending order)" ); +} --------------------------------------------------------------------------------