[dw-free] Implement Latest Posts feed
[commit: http://hg.dwscoalition.org/dw-free/rev/57b0fb9cca14]
http://bugs.dwscoalition.org/show_bug.cgi?id=316
Initial support for latest things. Right now this only tracks entries (but
much of the plumbing is there for comments) and it only outputs in HTML.
However, it DOES let us define well known tags so that we can have latest
post feeds for certain tags.
Patch by mark.
Files modified:
http://bugs.dwscoalition.org/show_bug.cgi?id=316
Initial support for latest things. Right now this only tracks entries (but
much of the plumbing is there for comments) and it only outputs in HTML.
However, it DOES let us define well known tags so that we can have latest
post feeds for certain tags.
Patch by ![staff profile](https://www.dreamwidth.org/img/silk/identity/user_staff.png) mark.
Files modified:
- bin/worker/latest-feed
- cgi-bin/DW/LatestFeed.pm
- cgi-bin/DW/Worker/LatestFeed.pm
- cgi-bin/LJ/User.pm
- cgi-bin/ljlib.pl
- cgi-bin/ljprotocol.pl
- doc/config-private.pl.txt
- htdocs/latest.bml
- htdocs/stc/latest.css
-------------------------------------------------------------------------------- diff -r 4c1f8b40aa26 -r 57b0fb9cca14 bin/worker/latest-feed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/worker/latest-feed Sat Oct 31 04:48:12 2009 +0000 @@ -0,0 +1,25 @@ +#!/usr/bin/perl +# +# latest-feed +# +# Dispatches the latest-feed job. See cgi-bin/DW/Worker/LatestFeed.pm. +# +# Authors: +# Mark Smith <mark@dreamwidth.org> +# +# Copyright (c) 2009 by Dreamwidth Studios, LLC. +# +# This program is free software; you may redistribute it and/or modify it under +# the same terms as Perl itself. For a copy of the license, please reference +# 'perldoc perlartistic' or 'perldoc perlgpl'. +# + +use strict; +use lib "$ENV{LJHOME}/cgi-bin"; + +require 'ljlib.pl'; +use LJ::Worker::TheSchwartz; +use DW::Worker::LatestFeed; + +schwartz_decl( "DW::Worker::LatestFeed" ); +schwartz_work(); diff -r 4c1f8b40aa26 -r 57b0fb9cca14 cgi-bin/DW/LatestFeed.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cgi-bin/DW/LatestFeed.pm Sat Oct 31 04:48:12 2009 +0000 @@ -0,0 +1,266 @@ +#!/usr/bin/perl +# +# DW::LatestFeed +# +# This module is the "frontend" for the latest feed. You call this module to +# insert something into the feed or get the feed back in a consumable fashion. +# There is a lot of room for optimization to make this process more efficient +# but for now I haven't really done that. +# +# Authors: +# Mark Smith <mark@dreamwidth.org> +# +# Copyright (c) 2009 by Dreamwidth Studios, LLC. +# +# This program is free software; you may redistribute it and/or modify it under +# the same terms as Perl itself. For a copy of the license, please reference +# 'perldoc perlartistic' or 'perldoc perlgpl'. +# + +package DW::LatestFeed; +use strict; + +# time in seconds to hold events for. until an event is this old, we will not +# show it on any page. 
+use constant EVENT_HORIZON => 300; + +# call this with whatever you want to stick onto the latest feed, and note that +# this just fires off TheSchwartz jobs, the work isn't actually done until the +# worker process it +sub new_item { + my ( $class, $obj ) = @_; + return unless $obj && ref $obj; + + my $sclient = LJ::theschwartz() or return; + + # entries are [ journalid, jitemid ] which lets us get the LJ::Entry back + if ( $obj->isa( 'LJ::Entry' ) ) { + $sclient->insert_jobs( + TheSchwartz::Job->new_from_array( 'DW::Worker::LatestFeed', { + type => 'entry', + journalid => $obj->journalid, + jitemid => $obj->jitemid, + } ) + ); + + # comments are stored as [ journalid, jtalkid ] which allows us to rebuild + # the object easily + } elsif ( $obj->isa( 'LJ::Comment' ) ) { + $sclient->insert_jobs( + TheSchwartz::Job->new_from_array( 'DW::Worker::LatestFeed', { + type => 'comment', + journalid => $obj->journalid, + jtalkid => $obj->jtalkid, + } ) + ); + + } + + return undef; +} + +# returns arrayref of item hashrefs that you can handle and display if you want +sub get_items { + my ( $class, %opts ) = @_; + return if $opts{tag} && ! exists $LJ::LATEST_TAGS{$opts{tag}}; + + # make sure we process the queue of items first. this makes sure that if we + # don't have much traffic we don't have to wait for new posts to drive the + # processor. + $class->_process_queue; + + # and simply get the list and return it ... simplicity + my $mckey = $opts{tag} ? "latest_items_tag:$opts{tag}" : "latest_items"; + return LJ::MemCache::get( $mckey ); +} + +# INTERNAL; called by the worker when there's an item for us to handle. at this +# point we are guaranteed to be the only active task updating the memcache keys +sub _process_item { + my ( $class, $opts ) = @_; + return unless $opts && ref $opts eq 'HASH'; + + # we need to get the latest queue lock so we can edit it. 
note that we will + # try and try to get the lock because we really really want to succeed + my $lock; + while ( 1 ) { + $lock = LJ::locker()->trylock( 'latest_queue' ); + last if $lock; + + # pause for 0.0-0.3 seconds to shuffle things up. generally good behavior + # when you're contending for locks. + select undef, undef, undef, rand() * 0.3; + } + + # the way this works, since we want a 5 minute delay on items being posted and + # appearing, is that when we get an item to process we just want to put it onto + # an array. when we LOAD the list we will process it, if we need to. + my $dest = LJ::MemCache::get( 'latest_queue' ) || []; + $opts->{t} = time + EVENT_HORIZON; + push @$dest, $opts; + + # prune the list if it gets too large + if ( scalar @$dest > 10_000 ) { + warn "$class->_process_item: latest_queue too large, dropping items.\n"; + @$dest = splice @$dest, 0, 10_000; + } + + # now stick it in memcache + LJ::MemCache::set( latest_queue => $dest ); + + # and just in case, try to process the queue since we're here anyway + $class->_process_queue( have_lock => 1 ); +} + + +# INTERNL; called and attempts to do something with the latest items queue +sub _process_queue { + my ( $class, %opts ) = @_; + + # we only process the queue every 60 seconds, no matter how often users might + # ask for a page. check the timer and bail if it's too soon. + my $now = time; + return unless ( LJ::MemCache::get( 'latest_queue_next' ) || 0 ) <= $now; + + # if we can't get the lock that means somebody else is processing the queue right + # now so we should do nothing. this returns immediately if the lock can't be gotten. 
+ my $lock; + unless ( $opts{have_lock} ) { + $lock = LJ::locker()->trylock( 'latest_queue' ) + or return; + } + + # update timer, now that we know we're the ones to do the work + LJ::MemCache::set( latest_queue_next => $now + 60 ); + + # get queue to process + my $lq = LJ::MemCache::get( 'latest_queue' ); + return unless $lq && ref $lq eq 'ARRAY' && @$lq; + + # BLOCK OF COMMENT TEXT + # + # okay, so this entire process is rather contorted but it's the only way to get the + # efficient behavior we want. potentially the latest queue can have a zillion items + # in it, so we want to make sure to load things in the most efficient patterns possible. + # apologies for the convolutedness. + # + + # step 1) determine which items we can flat out ignore, dump those on the @rq and the + # rest onto the @pq + + my ( @pq, @rq ); + foreach my $item ( @$lq ) { + + # result queue it if it has not passed our event horizon time yet + if ( $now < $item->{t} ) { + push @rq, $item; + next; + } + + push @pq, $item; + } + + # step 1.5) we are done with the latest queue so we can toss that back into memcache and + # set the timer for the next update. + + LJ::MemCache::set( latest_queue => \@rq ); + + # step 2) load the user objects in one swoop. we have to do this first because the + # objects we instantiant in step 3 need the user objects. if you give them a userid + # they will load the user one by one, which is inefficient. this is better. + + my $us = LJ::load_userids( map { $_->{journalid} } @pq ); + + # step 3) create the objects we need. we create them all first and DO NOT TOUCH THEM + # so that we can take advantage of the singleton loading. 
+ + foreach my $item ( @pq ) { + # now, we want to create an object for the item + if ( $item->{type} eq 'entry' ) { + $item->{obj} = LJ::Entry->new( $us->{$item->{journalid}}, jitemid => $item->{jitemid} ); + } elsif ( $item->{type} eq 'comment' ) { + $item->{obj} = LJ::Comment->new( $us->{$item->{journalid}}, jtalkid => $item->{jtalkid} ); + } + } + + # step 4) now we have to process the comments to dig up the entry they go to. this + # causes the comments to preload. + + foreach my $item ( @pq ) { + if ( $item->{type} eq 'comment' ) { + $item->{obj_entry} = $item->{obj}->entry; + } + } + + # step 5) get all of the poster ids for the entries and comments so that we can load those in one + # massive swoop + + # get userids for comments, entries, and then filter based on what we already have + my @uids = map { $_->{obj}->posterid } grep { $_->{type} eq 'entry' } @pq; + push @uids, map { $_->{obj}->posterid, $_->{obj_entry}->posterid } grep { $_->{type} eq 'comment' } @pq; + @uids = grep { ! exists $us->{$_} } @uids; + + # load the new users, backport to $us + my $us2 = LJ::load_userids( @uids ); + $us->{$_} = $us2->{$_} foreach keys %$us2; + + # step 6) now we can iterate over everything and see what should be shown or not. the items + # that make the cut are stuck on @gq. 
+ + my $show_entry = sub { + my $entry = $_[0]; + + return unless $entry->security eq 'public'; + return unless $entry->poster->include_in_latest_feed && + $entry->journal->include_in_latest_feed; + }; + + my @gq; + foreach my $item ( @pq ) { + + if ( $item->{type} eq 'entry' ) { + # push the entry if it passes muster + push @gq, $item if $show_entry->( $item->{obj} ); + + } elsif ( $item->{type} eq 'comment' ) { + # the comment has to be visible and the poster allows latest feed + next unless $item->{obj}->is_active && + $item->{obj}->poster->include_in_latest_feed; + + # now push it, but only if the entry is OK + push @gq, $item if $show_entry->( $item->{obj_entry} ); + } + } + + # step 7) now that we have the good items, we want to sort them and put them on the + # list of latest items + my %lists = ( latest_items => LJ::MemCache::get( 'latest_items' ) || [] ); + foreach my $item ( @gq ) { + # $ent is always the entry, since comments always have obj_entry, and if that doesn't + # exist then obj will be the entry + my $ent = $item->{obj_entry} || $item->{obj}; + delete $item->{obj}; + delete $item->{obj_entry}; + + # step 7.5) if the entry contains any tags that we are currently showing + # globally, then put that onto the list + foreach my $tag ( grep { $LJ::LATEST_TAGS{$_} } $ent->tags ) { + my $nom = "latest_items_tag:$tag"; + $lists{$nom} ||= LJ::MemCache::get( $nom ) || []; + unshift @{$lists{$nom}}, $item; + } + + unshift @{$lists{latest_items}}, $item; + } + + # prune and set all lists + foreach my $key ( keys %lists ) { + @{$lists{$key}} = splice @{$lists{$key}}, 0, 1000; + LJ::MemCache::set( $key => $lists{$key} ); + } + + # we're done now +} + + +1; diff -r 4c1f8b40aa26 -r 57b0fb9cca14 cgi-bin/DW/Worker/LatestFeed.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cgi-bin/DW/Worker/LatestFeed.pm Sat Oct 31 04:48:12 2009 +0000 @@ -0,0 +1,40 @@ +#!/usr/bin/perl +# +# DW::Worker::LatestFeed +# +# Intermediary worker that lets us pipeline new items so we 
only have one +# task that can process them at a time. +# +# Authors: +# Mark Smith <mark@dreamwidth.org> +# +# Copyright (c) 2009 by Dreamwidth Studios, LLC. +# +# This program is free software; you may redistribute it and/or modify it under +# the same terms as Perl itself. For a copy of the license, please reference +# 'perldoc perlartistic' or 'perldoc perlgpl'. +# + +package DW::Worker::LatestFeed; + +use strict; +use base 'TheSchwartz::Worker'; +use DW::LatestFeed; + +sub work { + my ( $class, $job ) = @_; + my $opts = $job->arg; + + # FIXME: we might want to lock here, to protect against the sysadmin running + # more than one copy of this job? otoh, we should just document that there + # should only ever be one of these running. + + # all we do is pass this back to the proper module, this keeps the logic in + # one place so we don't have to track it down through four files :) + DW::LatestFeed->_process_item( $opts ); + + $job->completed; +} + + +1; diff -r 4c1f8b40aa26 -r 57b0fb9cca14 cgi-bin/LJ/User.pm --- a/cgi-bin/LJ/User.pm Fri Oct 30 23:15:10 2009 +0000 +++ b/cgi-bin/LJ/User.pm Sat Oct 31 04:48:12 2009 +0000 @@ -2122,6 +2122,13 @@ sub include_in_global_search { } +# whether this user wants to have their content included in the latest feeds or not +sub include_in_latest_feed { + my $u = $_[0]; + return $u->prop( 'latest_optout' ) ? 0 : 1; +} + + # must be called whenever birthday, location, journal modtime, journaltype, etc. # changes. 
see LJ/Directory/PackedUserRecord.pm sub invalidate_directory_record { diff -r 4c1f8b40aa26 -r 57b0fb9cca14 cgi-bin/ljlib.pl --- a/cgi-bin/ljlib.pl Fri Oct 30 23:15:10 2009 +0000 +++ b/cgi-bin/ljlib.pl Sat Oct 31 04:48:12 2009 +0000 @@ -54,6 +54,7 @@ use DW::External::User; use DW::External::User; use DW::Logic::LogItems; use LJ::CleanHTML; +use DW::LatestFeed; # make Unicode::MapUTF8 autoload: sub Unicode::MapUTF8::AUTOLOAD { diff -r 4c1f8b40aa26 -r 57b0fb9cca14 cgi-bin/ljprotocol.pl --- a/cgi-bin/ljprotocol.pl Fri Oct 30 23:15:10 2009 +0000 +++ b/cgi-bin/ljprotocol.pl Sat Oct 31 04:48:12 2009 +0000 @@ -1583,6 +1583,9 @@ sub postevent # PubSubHubbub Support LJ::Feed::generate_hubbub_jobs( $uowner, \@jobs ) unless $uowner->is_syndicated; + + # latest posts feed update + DW::LatestFeed->new_item( $entry ); } push @jobs, LJ::EventLogRecord::NewEntry->new($entry)->fire_job; diff -r 4c1f8b40aa26 -r 57b0fb9cca14 doc/config-private.pl.txt --- a/doc/config-private.pl.txt Fri Oct 30 23:15:10 2009 +0000 +++ b/doc/config-private.pl.txt Sat Oct 31 04:48:12 2009 +0000 @@ -104,6 +104,8 @@ # your own (or use any other hub you want). # @HUBBUB_HUBS = ( 'http://pubsubhubbub.appspot.com/' ); + # Add any tags here that you wish to create global 'latest posts' feeds for + # %LATEST_TAGS = ( map { $_ => 1 } qw/ tag othertag bigtag littletag / ); } { diff -r 4c1f8b40aa26 -r 57b0fb9cca14 htdocs/latest.bml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htdocs/latest.bml Sat Oct 31 04:48:12 2009 +0000 @@ -0,0 +1,158 @@ +<?_c +# +# latest.bml +# +# Shows the latest posts, comments, and other things on the site. +# +# Authors: +# Mark Smit <mark@dreamwidth.org> +# +# Copyright (c) 2009 by Dreamwidth Studios, LLC. +# +# This program is free software; you may redistribute it and/or modify it under +# the same terms as Perl itself. For a copy of the license, please reference +# 'perldoc perlartistic' or 'perldoc perlgpl'. 
+# +_c?><?page +body<= +<?_code +{ + use strict; + use vars qw/ %GET /; + + LJ::need_res( 'stc/latest.css' ); + + my ( $type, $max, $fmt, $tag ) = ( $GET{type}, ($GET{max}+0)||100, $GET{fmt}, $GET{tag} ); + $type = { entries => 'entry', comments => 'comment' }->{$type} || 'entry'; + $max = 100 if $max > 1000; + $fmt = { rss => 'rss', atom => 'atom', html => 'html' }->{$fmt} || 'html'; + $tag = '' unless $tag && exists $LJ::LATEST_TAGS{$tag}; + + # if they want a format we don't support ... FIXME: implement all formats + return "Sorry, that format is not supported yet." + if $fmt ne 'html'; + + # see if somebody has asked for this particular feed in the last minute or so, in + # which case it is going to be in memcache + my $mckey = "latest_src:$type:$max:$fmt" . ( $tag ? ":$tag" : '' ); + my $page = LJ::MemCache::get( $mckey ); + + # return from the cache + if ( $page && $page->[0] > time ) { + LJ::text_uncompress( \$page->[1] ); + return $page->[1]; + } + + # now we need a lock to make sure we're allowed to generate this data + my $lock = LJ::locker()->trylock( $mckey ); + unless ( $lock ) { + # no lock, someone else is updating this. let's try to print out the stale memcache + # page if possible, we know that next time it will be updated + if ( $page && $page->[1] > 0 ) { + LJ::text_uncompress( \$page->[1] ); + return $page->[1]; + } + + # if we get here, we don't have any data, and we don't have the lock so we can't + # construct any data. this should only happen in the rare case of a memcache + # flush when multiple people are hitting the page. + return "Sorry, something happened. Please refresh and try again!"; + } + + # ask for the items from the latest feed + my $items = DW::LatestFeed->get_items( tag => $tag ); + return "Failed to get latest items." 
+ unless $items && ref $items eq 'ARRAY'; + + # now, iterate and extract only the things we want + my @objs; + foreach my $item ( @$items ) { + next unless $item->{type} eq $type; + push @objs, [ $item->{journalid}, $item->{jitemid}, $item->{jtalkid} ]; + } + + # splice off the top number we want + @objs = splice @objs, 0, $max; + + # now get the journalids to load + my $us = LJ::load_userids( map { $_->[0] } @objs ); + + # and now construct real objects + for ( my $i = 0; $i <= $#objs; $i++ ) { + if ( $type eq 'entry' ) { + $objs[$i] = LJ::Entry->new( $us->{$objs[$i]->[0]}, jitemid => $objs[$i]->[1] ); + } elsif ( $type eq 'comment' ) { + $objs[$i] = LJ::Comment->new( $us->{$objs[$i]->[0]}, jtalkid => $objs[$i]->[2] ); + } + } + + # if we're in comment mode, let's construct the entries. we only + # have to reference this so that it gets turned into a singleton + # so later when we call something on an entry it preloads all of them. + if ( $type eq 'comment' ) { + $_->entry foreach @objs; + } + + # output the header data + my $tagfeeds = join ' ', map { $tag eq $_ ? $_ : qq{<a href="$LJ::SITEROOT/latest?tag=$_">$_</a>} } + sort { $a cmp $b } keys %LJ::LATEST_TAGS; + if ( $tag ) { + $tagfeeds = qq{[<a href="$LJ::SITEROOT/latest">show all</a>] } . $tagfeeds; + } + if ( $tagfeeds ) { + $tagfeeds = qq{<?p Tag feeds are available for some well known tags: $tagfeeds p?>}; + } + + my $ret = <<EOF; +<?p Latest things on $LJ::SITENAME. This page shows you a sample of the most recently posted +things that are available on the site. The feed is updated every minute or two. Also, new +posts and comments won't show up until five (5) minutes after they are posted. Just in case +they are accidentally posted public. 
p?> + +$tagfeeds +EOF + + # great, we now have objects, we can construct our results in some fashion + my $now = time; + foreach my $obj ( @objs ) { + if ( $obj->isa( 'LJ::Comment' ) ) { + + } elsif ( $obj->isa( 'LJ::Entry' ) ) { + # final check, must be public still + next unless $obj->security eq 'public'; + + # output entry HTML + my $subj = $obj->subject_html || 'no subject'; + my $evt = $obj->event_html; + my $tags = join ', ', sort { $a cmp $b } $obj->tags; + my $user = $obj->poster->ljuser_display; + if ( ! $obj->poster->equals( $obj->journal ) ) { + $user .= ' in ' . $obj->journal->ljuser_display; + } + my $time = LJ::ago_text( $now - $obj->logtime_unix ); + my $url = $obj->url; + my $comments = $obj->reply_count == 1 ? "1 comment" : ( $obj->reply_count > 0 ? $obj->reply_count . ' comments' : 'no comments' ); + my $replyurl = $obj->reply_url; + + $ret .= <<EOF; +<div class='latest-entry'> + <div class='tags'>$tags</div> + <div class='author'>$user ($time)</div> + <div class='subject'>$subj</div> + <div class='event'>$evt</div> + <div class='comments'>(<a href='$url'>$comments</a>) (<a href='$replyurl'>reply</a>)</div> +</div> +EOF + } + } + + # final step, toss this in memcache for a minute + LJ::text_compress( \$ret ); + LJ::MemCache::set( $mckey, [ time + 60, $ret ], 90 ); + + return $ret; +} +_code?> +<=body +title=>Latest Things +page?> diff -r 4c1f8b40aa26 -r 57b0fb9cca14 htdocs/stc/latest.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/htdocs/stc/latest.css Sat Oct 31 04:48:12 2009 +0000 @@ -0,0 +1,42 @@ +/* + stc/latest.css + + CSS classes for rendering the latest things page. + + Authors: + Mark Smith <mark@dreamwidth.org> + + Copyright (c) 2009 by Dreamwidth Studios, LLC. + + This program is free software; you may redistribute it and/or modify it under + the same terms as Perl itself. For a copy of the license, please reference + 'perldoc perlartistic' or 'perldoc perlgpl'. 
+*/ + +.latest-entry { + border: solid 1px #ddd; + margin-bottom: 1.5em; + padding: 0.3em; +} + +.latest-entry .subject { + font-weight: bold; + margin-bottom: 0.3em; +} + +.latest-entry .author { + +} + +.latest-entry .event { + margin-top: 1em; +} + +.latest-entry .tags { + font-style: italic; + float: right; +} + +.latest-entry .comments { + margin-top: 1em; +} --------------------------------------------------------------------------------