mark: A photo of Mark kneeling on top of the Taal Volcano in the Philippines. It was a long hike. (Default)
Mark Smith ([staff profile] mark) wrote in [site community profile] changelog, 2009-04-07 08:31 am

[dw-free] Allow importing of your journal from another LiveJournal-based site.

[commit: http://hg.dwscoalition.org/dw-free/rev/3d033d7345ff]

http://bugs.dwscoalition.org/show_bug.cgi?id=114

Fix importer issue: sometimes we get a phantom entry from the source, and we
need to skip it if the getevents call won't return it.

Patch by [staff profile] mark.

Files modified:
  • cgi-bin/DW/Worker/ContentImporter/LiveJournal/Entries.pm
--------------------------------------------------------------------------------
diff -r b311886af1cf -r 3d033d7345ff cgi-bin/DW/Worker/ContentImporter/LiveJournal/Entries.pm
--- a/cgi-bin/DW/Worker/ContentImporter/LiveJournal/Entries.pm	Tue Apr 07 00:03:24 2009 +0000
+++ b/cgi-bin/DW/Worker/ContentImporter/LiveJournal/Entries.pm	Tue Apr 07 08:31:23 2009 +0000
@@ -137,7 +137,7 @@ sub try_work {
 
     # now get the actual events
     while ( scalar( keys %sync ) > 0 ) {
-        my $count = 0;
+        my ( $count, $last_itemid ) = ( 0, undef );
 
         # calculate what time to get entries for
         my ( $tries, $lastgrab, $hash ) = ( 0, undef, undef );
@@ -146,9 +146,10 @@ sub try_work {
             # $tries, so we can break the 'broken client' logic (note: we assert that we are
             # not broken.)
             my @keys = sort { $sync{$a}->[1] cmp $sync{$b}->[1] } keys %sync;
-            $lastgrab = $step_time->( $sync{$keys[0]}->[1], -$tries );
+            $last_itemid = $keys[0];
+            $lastgrab = $step_time->( $sync{$last_itemid}->[1], -($tries+100) );
 
-            $log->( 'Loading entries; lastsync = %s.', $lastgrab );
+            $log->( 'Loading entries; lastsync = %s, itemid = %d.', $lastgrab, $keys[0] );
             $hash = $class->call_xmlrpc( $data, 'getevents',
                 {
                     ver         => 1,
@@ -238,6 +239,12 @@ sub try_work {
             ) if @item_errors;
         }
 
+        # if we get here, we got a good result, which means that the entry we tried to get
+        # should be in the results.  if it's not, to prevent an infinite loop, let's mark
+        # it as retrieved.  FIXME: this causes problems with mass-edited journals
+        delete $sync{$last_itemid} if defined $last_itemid;
+
+        # log some status for later
         $log->( '    counted %d entries, lastgrab is now %s.', $count, $lastgrab );
     }
 
--------------------------------------------------------------------------------