mark: A photo of Mark kneeling on top of the Taal Volcano in the Philippines. It was a long hike. (Default)
Mark Smith ([staff profile] mark) wrote in [site community profile] changelog, 2009-10-20 01:35 am

[dw-free] Make the comment importer aware of properties. This works both for new imports (we will copy as many properties as we can) as well as re-imports (we copy the picture_keyword right now).

[commit: http://hg.dwscoalition.org/dw-free/rev/7f743ff0a3ef]

Make the comment importer aware of properties. This works both for new
imports (we will copy as many properties as we can) as well as re-imports
(we copy the picture_keyword right now). While this doesn't really do a LOT
right now with all imports on an OpenID account, this will be really useful
when we allow comments by OpenID accounts to be reparented to a local user.

Patch by [staff profile] mark.

Files modified:
  • cgi-bin/DW/Worker/ContentImporter/LiveJournal/Comments.pm
  • cgi-bin/DW/Worker/ContentImporter/Local/Comments.pm
--------------------------------------------------------------------------------
diff -r 234311e53e38 -r 7f743ff0a3ef cgi-bin/DW/Worker/ContentImporter/LiveJournal/Comments.pm
--- a/cgi-bin/DW/Worker/ContentImporter/LiveJournal/Comments.pm	Mon Oct 19 12:02:38 2009 -0500
+++ b/cgi-bin/DW/Worker/ContentImporter/LiveJournal/Comments.pm	Tue Oct 20 01:35:49 2009 +0000
@@ -221,7 +221,7 @@ sub try_work {
     $log->( 'Finished fetching metadata.' );
 
     # body handling section now
-    my ( $lastid, $curid, @tags ) = ( 0, 0 );
+    my ( $lastid, $curid, $lastprop, @tags ) = ( 0, 0, undef );
 
     # setup our handlers for body XML info
     my $body_handler = sub {
@@ -235,21 +235,28 @@ sub try_work {
             $curid = $temp{id};
             $meta{$curid}{parentid} = $temp{parentid}+0;
             $meta{$curid}{jitemid} = $temp{jitemid}+0;
+        } elsif ( $lasttag eq 'property' ) {
+            $lastprop = $temp{name};
         }
     };
     my $body_closer = sub {
         # we hit a closing tag so we're not in a tag anymore
         my $tag = pop @tags;
         $lasttag = $tags[0];
+        $lastprop = undef;
     };
     my $body_content = sub {
-        # this grabs data inside of comments: body, subject, date
+        # this grabs data inside of comments: body, subject, date, properties
         return unless $curid;
-        return unless $lasttag =~ /(?:body|subject|date)/;
-        $meta{$curid}{$lasttag} .= $_[1];
-        # have to .= it, because the parser will split on punctuation such as an apostrophe
-        # that may or may not be in the data stream, and we won't know until we've already
-        # gotten some data
+
+        # have to append to it, because the parser will split on punctuation such as an apostrophe
+        # that may or may not be in the data stream, and we won't know until we've already gotten
+        # some data
+        if ( $lasttag =~ /(?:body|subject|date)/ ) {
+            $meta{$curid}{$lasttag} .= $_[1];
+        } elsif ( $lastprop && $lasttag eq 'property' ) {
+            $meta{$curid}{props}{$lastprop} .= $_[1];
+        }
     };
 
     # start looping to fetch all of the comment bodies
@@ -290,7 +297,7 @@ sub try_work {
                 # reset all text so we don't get it double posted
                 foreach my $cmt ( values %meta ) {
                     delete $cmt->{$_}
-                        foreach qw/ subject body date /;
+                        foreach qw/ subject body date props /;
                 }
 
                 # and now filter.  note that we're assuming this is ISO-8859-1, as that's a
@@ -358,7 +365,7 @@ sub try_work {
     my @to_import = sort { ( $a->{id}+0 ) <=> ( $b->{id}+0 ) } values %meta;
     my $had_unresolved = 1;
 
-    # This loop should never need to run through more then once
+    # This loop should never need to run through more than once
     # but, it will *if* for some reason a comment comes before its parent
     # which *should* never happen, but I'm handling it anyway, just in case.
     $title->( 'posting %d comments', scalar( @to_import ) );
@@ -378,7 +385,19 @@ sub try_work {
 
             # rules we might skip a content with
             next if $comment->{done}; # Skip this comment if it was already imported this round
-            next if $jtalkid_map->{$comment->{orig_id}}; # Or on a previous import round
+
+            # if this comment already exists, we might need to update it, however
+            my $err = "";
+            if ( my $jtalkid = $jtalkid_map->{$comment->{orig_id}} ) {
+                $log->( 'Comment already exists, passing to updater.' );
+
+                $comment->{id} = $jtalkid;
+                DW::Worker::ContentImporter::Local::Comments->update_comment( $u, $comment, \$err );
+                $log->( 'ERROR: %s', $err ) if $err;
+
+                $comment->{done} = 1;
+                next;
+            }
 
             # now we know this one is going in the database
             $ct++;
@@ -407,7 +426,6 @@ sub try_work {
             }
 
             # if we get here we're good to insert into the database
-            my $err = "";
             my $talkid = DW::Worker::ContentImporter::Local::Comments->insert_comment( $u, $comment, \$err );
             if ( $talkid ) {
                 $log->( 'Successfully imported source %d to new jtalkid %d.', $comment->{id}, $talkid );
@@ -417,7 +435,7 @@ sub try_work {
             }
 
             # store this information
-            $jtalkid_map->{$comment->{id}} = $talkid;
+            $jtalkid_map->{$comment->{orig_id}} = $talkid;
             $comment->{id} = $talkid;
             $comment->{done} = 1;
         }
@@ -447,7 +465,9 @@ sub do_authed_comment_fetch {
     # hit up the server with the specified information and return the raw content
     my $ua = LWP::UserAgent->new;
     my $authas = $data->{usejournal} ? "&authas=$data->{usejournal}" : '';
-    my $request = HTTP::Request->new( GET => "http://www.$data->{hostname}/export_comments.bml?get=$mode&startid=$startid&numitems=$numitems$authas" );
+    my $request = HTTP::Request->new(
+        GET => "http://www.$data->{hostname}/export_comments.bml?get=$mode&startid=$startid&numitems=$numitems&props=1$authas"
+    );
     $request->push_header( Cookie => "ljsession=$data->{_session}" );
 
     # try to get the response
diff -r 234311e53e38 -r 7f743ff0a3ef cgi-bin/DW/Worker/ContentImporter/Local/Comments.pm
--- a/cgi-bin/DW/Worker/ContentImporter/Local/Comments.pm	Mon Oct 19 12:02:38 2009 -0500
+++ b/cgi-bin/DW/Worker/ContentImporter/Local/Comments.pm	Tue Oct 20 01:35:49 2009 +0000
@@ -51,6 +51,30 @@ sub get_comment_map {
     return \%map;
 }
 
+
+=head2 C<< $class->update_comment( $u, $comment, $errref ) >>
+
+Called by the importer when it has gotten a copy of a comment and wants to make sure that our local
+copy of a comment is synchronized.
+
+$comment is a hashref representation of a single comment, same as for C<< insert_comment >>.
+
+$errref is a scalar reference to put any error text in.
+
+=cut
+
+sub update_comment {
+    my ( $class, $u, $cmt, $errref ) = @_;
+    $errref ||= '';
+
+    # FIXME: we should try to do more than just update the picture keyword, this should handle
+    # edits and such.  for now, I'm just trying to get the icons to update...
+    my $c = LJ::Comment->instance( $u, jtalkid => $cmt->{id} )
+        or return $$errref = 'Unable to instantiate LJ::Comment object.';
+    $c->set_prop( picture_keyword => $cmt->{props}->{picture_keyword} );
+}
+
+
 =head2 C<< $class->insert_comment( $u, $comment, $errref ) >>
 
 $comment is a hashref representation of a single comment, with the following format:
@@ -64,6 +88,8 @@ sub get_comment_map {
 
     parentid => $local_parent,
 
+    props => { ... }, # hashref of talkprops
+
     state => 'A',
   }
 
@@ -76,10 +102,6 @@ sub insert_comment {
     $errref ||= '';
 
     # load the data we need to make this comment
-    # FIXME: What is the point of this?
-    use Data::Dumper;
-    warn Dumper( $cmt ) unless $cmt->{jitemid};
-
     my $jitem = LJ::Entry->new( $u, jitemid => $cmt->{jitemid} );
     my $source = ( $cmt->{entry_source} || $jitem->prop( "import_source" ) ) . "?thread=" . ( $cmt->{id} << 8 );
     my $user = LJ::load_userid( $cmt->{posterid} )
@@ -91,8 +113,14 @@ sub insert_comment {
     $date =~ s/Z//;
 
     # sometimes the date is empty
-    # FIXME: why?  Dre had this, when can the date be empty?
     $date ||= LJ::mysql_time();
+
+    # remove properties that we don't know or care about
+    foreach my $name ( keys %{$cmt->{props} || {}} ) {
+        delete $cmt->{props}->{$name}
+            unless LJ::get_prop( talk => $name ) &&
+                ( $name ne 'import_source' && $name ne 'imported_from' );
+    }
 
     # build the data structures we use.  we are sort of faking it here.
     my $comment = {
@@ -102,9 +130,17 @@ sub insert_comment {
         state => $cmt->{state},
         u => $user,
 
+        # we have to promote these from properties to the main comment hash so that
+        # the enter_imported_comment function can demote them back to properties
+        picture_keyword => delete $cmt->{props}->{picture_keyword},
+        preformat       => delete $cmt->{props}->{opt_preformatted},
+        subjecticon     => delete $cmt->{props}->{subjecticon},
+        unknown8bit     => delete $cmt->{props}->{unknown8bit},
+
         props => {
             import_source => $source,
             imported_from => $cmt->{source},
+            %{$cmt->{props} || {}},
         },
 
         no_urls => 1,
--------------------------------------------------------------------------------

Post a comment in response:

This account has disabled anonymous posting.
If you don't have an account you can create one now.
HTML doesn't work in the subject.
More info about formatting

If you are unable to use this captcha for any reason, please contact us by email at support@dreamwidth.org