[dw-free] Make the comment importer aware of properties. This works both for new imports (we will c
[commit: http://hg.dwscoalition.org/dw-free/rev/7f743ff0a3ef]
Make the comment importer aware of properties. This works both for new
imports (we will copy as many properties as we can) as well as re-imports
(we copy the picture_keyword right now). While this doesn't really do a LOT
right now with all imports on an OpenID account, this will be really useful
when we allow comments by OpenID accounts to get reparented to a local user.
Patch by
mark.
Files modified:
Make the comment importer aware of properties. This works both for new
imports (we will copy as many properties as we can) as well as re-imports
(we copy the picture_keyword right now). While this doesn't really do a LOT
right now with all imports on an OpenID account, this will be really useful
when we allow comments by OpenID accounts to get reparented to a local user.
Patch by
![staff profile](https://www.dreamwidth.org/img/silk/identity/user_staff.png) mark.
Files modified:
- cgi-bin/DW/Worker/ContentImporter/LiveJournal/Comments.pm
- cgi-bin/DW/Worker/ContentImporter/Local/Comments.pm
-------------------------------------------------------------------------------- diff -r 234311e53e38 -r 7f743ff0a3ef cgi-bin/DW/Worker/ContentImporter/LiveJournal/Comments.pm --- a/cgi-bin/DW/Worker/ContentImporter/LiveJournal/Comments.pm Mon Oct 19 12:02:38 2009 -0500 +++ b/cgi-bin/DW/Worker/ContentImporter/LiveJournal/Comments.pm Tue Oct 20 01:35:49 2009 +0000 @@ -221,7 +221,7 @@ sub try_work { $log->( 'Finished fetching metadata.' ); # body handling section now - my ( $lastid, $curid, @tags ) = ( 0, 0 ); + my ( $lastid, $curid, $lastprop, @tags ) = ( 0, 0, undef ); # setup our handlers for body XML info my $body_handler = sub { @@ -235,21 +235,28 @@ sub try_work { $curid = $temp{id}; $meta{$curid}{parentid} = $temp{parentid}+0; $meta{$curid}{jitemid} = $temp{jitemid}+0; + } elsif ( $lasttag eq 'property' ) { + $lastprop = $temp{name}; } }; my $body_closer = sub { # we hit a closing tag so we're not in a tag anymore my $tag = pop @tags; $lasttag = $tags[0]; + $lastprop = undef; }; my $body_content = sub { - # this grabs data inside of comments: body, subject, date + # this grabs data inside of comments: body, subject, date, properties return unless $curid; - return unless $lasttag =~ /(?:body|subject|date)/; - $meta{$curid}{$lasttag} .= $_[1]; - # have to .= it, because the parser will split on punctuation such as an apostrophe - # that may or may not be in the data stream, and we won't know until we've already - # gotten some data + + # have to append to it, because the parser will split on punctuation such as an apostrophe + # that may or may not be in the data stream, and we won't know until we've already gotten + # some data + if ( $lasttag =~ /(?:body|subject|date)/ ) { + $meta{$curid}{$lasttag} .= $_[1]; + } elsif ( $lastprop && $lasttag eq 'property' ) { + $meta{$curid}{props}{$lastprop} .= $_[1]; + } }; # start looping to fetch all of the comment bodies @@ -290,7 +297,7 @@ sub try_work { # reset all text so we don't get it double posted foreach my $cmt ( 
values %meta ) { delete $cmt->{$_} - foreach qw/ subject body date /; + foreach qw/ subject body date props /; } # and now filter. note that we're assuming this is ISO-8859-1, as that's a @@ -358,7 +365,7 @@ sub try_work { my @to_import = sort { ( $a->{id}+0 ) <=> ( $b->{id}+0 ) } values %meta; my $had_unresolved = 1; - # This loop should never need to run through more then once + # This loop should never need to run through more than once # but, it will *if* for some reason a comment comes before its parent # which *should* never happen, but I'm handling it anyway, just in case. $title->( 'posting %d comments', scalar( @to_import ) ); @@ -378,7 +385,19 @@ sub try_work { # rules we might skip a content with next if $comment->{done}; # Skip this comment if it was already imported this round - next if $jtalkid_map->{$comment->{orig_id}}; # Or on a previous import round + + # if this comment already exists, we might need to update it, however + my $err = ""; + if ( my $jtalkid = $jtalkid_map->{$comment->{orig_id}} ) { + $log->( 'Comment already exists, passing to updater.' 
); + + $comment->{id} = $jtalkid; + DW::Worker::ContentImporter::Local::Comments->update_comment( $u, $comment, \$err ); + $log->( 'ERROR: %s', $err ) if $err; + + $comment->{done} = 1; + next; + } # now we know this one is going in the database $ct++; @@ -407,7 +426,6 @@ sub try_work { } # if we get here we're good to insert into the database - my $err = ""; my $talkid = DW::Worker::ContentImporter::Local::Comments->insert_comment( $u, $comment, \$err ); if ( $talkid ) { $log->( 'Successfully imported source %d to new jtalkid %d.', $comment->{id}, $talkid ); @@ -417,7 +435,7 @@ sub try_work { } # store this information - $jtalkid_map->{$comment->{id}} = $talkid; + $jtalkid_map->{$comment->{orig_id}} = $talkid; $comment->{id} = $talkid; $comment->{done} = 1; } @@ -447,7 +465,9 @@ sub do_authed_comment_fetch { # hit up the server with the specified information and return the raw content my $ua = LWP::UserAgent->new; my $authas = $data->{usejournal} ? "&authas=$data->{usejournal}" : ''; - my $request = HTTP::Request->new( GET => "http://www.$data->{hostname}/export_comments.bml?get=$mode&startid=$startid&numitems=$numitems$authas" ); + my $request = HTTP::Request->new( + GET => "http://www.$data->{hostname}/export_comments.bml?get=$mode&startid=$startid&numitems=$numitems&props=1$authas" + ); $request->push_header( Cookie => "ljsession=$data->{_session}" ); # try to get the response diff -r 234311e53e38 -r 7f743ff0a3ef cgi-bin/DW/Worker/ContentImporter/Local/Comments.pm --- a/cgi-bin/DW/Worker/ContentImporter/Local/Comments.pm Mon Oct 19 12:02:38 2009 -0500 +++ b/cgi-bin/DW/Worker/ContentImporter/Local/Comments.pm Tue Oct 20 01:35:49 2009 +0000 @@ -51,6 +51,30 @@ sub get_comment_map { return \%map; } + +=head2 C<< $class->update_comment( $u, $comment, $errref ) >> + +Called by the importer when it has gotten a copy of a comment and wants to make sure that our local +copy of a comment is syncronized. 
+ +$comment is a hashref representation of a single comment, same as for <<insert_comment>>. + +$errref is a scalar reference to put any error text in. + +=cut + +sub update_comment { + my ( $class, $u, $cmt, $errref ) = @_; + $errref ||= ''; + + # FIXME: we should try to do more than just update the picture keyword, this should handle + # edits and such. for now, I'm just trying to get the icons to update... + my $c = LJ::Comment->instance( $u, jtalkid => $cmt->{id} ) + or return $$errref = 'Unable to instantiate LJ::Comment object.'; + $c->set_prop( picture_keyword => $cmt->{props}->{picture_keyword} ); +} + + =head2 C<< $class->insert_comment( $u, $comment, $errref ) >> $comment is a hashref representation of a single comment, with the following format: @@ -64,6 +88,8 @@ sub get_comment_map { parentid => $local_parent, + props => { ... }, # hashref of talkprops + state => 'A', } @@ -76,10 +102,6 @@ sub insert_comment { $errref ||= ''; # load the data we need to make this comment - # FIXME: What is the point of this? - use Data::Dumper; - warn Dumper( $cmt ) unless $cmt->{jitemid}; - my $jitem = LJ::Entry->new( $u, jitemid => $cmt->{jitemid} ); my $source = ( $cmt->{entry_source} || $jitem->prop( "import_source" ) ) . "?thread=" . ( $cmt->{id} << 8 ); my $user = LJ::load_userid( $cmt->{posterid} ) @@ -91,8 +113,14 @@ sub insert_comment { $date =~ s/Z//; # sometimes the date is empty - # FIXME: why? Dre had this, when can the date be empty? $date ||= LJ::mysql_time(); + + # remove properties that we don't know or care about + foreach my $name ( keys %{$cmt->{props} || {}} ) { + delete $cmt->{props}->{$name} + unless LJ::get_prop( talk => $name ) && + ( $name ne 'import_source' && $name ne 'imported_from' ); + } # build the data structures we use. we are sort of faking it here. 
my $comment = { @@ -102,9 +130,17 @@ sub insert_comment { state => $cmt->{state}, u => $user, + # we have to promote these from properties to the main comment hash so that + # the enter_imported_comment function can demote them back to properties + picture_keyword => delete $cmt->{props}->{picture_keyword}, + preformat => delete $cmt->{props}->{opt_preformatted}, + subjecticon => delete $cmt->{props}->{subjecticon}, + unknown8bit => delete $cmt->{props}->{unknown8bit}, + props => { import_source => $source, imported_from => $cmt->{source}, + %{$cmt->{props} || {}}, }, no_urls => 1, --------------------------------------------------------------------------------