[dw-free] Allow importer to correctly import from multiple sites.
[commit: http://hg.dwscoalition.org/dw-free/rev/6f8927457c6f]
http://bugs.dwscoalition.org/show_bug.cgi?id=596
Fix importing from multiple sites and putting comments on the right entry.
Simple, but gets the job done.
Patch by mark.
Files modified:
http://bugs.dwscoalition.org/show_bug.cgi?id=596
Fix importing from multiple sites and putting comments on the right entry.
Simple, but gets the job done.
Patch by ![staff profile](https://www.dreamwidth.org/img/silk/identity/user_staff.png) mark.
Files modified:
- cgi-bin/DW/Worker/ContentImporter/LiveJournal/Comments.pm
- cgi-bin/DW/Worker/ContentImporter/LiveJournal/Entries.pm
--------------------------------------------------------------------------------

```diff
diff -r 31f6db459a34 -r 6f8927457c6f cgi-bin/DW/Worker/ContentImporter/LiveJournal/Comments.pm
--- a/cgi-bin/DW/Worker/ContentImporter/LiveJournal/Comments.pm Tue Apr 21 06:05:50 2009 +0000
+++ b/cgi-bin/DW/Worker/ContentImporter/LiveJournal/Comments.pm Tue Apr 21 06:55:54 2009 +0000
@@ -98,6 +98,9 @@ sub try_work {
     # now backfill into jitemid_map
     my $jitemid_map = {};
     foreach my $url ( keys %$entry_map ) {
+        # this works, see the Entries importer for more information
+        next unless $url =~ /\Q$data->{hostname}\E/;
+
         my $jitemid = $1 >> 8 if $url =~ m!/(\d+)\.html$!;
         $jitemid_map->{$jitemid} = $entry_map->{$url};
 
diff -r 31f6db459a34 -r 6f8927457c6f cgi-bin/DW/Worker/ContentImporter/LiveJournal/Entries.pm
--- a/cgi-bin/DW/Worker/ContentImporter/LiveJournal/Entries.pm Tue Apr 21 06:05:50 2009 +0000
+++ b/cgi-bin/DW/Worker/ContentImporter/LiveJournal/Entries.pm Tue Apr 21 06:55:54 2009 +0000
@@ -136,6 +136,13 @@ sub try_work {
     # post that we already know about.  (not that we really care, but it's much nicer
     # on people we're pulling from.)
     foreach my $url ( keys %$entry_map ) {
+
+        # but first, let's skip anything that isn't from the server we are importing
+        # from.  this assumes URLs never have other hostnames, so if someone were to
+        # register testlivejournal.com and do an import, they will have trouble
+        # importing.  if they want to do that to befunge this logic, more power to them.
+        next unless $url =~ /\Q$data->{hostname}\E/;
+
         unless ( $url =~ m!/(\d+)\.html$! ) {
             $log->( 'URL %s not of expected format in prune.', $url );
             next;
```

--------------------------------------------------------------------------------