mark: A photo of Mark kneeling on top of the Taal Volcano in the Philippines. It was a long hike. (Default)
Mark Smith ([staff profile] mark) wrote in [site community profile] changelog2009-11-03 12:48 am

[dw-free] interests / keywords cleanup

[commit: http://hg.dwscoalition.org/dw-free/rev/0df2b9de2ffc]

http://bugs.dwscoalition.org/show_bug.cgi?id=1776

Migrate interests functionality to using methods for database access and the
new sitekeywords table.

Patch by [personal profile] kareila.

Files modified:
  • bin/maint/stats.pl
  • bin/upgrading/base-data.sql
  • bin/upgrading/s2layers/tranquilityiii/themes.s2
  • bin/upgrading/update-db-general.pl
  • cgi-bin/LJ/Directory/Constraint/Interest.pm
  • cgi-bin/LJ/User.pm
  • cgi-bin/ljlib.pl
  • htdocs/interests.bml
  • htdocs/misc/interestdata.bml
--------------------------------------------------------------------------------
diff -r 1047fd6b29d6 -r 0df2b9de2ffc bin/maint/stats.pl
--- a/bin/maint/stats.pl	Mon Nov 02 07:49:12 2009 -0600
+++ b/bin/maint/stats.pl	Tue Nov 03 00:48:18 2009 +0000
@@ -63,8 +63,9 @@ require "$ENV{'LJHOME'}/cgi-bin/statslib
                    my $stat_min = int($prev_min - (0.2*$prev_max));
                    $stat_min = 1 if $stat_min < 1;
 
-                   my $sth = $db->prepare("SELECT interest, intcount FROM interests WHERE intcount>? " .
-                                          "ORDER BY intcount DESC, interest ASC LIMIT 400");
+                   my $sth = $db->prepare( "SELECT k.keyword, i.intcount FROM interests AS i, sitekeywords AS k " .
+                                           "WHERE k.kwid=i.intid AND i.intcount>? " .
+                                           "ORDER BY i.intcount DESC, k.keyword ASC LIMIT 400" );
                    $sth->execute($stat_min);
                    die $db->errstr if $db->err;
 
diff -r 1047fd6b29d6 -r 0df2b9de2ffc bin/upgrading/base-data.sql
--- a/bin/upgrading/base-data.sql	Mon Nov 02 07:49:12 2009 -0600
+++ b/bin/upgrading/base-data.sql	Tue Nov 03 00:48:18 2009 +0000
@@ -651,7 +651,6 @@ REPLACE INTO schemacols (colname, des, t
 REPLACE INTO schemacols (colname, des, tablename) VALUES ('instime', 'The time the action first completed.', 'duplock');
 REPLACE INTO schemacols (colname, des, tablename) VALUES ('instime', 'The time the query was buffered. Not used by applications, but useful for debugging/humans.', 'cmdbuffer');
 REPLACE INTO schemacols (colname, des, tablename) VALUES ('intcount', 'The number of users listing the interest.', 'interests');
-REPLACE INTO schemacols (colname, des, tablename) VALUES ('interest', 'The text of the interest.', 'interests');
 REPLACE INTO schemacols (colname, des, tablename) VALUES ('intid', 'Unique id assigned to the interest.', 'interests');
 REPLACE INTO schemacols (colname, des, tablename) VALUES ('is_dirty', 'One of either 1 (indexed), or NULL (not in index) - means we need to update the target\'s etypeid.', 'subs');
 REPLACE INTO schemacols (colname, des, tablename) VALUES ('item', 'base64digest of rss $item', 'synitem');
diff -r 1047fd6b29d6 -r 0df2b9de2ffc bin/upgrading/s2layers/tranquilityiii/themes.s2
--- a/bin/upgrading/s2layers/tranquilityiii/themes.s2	Mon Nov 02 07:49:12 2009 -0600
+++ b/bin/upgrading/s2layers/tranquilityiii/themes.s2	Tue Nov 03 00:48:18 2009 +0000
@@ -294,83 +294,4 @@ set color_header_background = "#6ab1b2";
 
 set color_footer_background = "#6ab1b2";
 set color_footer_link = "#effeff";
-set color_footer_link_active = "#effeff";
-set color_footer_link_hover = "#0a566c";
-set color_footer_link_visited = "#effeff";
-
-##===============================
-## Entry Colors
-##===============================
-
-set color_entry_background = "#effeff";
-set color_entry_border = "#effeff";
-set color_entry_text = "#000000";
-set color_entry_link = "#0a566c";
-set color_entry_link_active = "#0a566c";
-set color_entry_link_hover = "#6ab1b2";
-set color_entry_link_visited = "#0a566c";
-set color_entry_interaction_links = "#0a566c";
-
-set color_entry_title_background = "#effeff";
-set color_entry_title = "#000000";
-
-set color_comment_title_background = "#effeff";
-
-##===============================
-## Module Colors
-##===============================
-
-set color_module_background = "#6ab1b2";
-set color_module_border = "#6ab1b2";
-set color_module_text = "#000000";
-set color_module_link = "#effeff";
-set color_module_link_active = "#effeff";
-set color_module_link_hover = "#0a566c";
-set color_module_link_visited = "#effeff";
-
-set color_module_title = "#000000";
-set color_module_title_background = "#6ab1b2";
-
-
-#NEWLAYER: tranquilityiii/wintergreen
-layerinfo type = "theme";
-layerinfo name = "Wintergreen";
-layerinfo redist_uniq = "tranquilityiii/wintergreen";
-layerinfo author_name = "dancing_serpent";
-
-set layout_authors = [ { "name" => "dancing_serpent", "type" => "user" } ];
-
-set color_footer_link_hover = "#6ab23b";
-set color_page_link_active = "#688052";
-set color_page_link = "#688052";
-set color_module_link_hover = "#6ab23b";
-set color_module_background = "#b4cea1";
-set color_entry_border = "#0a440e";
-set color_entry_text = "#0d1402";
-set color_module_title = "#0d1402";
-set color_page_text = "#0d1402";
-set color_footer_background = "#f2fff2";
-set color_header_background = "#b4cea1";
-set color_page_border = "#f2fff2";
-set color_entry_interaction_links = "#688052";
-set color_comment_title_background = "#f2fff2";
-set color_page_background = "#0a440e";
-set color_module_title_background = "#b4cea1";
-set color_entry_link_hover = "#6ab23b";
-set color_page_title = "#0d1402";
-set color_footer_link_active = "#688052";
-set color_footer_link_visited = "#688052";
-set color_module_border = "#b4cea1";
-set color_entry_link = "#688052";
-set color_module_link = "#f2fff2";
-set color_entry_link_visited = "#688052";
-set color_module_link_visited = "#f2fff2";
-set color_entry_title = "#0d1402";
-set color_page_link_visited = "#688052";
-set color_entry_title_background = "#f2fff2";
-set color_page_link_hover = "#6ab23b";
-set color_entry_link_active = "#688052";
-set color_module_link_active = "#f2fff2";
-set color_module_text = "#0d1402";
-set color_entry_background = "#f2fff2";
-set color_footer_link = "#688052";
+set color_footer_link_active = "#effeff"
\ No newline at end of file
diff -r 1047fd6b29d6 -r 0df2b9de2ffc bin/upgrading/update-db-general.pl
--- a/bin/upgrading/update-db-general.pl	Mon Nov 02 07:49:12 2009 -0600
+++ b/bin/upgrading/update-db-general.pl	Tue Nov 03 00:48:18 2009 +0000
@@ -152,12 +152,10 @@ EOC
 
 register_tablecreate("interests", <<'EOC');
 CREATE TABLE interests (
-    intid int(10) unsigned NOT NULL auto_increment,
-    interest varchar(255) NOT NULL default '',
+    intid int(10) unsigned NOT NULL,
     intcount mediumint(8) unsigned default NULL,
 
-    PRIMARY KEY  (intid),
-    UNIQUE interest (interest)
+    PRIMARY KEY  (intid)
 )
 EOC
 
@@ -3835,6 +3833,17 @@ EOF
                   q{ALTER TABLE acctcode_promo ADD COLUMN suggest_journalid INT UNSIGNED} );
     }
 
+    # migrate interest names to sitekeywords
+    if ( table_relevant( "sitekeywords" ) && table_relevant( "interests" )
+          && column_type( "interests", "interest" ) ) {
+        do_sql( 'LOCK TABLES sitekeywords WRITE, interests WRITE' );
+        do_sql( "REPLACE INTO sitekeywords (kwid, keyword) ".
+                "SELECT intid, interest FROM interests" );
+        do_alter( "interests",
+                  "ALTER TABLE interests DROP interest" );
+        do_sql( 'UNLOCK TABLES' );
+    }
+
 });
 
 
diff -r 1047fd6b29d6 -r 0df2b9de2ffc cgi-bin/LJ/Directory/Constraint/Interest.pm
--- a/cgi-bin/LJ/Directory/Constraint/Interest.pm	Mon Nov 02 07:49:12 2009 -0600
+++ b/cgi-bin/LJ/Directory/Constraint/Interest.pm	Tue Nov 03 00:48:18 2009 +0000
@@ -31,16 +31,13 @@ sub load_row {
 sub load_row {
     my $self = shift;
     $self->{_loaded_row} = 1;
-    my $row;
 
-    my $field = $self->{intid} ? "intid" : "interest";
-    return unless $self->{$field};
+    if ( $self->{interest} && ! $self->{intid} ) {
+        $self->{intid} = LJ::get_sitekeyword_id( $self->{interest}, 0 );
+    }
+    return unless $self->{intid};
 
-    my $db = LJ::get_dbh("directory") || LJ::get_db_reader();
-    $row = $db->selectrow_hashref("SELECT intid, interest, intcount FROM interests WHERE $field=?",
-                                  undef, $self->{$field});
-
-    $self->{$_} = $row->{$_} foreach (qw(intid interest intcount));
+    ( $self->{interest}, $self->{intcount} ) = LJ::get_interest( $self->{intid} );
 }
 
 sub matching_uids {
diff -r 1047fd6b29d6 -r 0df2b9de2ffc cgi-bin/LJ/User.pm
--- a/cgi-bin/LJ/User.pm	Mon Nov 02 07:49:12 2009 -0600
+++ b/cgi-bin/LJ/User.pm	Tue Nov 03 00:48:18 2009 +0000
@@ -4122,14 +4122,17 @@ sub get_interests {
     if ( %need ) {
         my $ids = join( ",", map { $_ + 0 } keys %need );
         my $dbr = LJ::get_db_reader();
-        my $sth = $dbr->prepare( "SELECT intid, interest, intcount FROM interests ".
-                                 "WHERE intid IN ($ids)" );
-        $sth->execute;
+        my $ints = $dbr->selectall_hashref( "SELECT kwid, keyword FROM sitekeywords ".
+                                            "WHERE kwid IN ($ids)", 'kwid' );
+        my $counts = $dbr->selectall_hashref( "SELECT intid, intcount FROM interests ".
+                                              "WHERE intid IN ($ids)", 'intid' );
         my $memc_store = 0;
-        while ( my ($intid, $int, $count) = $sth->fetchrow_array ) {
+        foreach my $intid ( keys %$ints ) {
+            my $int = $ints->{$intid}->{keyword};
+            my $count = $counts->{$intid}->{intcount} + 0;
+            my $aref = [$intid, $int, $count];
             # minimize latency... only store 25 into memcache at a time
             # (too bad we don't have set_multi.... hmmmm)
-            my $aref = [$intid, $int, $count];
             if ( $memc_store++ < 25 ) {
                 # if the count is fairly high, keep item in memcache longer,
                 # since count's not so important.
@@ -4213,7 +4216,7 @@ sub set_interests {
     my %int_new = ();
     my %int_del = %$old;  # assume deleting everything, unless in @$new
 
-    # user interests go in a different table than user interests,
+    # community interests go in a different table than user interests,
     # though the schemas are the same so we can run the same queries on them
     my $uitable = $u->is_community ? 'comminterests' : 'userinterests';
 
@@ -4221,15 +4224,13 @@ sub set_interests {
     my $did_mod = 0;
 
     my @valid_ints = LJ::validate_interest_list(@$new);
-    foreach my $int (@valid_ints)
-    {
+    foreach my $int ( @valid_ints ) {
         $int_new{$int} = 1 unless $old->{$int};
         delete $int_del{$int};
     }
 
     ### were interests removed?
-    if (%int_del)
-    {
+    if ( %int_del ) {
         ## easy, we know their IDs, so delete them en masse
         my $intid_in = join(", ", values %int_del);
         $dbh->do("DELETE FROM $uitable WHERE userid=$userid AND intid IN ($intid_in)");
@@ -4239,17 +4240,14 @@ sub set_interests {
 
     ### do we have new interests to add?
     my @new_intids = ();  ## existing IDs we'll add for this user
-    if (%int_new)
-    {
+    if ( %int_new ) {
         $did_mod = 1;
 
-        ## difficult, have to find intids of interests, and create new ints for interests
-        ## that nobody has ever entered before
         my $int_in = join(", ", map { $dbh->quote($_); } keys %int_new);
-        my %int_exist;
 
         ## find existing IDs
-        my $sth = $dbh->prepare("SELECT interest, intid FROM interests WHERE interest IN ($int_in)");
+        my $sth = $dbh->prepare( "SELECT keyword, kwid FROM sitekeywords " .
+                                 "WHERE keyword IN ($int_in)" );
         $sth->execute;
         while (my ($intr, $intid) = $sth->fetchrow_array) {
             push @new_intids, $intid;       # - we'll add this later.
@@ -4257,14 +4255,11 @@ sub set_interests {
                                             #   this next pass.
         }
 
-        if (@new_intids) {
-            my $sql = "";
-            foreach my $newid (@new_intids) {
-                if ($sql) { $sql .= ", "; }
-                else { $sql = "REPLACE INTO $uitable (userid, intid) VALUES "; }
-                $sql .= "($userid, $newid)";
-            }
-            $dbh->do($sql);
+        ## do updating en masse for interests that already exist
+        if ( @new_intids ) {
+            my $sql = "REPLACE INTO $uitable (userid, intid) VALUES ";
+            $sql .= join( ", ", map { "($userid, $_)" } @new_intids );
+            $dbh->do( $sql );
 
             my $intid_in = join(", ", @new_intids);
             $dbh->do("UPDATE interests SET intcount=intcount+1 WHERE intid IN ($intid_in)");
@@ -4272,29 +4267,23 @@ sub set_interests {
     }
 
     ### do we STILL have interests to add?  (must make new intids)
-    if (%int_new)
-    {
-        foreach my $int (keys %int_new)
-        {
-            my $intid;
-            my $qint = $dbh->quote($int);
-
-            $dbh->do("INSERT INTO interests (intid, intcount, interest) ".
-                     "VALUES (NULL, 1, $qint)");
-            if ($dbh->err) {
-                # somebody beat us to creating it.  find its id.
-                $intid = $dbh->selectrow_array("SELECT intid FROM interests WHERE interest=$qint");
-                $dbh->do("UPDATE interests SET intcount=intcount+1 WHERE intid=$intid");
-            } else {
+    if ( %int_new ) {
+        foreach my $int ( keys %int_new ) {
+            my $intid = LJ::get_sitekeyword_id( $int );
+            next unless $intid;
+
+            my $rows = $dbh->do( "UPDATE interests SET intcount=intcount+1 WHERE intid=?",
+                                 undef, $intid );
+            if ( $rows eq "0E0") {
                 # newly created
-                $intid = $dbh->{'mysql_insertid'};
-            }
-            if ($intid) {
-                ## now we can actually insert it into the userinterests table:
-                $dbh->do("INSERT INTO $uitable (userid, intid) ".
-                         "VALUES ($userid, $intid)");
-                push @new_intids, $intid;
-            }
+                $dbh->do( "INSERT INTO interests (intid, intcount) VALUES (?,?)",
+                          undef, $intid, 1 );
+            }
+            next if $dbh->err;
+            ## now we can actually insert it into the userinterests table:
+            $dbh->do( "INSERT INTO $uitable (userid, intid) VALUES (?,?)",
+                      undef, $userid, $intid );
+            push @new_intids, $intid;
         }
     }
     LJ::run_hooks("set_interests", $u, \%int_del, \@new_intids); # interest => intid
diff -r 1047fd6b29d6 -r 0df2b9de2ffc cgi-bin/ljlib.pl
--- a/cgi-bin/ljlib.pl	Mon Nov 02 07:49:12 2009 -0600
+++ b/cgi-bin/ljlib.pl	Tue Nov 03 00:48:18 2009 +0000
@@ -1522,8 +1522,12 @@ sub get_interest {
         ( $intid, $int, $intcount ) = @$cached;
     } else {
         my $dbr = LJ::get_db_reader();
-        ( $int, $intcount ) =
-            $dbr->selectrow_array( "SELECT interest, intcount FROM interests WHERE intid=?",
+        ( $int ) =
+            $dbr->selectrow_array( "SELECT keyword FROM sitekeywords WHERE kwid=?",
+                                          undef, $intid );
+        die $dbr->errstr if $dbr->err;
+        ( $intcount ) =
+            $dbr->selectrow_array( "SELECT intcount FROM interests WHERE intid=?",
                                    undef, $intid );
         die $dbr->errstr if $dbr->err;
         LJ::MemCache::set( $memkey, [$intid, $int, $intcount], 3600*12 );
diff -r 1047fd6b29d6 -r 0df2b9de2ffc htdocs/interests.bml
--- a/htdocs/interests.bml	Mon Nov 02 07:49:12 2009 -0600
+++ b/htdocs/interests.bml	Tue Nov 03 00:48:18 2009 +0000
@@ -366,18 +366,11 @@ body<=
         return $ret;
     }
 
-    if (!$did_post && ($GET{'intid'} || $GET{'int'})) {
-        my $sth;
-        my $dbr = LJ::get_db_reader();
-        my ($interest, $intid, $intcount);
-        if ($GET{'intid'}) {
-            ($interest, $intid, $intcount) = $dbr->selectrow_array("SELECT interest, intid, intcount
-                FROM interests WHERE intid=?", undef, $GET{'intid'});
-        } else {
-            ($interest, $intid, $intcount) = $dbr->selectrow_array("SELECT interest, intid, intcount
-                FROM interests WHERE interest=?", undef, $GET{'int'});
-        }
-
+    if ( ! $did_post && ( $GET{intid} || $GET{int} ) ) {
+        my $intid = $GET{intid} ? $GET{intid} :
+            LJ::get_sitekeyword_id( $GET{int}, 0 );
+        my ( $interest, $intcount ) = LJ::get_interest( $intid );
+        
         my $check_int = $GET{int} || $interest;
         if (LJ::run_hook("interest_search_ignore", query => $check_int, intid => $intid)) {
             return "<?h1 $ML{'Error'} h1?><?p $ML{'.error.ignored'} p?>";
@@ -421,6 +414,7 @@ body<=
         });
 
         ### communities
+        my $dbr = LJ::get_db_reader();
         my $LIMIT = 500;
 
         my $int_query = sub {
diff -r 1047fd6b29d6 -r 0df2b9de2ffc htdocs/misc/interestdata.bml
--- a/htdocs/misc/interestdata.bml	Mon Nov 02 07:49:12 2009 -0600
+++ b/htdocs/misc/interestdata.bml	Tue Nov 03 00:48:18 2009 +0000
@@ -26,14 +26,11 @@
     }
 
     if ($GET{'int'}) {
-        my $dbr = LJ::get_db_reader();
-        my $sth = $dbr->prepare("SELECT * FROM interests WHERE interest=?");
-        $sth->execute($GET{'int'});
-        my $h = $sth->fetchrow_hashref;
-        return "! invalid interest\n" unless $h;
-        my $ret;
-        $ret = "# Note: Polite data miners cache on their end.  Impolite ones get banned.\n" .
-            "= intcount=$h->{intcount}\n";
+        my $intid = LJ::get_sitekeyword_id( $GET{int}, 0 );
+        return "! invalid interest\n" unless $intid;
+        my ($int, $intcount) = LJ::get_interest( $intid );
+        my $ret = "# Note: Polite data miners cache on their end.  Impolite ones get banned.\n" .
+            "= intcount=$intcount\n";
         return $ret;
     }
     return "! invalid parameters\n";
--------------------------------------------------------------------------------

Post a comment in response:

This account has disabled anonymous posting.
If you don't have an account you can create one now.
HTML doesn't work in the subject.
More info about formatting

If you are unable to use this captcha for any reason, please contact us by email at support@dreamwidth.org