mark: A photo of Mark kneeling on top of the Taal Volcano in the Philippines. It was a long hike. (Default)
Mark Smith ([staff profile] mark) wrote in [site community profile] changelog2009-11-03 12:48 am

[dw-free] interests / keywords cleanup

[commit: http://hg.dwscoalition.org/dw-free/rev/0df2b9de2ffc]

http://bugs.dwscoalition.org/show_bug.cgi?id=1776

Migrate interests functionality to using methods for database access and the
new sitekeywords table.

Patch by [personal profile] kareila.

Files modified:
  • bin/maint/stats.pl
  • bin/upgrading/base-data.sql
  • bin/upgrading/s2layers/tranquilityiii/themes.s2
  • bin/upgrading/update-db-general.pl
  • cgi-bin/LJ/Directory/Constraint/Interest.pm
  • cgi-bin/LJ/User.pm
  • cgi-bin/ljlib.pl
  • htdocs/interests.bml
  • htdocs/misc/interestdata.bml
--------------------------------------------------------------------------------
diff -r 1047fd6b29d6 -r 0df2b9de2ffc bin/maint/stats.pl
--- a/bin/maint/stats.pl	Mon Nov 02 07:49:12 2009 -0600
+++ b/bin/maint/stats.pl	Tue Nov 03 00:48:18 2009 +0000
@@ -63,8 +63,9 @@ require "$ENV{'LJHOME'}/cgi-bin/statslib
                    my $stat_min = int($prev_min - (0.2*$prev_max));
                    $stat_min = 1 if $stat_min < 1;
 
-                   my $sth = $db->prepare("SELECT interest, intcount FROM interests WHERE intcount>? " .
-                                          "ORDER BY intcount DESC, interest ASC LIMIT 400");
+                   my $sth = $db->prepare( "SELECT k.keyword, i.intcount FROM interests AS i, sitekeywords AS k " .
+                                           "WHERE k.kwid=i.intid AND i.intcount>? " .
+                                           "ORDER BY i.intcount DESC, k.keyword ASC LIMIT 400" );
                    $sth->execute($stat_min);
                    die $db->errstr if $db->err;
 
diff -r 1047fd6b29d6 -r 0df2b9de2ffc bin/upgrading/base-data.sql
--- a/bin/upgrading/base-data.sql	Mon Nov 02 07:49:12 2009 -0600
+++ b/bin/upgrading/base-data.sql	Tue Nov 03 00:48:18 2009 +0000
@@ -651,7 +651,6 @@ REPLACE INTO schemacols (colname, des, t
 REPLACE INTO schemacols (colname, des, tablename) VALUES ('instime', 'The time the action first completed.', 'duplock');
 REPLACE INTO schemacols (colname, des, tablename) VALUES ('instime', 'The time the query was buffered. Not used by applications, but useful for debugging/humans.', 'cmdbuffer');
 REPLACE INTO schemacols (colname, des, tablename) VALUES ('intcount', 'The number of users listing the interest.', 'interests');
-REPLACE INTO schemacols (colname, des, tablename) VALUES ('interest', 'The text of the interest.', 'interests');
 REPLACE INTO schemacols (colname, des, tablename) VALUES ('intid', 'Unique id assigned to the interest.', 'interests');
 REPLACE INTO schemacols (colname, des, tablename) VALUES ('is_dirty', 'One of either 1 (indexed), or NULL (not in index) - means we need to update the target\'s etypeid.', 'subs');
 REPLACE INTO schemacols (colname, des, tablename) VALUES ('item', 'base64digest of rss $item', 'synitem');
diff -r 1047fd6b29d6 -r 0df2b9de2ffc bin/upgrading/s2layers/tranquilityiii/themes.s2
--- a/bin/upgrading/s2layers/tranquilityiii/themes.s2	Mon Nov 02 07:49:12 2009 -0600
+++ b/bin/upgrading/s2layers/tranquilityiii/themes.s2	Tue Nov 03 00:48:18 2009 +0000
@@ -294,83 +294,4 @@ set color_header_background = "#6ab1b2";
 
 set color_footer_background = "#6ab1b2";
 set color_footer_link = "#effeff";
-set color_footer_link_active = "#effeff";
-set color_footer_link_hover = "#0a566c";
-set color_footer_link_visited = "#effeff";
-
-##===============================
-## Entry Colors
-##===============================
-
-set color_entry_background = "#effeff";
-set color_entry_border = "#effeff";
-set color_entry_text = "#000000";
-set color_entry_link = "#0a566c";
-set color_entry_link_active = "#0a566c";
-set color_entry_link_hover = "#6ab1b2";
-set color_entry_link_visited = "#0a566c";
-set color_entry_interaction_links = "#0a566c";
-
-set color_entry_title_background = "#effeff";
-set color_entry_title = "#000000";
-
-set color_comment_title_background = "#effeff";
-
-##===============================
-## Module Colors
-##===============================
-
-set color_module_background = "#6ab1b2";
-set color_module_border = "#6ab1b2";
-set color_module_text = "#000000";
-set color_module_link = "#effeff";
-set color_module_link_active = "#effeff";
-set color_module_link_hover = "#0a566c";
-set color_module_link_visited = "#effeff";
-
-set color_module_title = "#000000";
-set color_module_title_background = "#6ab1b2";
-
-
-#NEWLAYER: tranquilityiii/wintergreen
-layerinfo type = "theme";
-layerinfo name = "Wintergreen";
-layerinfo redist_uniq = "tranquilityiii/wintergreen";
-layerinfo author_name = "dancing_serpent";
-
-set layout_authors = [ { "name" => "dancing_serpent", "type" => "user" } ];
-
-set color_footer_link_hover = "#6ab23b";
-set color_page_link_active = "#688052";
-set color_page_link = "#688052";
-set color_module_link_hover = "#6ab23b";
-set color_module_background = "#b4cea1";
-set color_entry_border = "#0a440e";
-set color_entry_text = "#0d1402";
-set color_module_title = "#0d1402";
-set color_page_text = "#0d1402";
-set color_footer_background = "#f2fff2";
-set color_header_background = "#b4cea1";
-set color_page_border = "#f2fff2";
-set color_entry_interaction_links = "#688052";
-set color_comment_title_background = "#f2fff2";
-set color_page_background = "#0a440e";
-set color_module_title_background = "#b4cea1";
-set color_entry_link_hover = "#6ab23b";
-set color_page_title = "#0d1402";
-set color_footer_link_active = "#688052";
-set color_footer_link_visited = "#688052";
-set color_module_border = "#b4cea1";
-set color_entry_link = "#688052";
-set color_module_link = "#f2fff2";
-set color_entry_link_visited = "#688052";
-set color_module_link_visited = "#f2fff2";
-set color_entry_title = "#0d1402";
-set color_page_link_visited = "#688052";
-set color_entry_title_background = "#f2fff2";
-set color_page_link_hover = "#6ab23b";
-set color_entry_link_active = "#688052";
-set color_module_link_active = "#f2fff2";
-set color_module_text = "#0d1402";
-set color_entry_background = "#f2fff2";
-set color_footer_link = "#688052";
+set color_footer_link_active = "#effeff"
\ No newline at end of file
diff -r 1047fd6b29d6 -r 0df2b9de2ffc bin/upgrading/update-db-general.pl
--- a/bin/upgrading/update-db-general.pl	Mon Nov 02 07:49:12 2009 -0600
+++ b/bin/upgrading/update-db-general.pl	Tue Nov 03 00:48:18 2009 +0000
@@ -152,12 +152,10 @@ EOC
 
 register_tablecreate("interests", <<'EOC');
 CREATE TABLE interests (
-    intid int(10) unsigned NOT NULL auto_increment,
-    interest varchar(255) NOT NULL default '',
+    intid int(10) unsigned NOT NULL,
     intcount mediumint(8) unsigned default NULL,
 
-    PRIMARY KEY  (intid),
-    UNIQUE interest (interest)
+    PRIMARY KEY  (intid)
 )
 EOC
 
@@ -3835,6 +3833,17 @@ EOF
                   q{ALTER TABLE acctcode_promo ADD COLUMN suggest_journalid INT UNSIGNED} );
     }
 
+    # migrate interest names to sitekeywords
+    if ( table_relevant( "sitekeywords" ) && table_relevant( "interests" )
+          && column_type( "interests", "interest" ) ) {
+        do_sql( 'LOCK TABLES sitekeywords WRITE, interests WRITE' );
+        do_sql( "REPLACE INTO sitekeywords (kwid, keyword) ".
+                "SELECT intid, interest FROM interests" );
+        do_alter( "interests",
+                  "ALTER TABLE interests DROP interest" );
+        do_sql( 'UNLOCK TABLES' );
+    }
+
 });
 
 
diff -r 1047fd6b29d6 -r 0df2b9de2ffc cgi-bin/LJ/Directory/Constraint/Interest.pm
--- a/cgi-bin/LJ/Directory/Constraint/Interest.pm	Mon Nov 02 07:49:12 2009 -0600
+++ b/cgi-bin/LJ/Directory/Constraint/Interest.pm	Tue Nov 03 00:48:18 2009 +0000
@@ -31,16 +31,13 @@ sub load_row {
 sub load_row {
     my $self = shift;
     $self->{_loaded_row} = 1;
-    my $row;
 
-    my $field = $self->{intid} ? "intid" : "interest";
-    return unless $self->{$field};
+    if ( $self->{interest} && ! $self->{intid} ) {
+        $self->{intid} = LJ::get_sitekeyword_id( $self->{interest}, 0 );
+    }
+    return unless $self->{intid};
 
-    my $db = LJ::get_dbh("directory") || LJ::get_db_reader();
-    $row = $db->selectrow_hashref("SELECT intid, interest, intcount FROM interests WHERE $field=?",
-                                  undef, $self->{$field});
-
-    $self->{$_} = $row->{$_} foreach (qw(intid interest intcount));
+    ( $self->{interest}, $self->{intcount} ) = LJ::get_interest( $self->{intid} );
 }
 
 sub matching_uids {
diff -r 1047fd6b29d6 -r 0df2b9de2ffc cgi-bin/LJ/User.pm
--- a/cgi-bin/LJ/User.pm	Mon Nov 02 07:49:12 2009 -0600
+++ b/cgi-bin/LJ/User.pm	Tue Nov 03 00:48:18 2009 +0000
@@ -4122,14 +4122,17 @@ sub get_interests {
     if ( %need ) {
         my $ids = join( ",", map { $_ + 0 } keys %need );
         my $dbr = LJ::get_db_reader();
-        my $sth = $dbr->prepare( "SELECT intid, interest, intcount FROM interests ".
-                                 "WHERE intid IN ($ids)" );
-        $sth->execute;
+        my $ints = $dbr->selectall_hashref( "SELECT kwid, keyword FROM sitekeywords ".
+                                            "WHERE kwid IN ($ids)", 'kwid' );
+        my $counts = $dbr->selectall_hashref( "SELECT intid, intcount FROM interests ".
+                                              "WHERE intid IN ($ids)", 'intid' );
         my $memc_store = 0;
-        while ( my ($intid, $int, $count) = $sth->fetchrow_array ) {
+        foreach my $intid ( keys %$ints ) {
+            my $int = $ints->{$intid}->{keyword};
+            my $count = $counts->{$intid}->{intcount} + 0;
+            my $aref = [$intid, $int, $count];
             # minimize latency... only store 25 into memcache at a time
             # (too bad we don't have set_multi.... hmmmm)
-            my $aref = [$intid, $int, $count];
             if ( $memc_store++ < 25 ) {
                 # if the count is fairly high, keep item in memcache longer,
                 # since count's not so important.
@@ -4213,7 +4216,7 @@ sub set_interests {
     my %int_new = ();
     my %int_del = %$old;  # assume deleting everything, unless in @$new
 
-    # user interests go in a different table than user interests,
+    # community interests go in a different table than user interests,
     # though the schemas are the same so we can run the same queries on them
     my $uitable = $u->is_community ? 'comminterests' : 'userinterests';
 
@@ -4221,15 +4224,13 @@ sub set_interests {
     my $did_mod = 0;
 
     my @valid_ints = LJ::validate_interest_list(@$new);
-    foreach my $int (@valid_ints)
-    {
+    foreach my $int ( @valid_ints ) {
         $int_new{$int} = 1 unless $old->{$int};
         delete $int_del{$int};
     }
 
     ### were interests removed?
-    if (%int_del)
-    {
+    if ( %int_del ) {
         ## easy, we know their IDs, so delete them en masse
         my $intid_in = join(", ", values %int_del);
         $dbh->do("DELETE FROM $uitable WHERE userid=$userid AND intid IN ($intid_in)");
@@ -4239,17 +4240,14 @@ sub set_interests {
 
     ### do we have new interests to add?
     my @new_intids = ();  ## existing IDs we'll add for this user
-    if (%int_new)
-    {
+    if ( %int_new ) {
         $did_mod = 1;
 
-        ## difficult, have to find intids of interests, and create new ints for interests
-        ## that nobody has ever entered before
         my $int_in = join(", ", map { $dbh->quote($_); } keys %int_new);
-        my %int_exist;
 
         ## find existing IDs
-        my $sth = $dbh->prepare("SELECT interest, intid FROM interests WHERE interest IN ($int_in)");
+        my $sth = $dbh->prepare( "SELECT keyword, kwid FROM sitekeywords " .
+                                 "WHERE keyword IN ($int_in)" );
         $sth->execute;
         while (my ($intr, $intid) = $sth->fetchrow_array) {
             push @new_intids, $intid;       # - we'll add this later.
@@ -4257,14 +4255,11 @@ sub set_interests {
                                             #   this next pass.
         }
 
-        if (@new_intids) {
-            my $sql = "";
-            foreach my $newid (@new_intids) {
-                if ($sql) { $sql .= ", "; }
-                else { $sql = "REPLACE INTO $uitable (userid, intid) VALUES "; }
-                $sql .= "($userid, $newid)";
-            }
-            $dbh->do($sql);
+        ## do updating en masse for interests that already exist
+        if ( @new_intids ) {
+            my $sql = "REPLACE INTO $uitable (userid, intid) VALUES ";
+            $sql .= join( ", ", map { "($userid, $_)" } @new_intids );
+            $dbh->do( $sql );
 
             my $intid_in = join(", ", @new_intids);
             $dbh->do("UPDATE interests SET intcount=intcount+1 WHERE intid IN ($intid_in)");
@@ -4272,29 +4267,23 @@ sub set_interests {
     }
 
     ### do we STILL have interests to add?  (must make new intids)
-    if (%int_new)
-    {
-        foreach my $int (keys %int_new)
-        {
-            my $intid;
-            my $qint = $dbh->quote($int);
-
-            $dbh->do("INSERT INTO interests (intid, intcount, interest) ".
-                     "VALUES (NULL, 1, $qint)");
-            if ($dbh->err) {
-                # somebody beat us to creating it.  find its id.
-                $intid = $dbh->selectrow_array("SELECT intid FROM interests WHERE interest=$qint");
-                $dbh->do("UPDATE interests SET intcount=intcount+1 WHERE intid=$intid");
-            } else {
+    if ( %int_new ) {
+        foreach my $int ( keys %int_new ) {
+            my $intid = LJ::get_sitekeyword_id( $int );
+            next unless $intid;
+
+            my $rows = $dbh->do( "UPDATE interests SET intcount=intcount+1 WHERE intid=?",
+                                 undef, $intid );
+            if ( $rows eq "0E0") {
                 # newly created
-                $intid = $dbh->{'mysql_insertid'};
-            }
-            if ($intid) {
-                ## now we can actually insert it into the userinterests table:
-                $dbh->do("INSERT INTO $uitable (userid, intid) ".
-                         "VALUES ($userid, $intid)");
-                push @new_intids, $intid;
-            }
+                $dbh->do( "INSERT INTO interests (intid, intcount) VALUES (?,?)",
+                          undef, $intid, 1 );
+            }
+            next if $dbh->err;
+            ## now we can actually insert it into the userinterests table:
+            $dbh->do( "INSERT INTO $uitable (userid, intid) VALUES (?,?)",
+                      undef, $userid, $intid );
+            push @new_intids, $intid;
         }
     }
     LJ::run_hooks("set_interests", $u, \%int_del, \@new_intids); # interest => intid
diff -r 1047fd6b29d6 -r 0df2b9de2ffc cgi-bin/ljlib.pl
--- a/cgi-bin/ljlib.pl	Mon Nov 02 07:49:12 2009 -0600
+++ b/cgi-bin/ljlib.pl	Tue Nov 03 00:48:18 2009 +0000
@@ -1522,8 +1522,12 @@ sub get_interest {
         ( $intid, $int, $intcount ) = @$cached;
     } else {
         my $dbr = LJ::get_db_reader();
-        ( $int, $intcount ) =
-            $dbr->selectrow_array( "SELECT interest, intcount FROM interests WHERE intid=?",
+        ( $int ) =
+            $dbr->selectrow_array( "SELECT keyword FROM sitekeywords WHERE kwid=?",
+                                          undef, $intid );
+        die $dbr->errstr if $dbr->err;
+        ( $intcount ) =
+            $dbr->selectrow_array( "SELECT intcount FROM interests WHERE intid=?",
                                    undef, $intid );
         die $dbr->errstr if $dbr->err;
         LJ::MemCache::set( $memkey, [$intid, $int, $intcount], 3600*12 );
diff -r 1047fd6b29d6 -r 0df2b9de2ffc htdocs/interests.bml
--- a/htdocs/interests.bml	Mon Nov 02 07:49:12 2009 -0600
+++ b/htdocs/interests.bml	Tue Nov 03 00:48:18 2009 +0000
@@ -366,18 +366,11 @@ body<=
         return $ret;
     }
 
-    if (!$did_post && ($GET{'intid'} || $GET{'int'})) {
-        my $sth;
-        my $dbr = LJ::get_db_reader();
-        my ($interest, $intid, $intcount);
-        if ($GET{'intid'}) {
-            ($interest, $intid, $intcount) = $dbr->selectrow_array("SELECT interest, intid, intcount
-                FROM interests WHERE intid=?", undef, $GET{'intid'});
-        } else {
-            ($interest, $intid, $intcount) = $dbr->selectrow_array("SELECT interest, intid, intcount
-                FROM interests WHERE interest=?", undef, $GET{'int'});
-        }
-
+    if ( ! $did_post && ( $GET{intid} || $GET{int} ) ) {
+        my $intid = $GET{intid} ? $GET{intid} :
+            LJ::get_sitekeyword_id( $GET{int}, 0 );
+        my ( $interest, $intcount ) = LJ::get_interest( $intid );
+        
         my $check_int = $GET{int} || $interest;
         if (LJ::run_hook("interest_search_ignore", query => $check_int, intid => $intid)) {
             return "<?h1 $ML{'Error'} h1?><?p $ML{'.error.ignored'} p?>";
@@ -421,6 +414,7 @@ body<=
         });
 
         ### communities
+        my $dbr = LJ::get_db_reader();
         my $LIMIT = 500;
 
         my $int_query = sub {
diff -r 1047fd6b29d6 -r 0df2b9de2ffc htdocs/misc/interestdata.bml
--- a/htdocs/misc/interestdata.bml	Mon Nov 02 07:49:12 2009 -0600
+++ b/htdocs/misc/interestdata.bml	Tue Nov 03 00:48:18 2009 +0000
@@ -26,14 +26,11 @@
     }
 
     if ($GET{'int'}) {
-        my $dbr = LJ::get_db_reader();
-        my $sth = $dbr->prepare("SELECT * FROM interests WHERE interest=?");
-        $sth->execute($GET{'int'});
-        my $h = $sth->fetchrow_hashref;
-        return "! invalid interest\n" unless $h;
-        my $ret;
-        $ret = "# Note: Polite data miners cache on their end.  Impolite ones get banned.\n" .
-            "= intcount=$h->{intcount}\n";
+        my $intid = LJ::get_sitekeyword_id( $GET{int}, 0 );
+        return "! invalid interest\n" unless $intid;
+        my ($int, $intcount) = LJ::get_interest( $intid );
+        my $ret = "# Note: Polite data miners cache on their end.  Impolite ones get banned.\n" .
+            "= intcount=$intcount\n";
         return $ret;
     }
     return "! invalid parameters\n";
--------------------------------------------------------------------------------