[dw-free] move cgi-bin/lj*.pl files into proper modules (in cgi-bin/LJ)
[commit: http://hg.dwscoalition.org/dw-free/rev/07d0d22cb369]
http://bugs.dwscoalition.org/show_bug.cgi?id=1726
Merge statslib.pl with LJ/Stats.pm
Patch by
kareila.
Files modified:
http://bugs.dwscoalition.org/show_bug.cgi?id=1726
Merge statslib.pl with LJ/Stats.pm
Patch by
![[personal profile]](https://www.dreamwidth.org/img/silk/identity/user.png)
Files modified:
- bin/maint/stats.pl
- cgi-bin/LJ/ConfCheck/General.pm
- cgi-bin/LJ/Stats.pm
- cgi-bin/statslib.pl
-------------------------------------------------------------------------------- diff -r b62f457a7a66 -r 07d0d22cb369 bin/maint/stats.pl --- a/bin/maint/stats.pl Mon Sep 12 15:57:03 2011 +0800 +++ b/bin/maint/stats.pl Mon Sep 12 16:01:20 2011 +0800 @@ -17,7 +17,8 @@ use strict; our %maint; -require "$ENV{'LJHOME'}/cgi-bin/statslib.pl"; +use lib "$ENV{LJHOME}/cgi-bin"; +use LJ::Stats; # filled in by ljmaint.pl, 0=quiet, 1=normal, 2=verbose $LJ::Stats::VERBOSE = $LJ::LJMAINT_VERBOSE >= 2 ? 1 : 0; diff -r b62f457a7a66 -r 07d0d22cb369 cgi-bin/LJ/ConfCheck/General.pm --- a/cgi-bin/LJ/ConfCheck/General.pm Mon Sep 12 15:57:03 2011 +0800 +++ b/cgi-bin/LJ/ConfCheck/General.pm Mon Sep 12 16:01:20 2011 +0800 @@ -364,7 +364,7 @@ ); add_conf('$STATS_BLOCK_SIZE', - des => "Used in statslib.pl for scaling stat totals." + des => "Used in LJ/Stats.pm for scaling stat totals." ); add_conf('$SUICIDE_UNDER', diff -r b62f457a7a66 -r 07d0d22cb369 cgi-bin/LJ/Stats.pm --- a/cgi-bin/LJ/Stats.pm Mon Sep 12 15:57:03 2011 +0800 +++ b/cgi-bin/LJ/Stats.pm Mon Sep 12 16:01:20 2011 +0800 @@ -11,13 +11,334 @@ # A copy of that license can be found in the LICENSE file included as # part of this distribution. -# This is a module for returning stats info -# Functions in statslib.pl should get moved here +# This is a module for returning stats info - merged with statslib.pl use strict; package LJ::Stats; +%LJ::Stats::INFO = ( + # jobname => { type => 'global' || 'clustered', + # jobname => jobname + # statname => statname || [statname1, statname2] + # handler => sub {}, + # max_age => age } + ); + +sub LJ::Stats::register_stat { + my $stat = shift; + return undef unless ref $stat eq 'HASH'; + + $stat->{'type'} = $stat->{'type'} eq 'clustered' ? 'clustered' : 'global'; + return undef unless $stat->{'jobname'}; + $stat->{'statname'} ||= $stat->{'jobname'}; + return undef unless ref $stat->{'handler'} eq 'CODE'; + delete $stat->{'max_age'} unless $stat->{'max_age'} > 0; + + # register in master INFO hash + $LJ::Stats::INFO{$stat->{'jobname'}} = $stat; + + return 1; +}; + +sub LJ::Stats::run_stats { + my @stats = @_ ? @_ : sort keys %LJ::Stats::INFO; + + # clear out old partialstatsdata for clusters which are no longer active + # (not in @LJ::CLUSTERS) + LJ::Stats::clear_invalid_cluster_parts(); + + foreach my $jobname (@stats) { + + my $stat = $LJ::Stats::INFO{$jobname}; + + # stats calculated on global db reader + if ($stat->{'type'} eq "global") { + unless (LJ::Stats::need_calc($jobname)) { + print "-I- Up-to-date: $jobname\n"; + next; + } + + # rather than passing an actual db handle to the stat handler, + # just pass a getter subef so it can be revalidated as necessary + my $dbr_getter = sub { + return LJ::Stats::get_db("dbr") + or die "Can't get db reader handle."; + }; + + print "-I- Running: $jobname\n"; + + my $res = $stat->{'handler'}->($dbr_getter); + die "Error running '$jobname' handler on global reader." + unless $res; + + # 2 cases: + # - 'statname' is an arrayref, %res structure is ( 'statname' => { 'arg' => 'val' } ) + # - 'statname' is scalar, %res structure is ( 'arg' => 'val' ) + { + if (ref $stat->{'statname'} eq 'ARRAY') { + foreach my $statname (@{$stat->{'statname'}}) { + LJ::Stats::clear_stat( $statname ); + foreach my $key (keys %{$res->{$statname}}) { + LJ::Stats::save_stat($statname, $key, $res->{$statname}->{$key}); + } + } + } else { + my $statname = $stat->{'statname'}; + LJ::Stats::clear_stat( $statname ); + foreach my $key (keys %$res) { + LJ::Stats::save_stat($statname, $key, $res->{$key}); + } + } + } + + LJ::Stats::save_calc($jobname); + + next; + } + + # stats calculated per-cluster + if ($stat->{'type'} eq "clustered") { + + foreach my $cid (@LJ::CLUSTERS) { + unless (LJ::Stats::need_calc($jobname, $cid)) { + print "-I- Up-to-date: $jobname, cluster $cid\n"; + next; + } + + # pass a dbcr getter subref so the stat handler knows how + # to revalidate its database handles, by invoking this closure + my $dbcr_getter = sub { + return LJ::Stats::get_db("dbcr", $cid) + or die "Can't get cluster $cid db handle."; + }; + + print "-I- Running: $jobname, cluster $cid\n"; + + my $res = $stat->{'handler'}->($dbcr_getter, $cid); + die "Error running '$jobname' handler on cluster $cid." + unless $res; + + # 2 cases: + # - 'statname' is an arrayref, %res structure is ( 'statname' => { 'arg' => 'val' } ) + # - 'statname' is scalar, %res structure is ( 'arg' => 'val' ) + { + if (ref $stat->{'statname'} eq 'ARRAY') { + foreach my $statname (@{$stat->{'statname'}}) { + LJ::Stats::clear_part( $statname, $cid ); + foreach my $key (keys %{$res->{$statname}}) { + LJ::Stats::save_part($statname, $cid, $key, $res->{$statname}->{$key}); + } + } + } else { + my $statname = $stat->{'statname'}; + LJ::Stats::clear_part( $statname, $cid ); + foreach my $key (keys %$res) { + LJ::Stats::save_part($statname, $cid, $key, $res->{$key}); + } + } + } + + LJ::Stats::save_calc($jobname, $cid); + } + + # save the summation(s) of the statname(s) we found above + if (ref $stat->{'statname'} eq 'ARRAY') { + foreach my $statname (@{$stat->{'statname'}}) { + LJ::Stats::save_sum($statname); + } + } else { + LJ::Stats::save_sum($stat->{'statname'}); + } + } + + } + + return 1; +}; + +# get raw dbr/dbh/cluster handle +sub LJ::Stats::get_db { + my $type = shift; + return undef unless $type; + my $cid = shift; + + # tell DBI to revalidate connections before returning them + $LJ::DBIRole->clear_req_cache(); + + my $opts = {raw=>1,nocache=>1}; # get_dbh opts + + # global handles + if ($type eq "dbr") { + my @roles = $LJ::STATS_FORCE_SLOW ? ("slow") : ("slave", "master"); + + my $db = LJ::get_dbh($opts, @roles); + return $db if $db; + + # don't fall back to slave/master if STATS_FORCE_SLOW is on + die "ERROR: Could not get handle for slow database role\n" + if $LJ::STATS_FORCE_SLOW; + + return undef; + } + + return LJ::get_dbh($opts, 'master') + if $type eq "dbh"; + + # cluster handles + return undef unless $cid > 0; + return LJ::get_cluster_def_reader($opts, $cid) + if $type eq "dbcm" || $type eq "dbcr"; + + return undef; +} + +# clear out previous stats from the 'stats' table +sub LJ::Stats::clear_stat { + my ($cat) = @_; + return undef unless $cat; + + my $dbh = LJ::Stats::get_db( "dbh" ); + $dbh->do( "DELETE FROM stats WHERE statcat = ?", undef, $cat ); + die $dbh->errstr if $dbh->err; + + return 1; +} + +# save a given stat to the 'stats' table in the db +sub LJ::Stats::save_stat { + my ($cat, $statkey, $val) = @_; + return undef unless $cat && $statkey && $val; + + # replace/insert stats row + my $dbh = LJ::Stats::get_db("dbh"); + $dbh->do("REPLACE INTO stats (statcat, statkey, statval) VALUES (?, ?, ?)", + undef, $cat, $statkey, $val); + die $dbh->errstr if $dbh->err; + + return 1; +} + +# note the last calctime of a given stat +sub LJ::Stats::save_calc { + my ($jobname, $cid) = @_; + return unless $jobname; + + my $dbh = LJ::Stats::get_db("dbh"); + $dbh->do("REPLACE INTO partialstats (jobname, clusterid, calctime) " . + "VALUES (?,?,UNIX_TIMESTAMP())", undef, $jobname, $cid || 1); + die $dbh->errstr if $dbh->err; + + return 1; +} + +# clear out previous partial stats +sub LJ::Stats::clear_part { + my ($statname, $cid) = @_; + return undef unless $statname && $cid > 0; + + my $dbh = LJ::Stats::get_db( "dbh" ); + $dbh->do( "DELETE FROM partialstatsdata WHERE statname = ? AND clusterid = ?", + undef, $statname, $cid ); + die $dbh->errstr if $dbh->err; + + return 1; +} + +# save partial stats +sub LJ::Stats::save_part { + my ($statname, $cid, $arg, $value) = @_; + return undef unless $statname && $cid > 0; + + # replace/insert partialstats(data) row + my $dbh = LJ::Stats::get_db("dbh"); + $dbh->do("REPLACE INTO partialstatsdata (statname, arg, clusterid, value) " . + "VALUES (?,?,?,?)", undef, $statname, $arg, $cid, $value); + die $dbh->errstr if $dbh->err; + + return 1; +}; + +# see if a given stat is stale +sub LJ::Stats::need_calc { + my ($jobname, $cid) = @_; + return undef unless $jobname; + + my $dbr = LJ::Stats::get_db("dbr"); + my $calctime = $dbr->selectrow_array("SELECT calctime FROM partialstats " . + "WHERE jobname=? AND clusterid=?", + undef, $jobname, $cid || 1); + + my $max = $LJ::Stats::INFO{$jobname}->{'max_age'} || 3600*6; # 6 hours default + return ($calctime < time() - $max); +} + +# clear invalid partialstats data for old clusters +# -- this way if clusters go inactive/dead their partial tallies won't remain +sub LJ::Stats::clear_invalid_cluster_parts { + + # delete partialstats rows for invalid clusters + # -- query not indexed, but data set is small. could add one later + my $dbh = LJ::Stats::get_db("dbh"); + my $bind = join(",", map { "?" } @LJ::CLUSTERS); + $dbh->do("DELETE FROM partialstatsdata WHERE clusterid NOT IN ($bind)", + undef, @LJ::CLUSTERS); + die $dbh->errstr if $dbh->err; + + return 1; +} + +# sum up counts for all clusters +sub LJ::Stats::save_sum { + my $statname = shift; + return undef unless $statname; + + # get sum of this stat for all clusters + my $dbr = LJ::Stats::get_db("dbr"); + my $sth = $dbr->prepare("SELECT arg, SUM(value) FROM partialstatsdata " . + "WHERE statname=? GROUP BY 1"); + $sth->execute($statname); + while (my ($arg, $count) = $sth->fetchrow_array) { + next unless $count; + LJ::Stats::save_stat($statname, $arg, $count); + } + + return 1; +} + +# get number of pages, given a total row count +sub LJ::Stats::num_blocks { + my $row_tot = shift; + return 0 unless $row_tot; + + return int($row_tot / $LJ::STATS_BLOCK_SIZE) + (($row_tot % $LJ::STATS_BLOCK_SIZE) ? 1 : 0); +} + +# get low/high ids for a BETWEEN query based on page number +sub LJ::Stats::get_block_bounds { + my ($block, $offset) = @_; + return ($offset+0, $offset+$LJ::STATS_BLOCK_SIZE) unless $block; + + # calculate min, then add one to not overlap previous max, + # unless there was no previous max so we set to 0 so we don't + # miss rows with id=0 + my $min = ($block-1)*$LJ::STATS_BLOCK_SIZE + 1; + $min = $min == 1 ? 0 : $min; + + return ($offset+$min, $offset+$block*$LJ::STATS_BLOCK_SIZE); +} + +sub LJ::Stats::block_status_line { + my ($block, $total) = @_; + return "" unless $LJ::Stats::VERBOSE; + return "" if $total == 1; # who cares about percentage for one block? + + # status line gets called AFTER work is done, so we show percentage + # for $block+1, that way the final line displays 100% + my $pct = sprintf("%.2f", 100*($block / ($total || 1))); + return " [$pct%] Processing block $block of $total.\n"; +} + sub get_popular_interests { my $memkey = 'pop_interests'; my $ints; diff -r b62f457a7a66 -r 07d0d22cb369 cgi-bin/statslib.pl --- a/cgi-bin/statslib.pl Mon Sep 12 15:57:03 2011 +0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,346 +0,0 @@ -#!/usr/bin/perl -# This code was forked from the LiveJournal project owned and operated -# by Live Journal, Inc. The code has been modified and expanded by -# Dreamwidth Studios, LLC. These files were originally licensed under -# the terms of the license supplied by Live Journal, Inc, which can -# currently be found at: -# -# http://code.livejournal.org/trac/livejournal/browser/trunk/LICENSE-LiveJournal.txt -# -# In accordance with the original license, this code and all its -# modifications are provided under the GNU General Public License. -# A copy of that license can be found in the LICENSE file included as -# part of this distribution. - - -# -# Partial Stats -# - -use strict; - -package LJ::Stats; - -%LJ::Stats::INFO = ( - # jobname => { type => 'global' || 'clustered', - # jobname => jobname - # statname => statname || [statname1, statname2] - # handler => sub {}, - # max_age => age } - ); - -sub LJ::Stats::register_stat { - my $stat = shift; - return undef unless ref $stat eq 'HASH'; - - $stat->{'type'} = $stat->{'type'} eq 'clustered' ? 'clustered' : 'global'; - return undef unless $stat->{'jobname'}; - $stat->{'statname'} ||= $stat->{'jobname'}; - return undef unless ref $stat->{'handler'} eq 'CODE'; - delete $stat->{'max_age'} unless $stat->{'max_age'} > 0; - - # register in master INFO hash - $LJ::Stats::INFO{$stat->{'jobname'}} = $stat; - - return 1; -}; - -sub LJ::Stats::run_stats { - my @stats = @_ ? @_ : sort keys %LJ::Stats::INFO; - - # clear out old partialstatsdata for clusters which are no longer active - # (not in @LJ::CLUSTERS) - LJ::Stats::clear_invalid_cluster_parts(); - - foreach my $jobname (@stats) { - - my $stat = $LJ::Stats::INFO{$jobname}; - - # stats calculated on global db reader - if ($stat->{'type'} eq "global") { - unless (LJ::Stats::need_calc($jobname)) { - print "-I- Up-to-date: $jobname\n"; - next; - } - - # rather than passing an actual db handle to the stat handler, - # just pass a getter subef so it can be revalidated as necessary - my $dbr_getter = sub { - return LJ::Stats::get_db("dbr") - or die "Can't get db reader handle."; - }; - - print "-I- Running: $jobname\n"; - - my $res = $stat->{'handler'}->($dbr_getter); - die "Error running '$jobname' handler on global reader." - unless $res; - - # 2 cases: - # - 'statname' is an arrayref, %res structure is ( 'statname' => { 'arg' => 'val' } ) - # - 'statname' is scalar, %res structure is ( 'arg' => 'val' ) - { - if (ref $stat->{'statname'} eq 'ARRAY') { - foreach my $statname (@{$stat->{'statname'}}) { - LJ::Stats::clear_stat( $statname ); - foreach my $key (keys %{$res->{$statname}}) { - LJ::Stats::save_stat($statname, $key, $res->{$statname}->{$key}); - } - } - } else { - my $statname = $stat->{'statname'}; - LJ::Stats::clear_stat( $statname ); - foreach my $key (keys %$res) { - LJ::Stats::save_stat($statname, $key, $res->{$key}); - } - } - } - - LJ::Stats::save_calc($jobname); - - next; - } - - # stats calculated per-cluster - if ($stat->{'type'} eq "clustered") { - - foreach my $cid (@LJ::CLUSTERS) { - unless (LJ::Stats::need_calc($jobname, $cid)) { - print "-I- Up-to-date: $jobname, cluster $cid\n"; - next; - } - - # pass a dbcr getter subref so the stat handler knows how - # to revalidate its database handles, by invoking this closure - my $dbcr_getter = sub { - return LJ::Stats::get_db("dbcr", $cid) - or die "Can't get cluster $cid db handle."; - }; - - print "-I- Running: $jobname, cluster $cid\n"; - - my $res = $stat->{'handler'}->($dbcr_getter, $cid); - die "Error running '$jobname' handler on cluster $cid." - unless $res; - - # 2 cases: - # - 'statname' is an arrayref, %res structure is ( 'statname' => { 'arg' => 'val' } ) - # - 'statname' is scalar, %res structure is ( 'arg' => 'val' ) - { - if (ref $stat->{'statname'} eq 'ARRAY') { - foreach my $statname (@{$stat->{'statname'}}) { - LJ::Stats::clear_part( $statname, $cid ); - foreach my $key (keys %{$res->{$statname}}) { - LJ::Stats::save_part($statname, $cid, $key, $res->{$statname}->{$key}); - } - } - } else { - my $statname = $stat->{'statname'}; - LJ::Stats::clear_part( $statname, $cid ); - foreach my $key (keys %$res) { - LJ::Stats::save_part($statname, $cid, $key, $res->{$key}); - } - } - } - - LJ::Stats::save_calc($jobname, $cid); - } - - # save the summation(s) of the statname(s) we found above - if (ref $stat->{'statname'} eq 'ARRAY') { - foreach my $statname (@{$stat->{'statname'}}) { - LJ::Stats::save_sum($statname); - } - } else { - LJ::Stats::save_sum($stat->{'statname'}); - } - } - - } - - return 1; -}; - -# get raw dbr/dbh/cluster handle -sub LJ::Stats::get_db { - my $type = shift; - return undef unless $type; - my $cid = shift; - - # tell DBI to revalidate connections before returning them - $LJ::DBIRole->clear_req_cache(); - - my $opts = {raw=>1,nocache=>1}; # get_dbh opts - - # global handles - if ($type eq "dbr") { - my @roles = $LJ::STATS_FORCE_SLOW ? ("slow") : ("slave", "master"); - - my $db = LJ::get_dbh($opts, @roles); - return $db if $db; - - # don't fall back to slave/master if STATS_FORCE_SLOW is on - die "ERROR: Could not get handle for slow database role\n" - if $LJ::STATS_FORCE_SLOW; - - return undef; - } - - return LJ::get_dbh($opts, 'master') - if $type eq "dbh"; - - # cluster handles - return undef unless $cid > 0; - return LJ::get_cluster_def_reader($opts, $cid) - if $type eq "dbcm" || $type eq "dbcr"; - - return undef; -} - -# clear out previous stats from the 'stats' table -sub LJ::Stats::clear_stat { - my ($cat) = @_; - return undef unless $cat; - - my $dbh = LJ::Stats::get_db( "dbh" ); - $dbh->do( "DELETE FROM stats WHERE statcat = ?", undef, $cat ); - die $dbh->errstr if $dbh->err; - - return 1; -} - -# save a given stat to the 'stats' table in the db -sub LJ::Stats::save_stat { - my ($cat, $statkey, $val) = @_; - return undef unless $cat && $statkey && $val; - - # replace/insert stats row - my $dbh = LJ::Stats::get_db("dbh"); - $dbh->do("REPLACE INTO stats (statcat, statkey, statval) VALUES (?, ?, ?)", - undef, $cat, $statkey, $val); - die $dbh->errstr if $dbh->err; - - return 1; -} - -# note the last calctime of a given stat -sub LJ::Stats::save_calc { - my ($jobname, $cid) = @_; - return unless $jobname; - - my $dbh = LJ::Stats::get_db("dbh"); - $dbh->do("REPLACE INTO partialstats (jobname, clusterid, calctime) " . - "VALUES (?,?,UNIX_TIMESTAMP())", undef, $jobname, $cid || 1); - die $dbh->errstr if $dbh->err; - - return 1; -} - -# clear out previous partial stats -sub LJ::Stats::clear_part { - my ($statname, $cid) = @_; - return undef unless $statname && $cid > 0; - - my $dbh = LJ::Stats::get_db( "dbh" ); - $dbh->do( "DELETE FROM partialstatsdata WHERE statname = ? AND clusterid = ?", - undef, $statname, $cid ); - die $dbh->errstr if $dbh->err; - - return 1; -} - -# save partial stats -sub LJ::Stats::save_part { - my ($statname, $cid, $arg, $value) = @_; - return undef unless $statname && $cid > 0; - - # replace/insert partialstats(data) row - my $dbh = LJ::Stats::get_db("dbh"); - $dbh->do("REPLACE INTO partialstatsdata (statname, arg, clusterid, value) " . - "VALUES (?,?,?,?)", undef, $statname, $arg, $cid, $value); - die $dbh->errstr if $dbh->err; - - return 1; -}; - -# see if a given stat is stale -sub LJ::Stats::need_calc { - my ($jobname, $cid) = @_; - return undef unless $jobname; - - my $dbr = LJ::Stats::get_db("dbr"); - my $calctime = $dbr->selectrow_array("SELECT calctime FROM partialstats " . - "WHERE jobname=? AND clusterid=?", - undef, $jobname, $cid || 1); - - my $max = $LJ::Stats::INFO{$jobname}->{'max_age'} || 3600*6; # 6 hours default - return ($calctime < time() - $max); -} - -# clear invalid partialstats data for old clusters -# -- this way if clusters go inactive/dead their partial tallies won't remain -sub LJ::Stats::clear_invalid_cluster_parts { - - # delete partialstats rows for invalid clusters - # -- query not indexed, but data set is small. could add one later - my $dbh = LJ::Stats::get_db("dbh"); - my $bind = join(",", map { "?" } @LJ::CLUSTERS); - $dbh->do("DELETE FROM partialstatsdata WHERE clusterid NOT IN ($bind)", - undef, @LJ::CLUSTERS); - die $dbh->errstr if $dbh->err; - - return 1; -} - -# sum up counts for all clusters -sub LJ::Stats::save_sum { - my $statname = shift; - return undef unless $statname; - - # get sum of this stat for all clusters - my $dbr = LJ::Stats::get_db("dbr"); - my $sth = $dbr->prepare("SELECT arg, SUM(value) FROM partialstatsdata " . - "WHERE statname=? GROUP BY 1"); - $sth->execute($statname); - while (my ($arg, $count) = $sth->fetchrow_array) { - next unless $count; - LJ::Stats::save_stat($statname, $arg, $count); - } - - return 1; -} - -# get number of pages, given a total row count -sub LJ::Stats::num_blocks { - my $row_tot = shift; - return 0 unless $row_tot; - - return int($row_tot / $LJ::STATS_BLOCK_SIZE) + (($row_tot % $LJ::STATS_BLOCK_SIZE) ? 1 : 0); -} - -# get low/high ids for a BETWEEN query based on page number -sub LJ::Stats::get_block_bounds { - my ($block, $offset) = @_; - return ($offset+0, $offset+$LJ::STATS_BLOCK_SIZE) unless $block; - - # calculate min, then add one to not overlap previous max, - # unless there was no previous max so we set to 0 so we don't - # miss rows with id=0 - my $min = ($block-1)*$LJ::STATS_BLOCK_SIZE + 1; - $min = $min == 1 ? 0 : $min; - - return ($offset+$min, $offset+$block*$LJ::STATS_BLOCK_SIZE); -} - -sub LJ::Stats::block_status_line { - my ($block, $total) = @_; - return "" unless $LJ::Stats::VERBOSE; - return "" if $total == 1; # who cares about percentage for one block? - - # status line gets called AFTER work is done, so we show percentage - # for $block+1, that way the final line displays 100% - my $pct = sprintf("%.2f", 100*($block / ($total || 1))); - return " [$pct%] Processing block $block of $total.\n"; -} - -1; --------------------------------------------------------------------------------