mark: A photo of Mark kneeling on top of the Taal Volcano in the Philippines. It was a long hike. (Default)
Mark Smith ([staff profile] mark) wrote in [site community profile] changelog2009-04-22 05:00 am

[dw-free] Business statistics

[commit: http://hg.dwscoalition.org/dw-free/rev/6911a9e8a2d2]

http://bugs.dwscoalition.org/show_bug.cgi?id=124

First pass at site statistics.

Patch by [personal profile] pauamma.

Files modified:
  • bin/upgrading/update-db-general.pl
  • bin/worker/stats-collection
  • cgi-bin/DW/StatData.pm
  • cgi-bin/DW/StatData/AccountsByType.pm
  • cgi-bin/DW/StatStore.pm
  • etc/stats-collection.conf
--------------------------------------------------------------------------------
diff -r 3c6a25f07976 -r 6911a9e8a2d2 bin/upgrading/update-db-general.pl
--- a/bin/upgrading/update-db-general.pl	Tue Apr 21 19:10:53 2009 +0000
+++ b/bin/upgrading/update-db-general.pl	Wed Apr 22 05:00:17 2009 +0000
@@ -428,6 +428,30 @@ CREATE TABLE schematables (
     des text,
 
     PRIMARY KEY  (tablename)
+)
+EOC
+
+register_tablecreate("statkeylist", <<'EOC');
+CREATE TABLE statkeylist (
+    statkeyid  int unsigned NOT NULL auto_increment,
+    name       varchar(255) default NULL,
+
+    PRIMARY KEY (statkeyid),
+    UNIQUE KEY (name)
+)
+EOC
+
+register_tablecreate("site_stats", <<'EOC');
+CREATE TABLE site_stats (
+    category_id INT UNSIGNED NOT NULL,
+    key_id INT UNSIGNED NOT NULL,
+    insert_time INT UNSIGNED NOT NULL,
+    value INT UNSIGNED NOT NULL,
+
+    -- FIXME: This is good for retrieving data for a single category+key, but
+    -- maybe not as good if we want all keys for the category, with a limit on
+    -- time (ie, last 5 entries, or last 2 weeks). Do we need an extra index?
+    INDEX (category_id, key_id, insert_time) 
 )
 EOC
 
diff -r 3c6a25f07976 -r 6911a9e8a2d2 bin/worker/stats-collection
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/worker/stats-collection	Wed Apr 22 05:00:17 2009 +0000
@@ -0,0 +1,83 @@
+#!/usr/bin/perl
+#
+# DW::Worker::StatsCollection - Collect statistics
+#
+# Authors:
+#      Afuna <coder.dw@afunamatata.com>
+#
+# Copyright (c) 2009 by Dreamwidth Studios, LLC.
+#
+# This program is free software; you may redistribute it and/or modify it under
+# the same terms as Perl itself. For a copy of the license, please reference
+# 'perldoc perlartistic' or 'perldoc perlgpl'.
+
+
+package DW::Worker::StatsCollection;
+
+=head1 NAME
+
+DW::Worker::StatsCollection - Collect statistics
+
+=cut
+
+use strict;
+use warnings;
+use lib "$ENV{LJHOME}/cgi-bin";
+use YAML;
+use DW::StatStore;
+
+use base 'LJ::Worker::Manual';
+
+my $conf_file = "$ENV{LJHOME}/etc/stats-collection.conf";
+
+# return 1 if we did work, false if not.
+sub work {
+    my $class = shift;
+
+    my $conf = YAML::LoadFile( $conf_file )
+        or die "Unable to load YAML formatted config: $conf_file\n";
+
+    my %module_categories;
+    foreach my $module ( LJ::ModuleLoader::module_subclasses( 'DW::StatData' ) ) {
+        eval "use $module";
+        $module_categories{$module->category} = [ $module, $module->keylist ];
+    }
+    
+    foreach my $job ( keys %{$conf} ) {
+        my $module = $module_categories{$job}->[0];
+        my $keylist = $module_categories{$job}->[1];
+
+        unless ( $module_categories{$job}->[0] ) {
+            warn "stats-collection: No stat data module matching '$job'\n";
+            next;
+        }
+
+        $keylist = $conf->{$job} eq '*' ? $module->keylist : $conf->{$job};
+        my $data = $module->collect( @$keylist );
+        DW::StatStore->add( $job, %$data )
+            or warn "stats-collection: can't store data collected for $job\n";
+    }
+
+    return 1;
+}
+
+############
+## Run once
+DW::Worker::StatsCollection->work;
+
+
+=head1 BUGS
+
+=head1 AUTHORS
+
+Afuna <coder.dw@afunamatata.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2009 by Dreamwidth Studios, LLC.
+
+This program is free software; you may redistribute it and/or modify it under
+the same terms as Perl itself. For a copy of the license, please reference
+'perldoc perlartistic' or 'perldoc perlgpl'.
+
+=cut
diff -r 3c6a25f07976 -r 6911a9e8a2d2 cgi-bin/DW/StatData.pm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgi-bin/DW/StatData.pm	Wed Apr 22 05:00:17 2009 +0000
@@ -0,0 +1,128 @@
+#!/usr/bin/perl
+#
+# DW::StatData - Abstract superclass for statistics modules
+#
+# Authors:
+#      Afuna <coder.dw@afunamatata.com>
+#
+# Copyright (c) 2009 by Dreamwidth Studios, LLC.
+#
+# This program is free software; you may redistribute it and/or modify it under
+# the same terms as Perl itself. For a copy of the license, please reference
+# 'perldoc perlartistic' or 'perldoc perlgpl'.
+
+package DW::StatData;
+
+=head1 NAME
+
+DW::StatData - Abstract superclass for statistics modules
+
+=head1 SYNOPSIS
+
+  # examples of function usage
+
+=cut
+
+use strict;
+use warnings;
+use Carp qw( confess );
+
+use fields qw( data );
+
+=head1 API
+
+=head2 C<< $self->category >>
+
+Returns the category of statistics handled by this module. Subclasses should override this.
+
+=cut
+
+sub category {
+    confess "'category' should be implemented by subclass";
+}
+
+
+=head2 C<< $self->name >>
+
+Returns the pretty name of this category. Subclasses should override this.
+
+=cut
+
+sub name {
+    confess "'name' should be implemented by subclass";
+}
+
+
+=head2 C<< $self->keylist >>
+
+Returns an array of available keys within this category. Subclasses should override this.
+
+=cut
+
+sub keylist {
+    confess "'keylist' should be implemented by subclass"; 
+}
+
+
+=head2 C<< $self->value( $key ) >>
+
+Given a key, returns a value.
+
+=cut
+
+sub value {
+    my ( $self, $key ) = @_;
+    return $self->data->{$key};
+}
+
+
+=head2 C<< $self->data >>
+
+Returns a hashref of the statistics data under this category.
+
+=cut
+
+sub data {
+    return $_[0]->{data};
+}
+
+=head2 C<< $class->collect( @keys ) >>
+
+Collects data from a specific table or set of tables for statistics under this
+category. @keys is the list of keys to collect statistics for. Returns a
+{ key => value } hashref, like the ->data object method. Subclasses must
+implement this.
+
+=cut
+
+sub collect {
+    confess "'collect' should be implemented by subclass"; 
+}
+
+=head2 C<< $class->new( $category, $name, $key1 => $value, ... ) >>
+
+Initialize
+
+=cut
+
+sub new {
+    return fields::new( $_[0] );
+}
+
+=head1 BUGS
+
+=head1 AUTHORS
+
+Afuna <coder.dw@afunamatata.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2009 by Dreamwidth Studios, LLC.
+
+This program is free software; you may redistribute it and/or modify it under
+the same terms as Perl itself. For a copy of the license, please reference
+'perldoc perlartistic' or 'perldoc perlgpl'.
+
+=cut
+
+1;
diff -r 3c6a25f07976 -r 6911a9e8a2d2 cgi-bin/DW/StatData/AccountsByType.pm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgi-bin/DW/StatData/AccountsByType.pm	Wed Apr 22 05:00:17 2009 +0000
@@ -0,0 +1,91 @@
+#!/usr/bin/perl
+#
+# DW::StatData::AccountsByType - Total number of accounts broken down by type
+#
+# Authors:
+#      Afuna <coder.dw@afunamatata.com>
+#
+# Copyright (c) 2009 by Dreamwidth Studios, LLC.
+#
+# This program is free software; you may redistribute it and/or modify it under
+# the same terms as Perl itself. For a copy of the license, please reference
+# 'perldoc perlartistic' or 'perldoc perlgpl'.
+
+package DW::StatData::AccountsByType;
+
+=head1 NAME
+
+DW::StatData::AccountsByType - Total number of accounts broken down by type
+
+=head1 SYNOPSIS
+
+  This module returns values for the following keys:
+    redirect => number of redirected accounts
+    identity => number of identity accounts
+    personal => number of personal accounts
+    syndicated => number of syndicated accounts
+    community => number of community accounts
+    total => total number of accounts
+
+=cut
+
+use strict;
+use warnings;
+
+use base 'DW::StatData';
+
+=head1 API
+
+=head2 C<< $class->collect >>
+ 
+=cut
+
+sub category { "accounts" }
+sub name     { "Accounts by Type" }
+sub keylist  { [ qw( redirect identity personal syndicated community total ) ] }
+
+sub collect {
+    my $class = shift;
+    my %opts = map { $_ => 1 } @_;
+
+    my %data;
+    my $dbslow = LJ::get_dbh( 'slow' ) or die "Can't get slow role";
+    
+    # FIXME: look into using a count(*) ... group by. Efficiency?
+    $data{redirect} = $dbslow->selectrow_array( "SELECT COUNT(*) FROM user WHERE journaltype='R'" ) if $opts{redirect};
+    $data{identity} = $dbslow->selectrow_array( "SELECT COUNT(*) FROM user WHERE journaltype='I'" ) if $opts{identity};
+    $data{personal} = $dbslow->selectrow_array( "SELECT COUNT(*) FROM user WHERE journaltype='P'" ) if $opts{personal};
+    $data{syndicated} = $dbslow->selectrow_array( "SELECT COUNT(*) FROM user WHERE journaltype='Y'" ) if $opts{syndicated};
+    $data{community} = $dbslow->selectrow_array( "SELECT COUNT(*) FROM user WHERE journaltype='C'" ) if $opts{community};
+    
+    return \%data;
+}
+
+=head2 C<< $class->data >>
+ 
+=cut
+
+sub data {
+    my $self = shift;
+    my $data = $self->data;
+    $data{total} += $data{$_} foreach keys $data;
+    return $data;
+}
+
+=head1 BUGS
+
+=head1 AUTHORS
+
+Afuna <coder.dw@afunamatata.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2009 by Dreamwidth Studios, LLC.
+
+This program is free software; you may redistribute it and/or modify it under
+the same terms as Perl itself. For a copy of the license, please reference
+'perldoc perlartistic' or 'perldoc perlgpl'.
+
+=cut
+
+1;
diff -r 3c6a25f07976 -r 6911a9e8a2d2 cgi-bin/DW/StatStore.pm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgi-bin/DW/StatStore.pm	Wed Apr 22 05:00:17 2009 +0000
@@ -0,0 +1,113 @@
+#!/usr/bin/perl
+#
+# DW::StatStore
+#
+# Used for storing, loading, inserting, updating, etc stats.
+#
+# Authors:
+#      Mark Smith <mark@dreamwidth.org>
+#      Pau Amma <pauamma@cpan.org>
+#
+# Copyright (c) 2009 by Dreamwidth Studios, LLC.
+#
+# This program is free software; you may redistribute it and/or modify it under
+# the same terms as Perl itself.  For a copy of the license, please reference
+# 'perldoc perlartistic' or 'perldoc perlgpl'.
+
+=head1 NAME
+
+DW::StatStore -- Statistics store update and retrieval
+
+=head1 SYNOPSIS
+
+  # Add timestamped line to pony stats
+  DW::StatStore->add( 'ponies', total => 34738, sparkly => 45 )
+      or die "Some error happened";
+  # FIXME: define retrieval method(s)
+
+=cut
+
+package DW::StatStore;
+
+use strict;
+use warnings;
+use LJ::Typemap;
+
+=head1 API
+
+=head2 C<< $class->add( $category, $key1 => $value1, ... ) >>
+
+Adds key => value pairs to the statistics for $category, timestamped with the
+current date and time. Category and keys are strings, values are positive
+integers.
+
+=cut
+
+sub add {
+    my ( $class, $catkey, @stats ) = @_;
+    my $catkey_id = $class->to_id( $catkey )
+        or return undef;
+
+    my $dbh = LJ::get_db_writer()
+        or return undef;
+
+    while ( my ( $key, $val ) = splice( @stats, 0, 2 ) ) {
+        my $key_id = $class->to_id( $key )
+            or next;
+
+        # if this insert fails there's not much we can do about it, missing
+        # statistics is not the end of the world
+        $dbh->do(
+            q{INSERT INTO site_stats (category_id, key_id, insert_time, value)
+              VALUES (?, ?, UNIX_TIMESTAMP(), ?)},
+            undef, $catkey_id, $key_id, $val+0
+        );
+    }
+
+    return 1;
+}
+
+=head2 C<< $class->to_id( $key ) >>
+
+Internal: converts key to an id. Key can be either a cat key or a stat key.
+Autocreated on first reference.
+
+=cut
+
+sub to_id {
+    return $_[0]->typemap->class_to_typeid( $_[1] );
+}
+
+=head2 C<< $class->typemap >>
+
+Internal: returns typemap for storing cat keys and stat keys. Autovivified.
+
+=cut
+
+my $tm;
+sub typemap {
+    $tm ||= LJ::Typemap->new( table => 'statkeylist',
+                              classfield => 'name',
+                              idfield => 'statkeyid' );
+    return $tm;
+}
+
+1;
+
+=head1 BUGS
+
+There's no API for retrieving stat data.
+
+=head1 AUTHORS
+
+Mark Smith <mark@dreamwidth.org>
+
+Pau Amma <pauamma@cpan.org>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2009 by Dreamwidth Studios, LLC.
+
+This program is free software; you may redistribute it and/or modify it under
+the same terms as Perl itself. For a copy of the license, please reference
+'perldoc perlartistic' or 'perldoc perlgpl'.
diff -r 3c6a25f07976 -r 6911a9e8a2d2 etc/stats-collection.conf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/etc/stats-collection.conf	Wed Apr 22 05:00:17 2009 +0000
@@ -0,0 +1,9 @@
+## collect and store all stats under the accounts category
+## hangs if you don't enclose the asterisk in quotes
+# accounts: "*"
+
+## collect and store only these stats under the accounts category
+# accounts: [ identity, personal, redirect ]
+
+accounts: "*"
+
--------------------------------------------------------------------------------