mark: A photo of Mark kneeling on top of the Taal Volcano in the Philippines. It was a long hike. (Default)
Mark Smith ([staff profile] mark) wrote in [site community profile] changelog2009-04-22 05:00 am

[dw-free] Business statistics

[commit: http://hg.dwscoalition.org/dw-free/rev/6911a9e8a2d2]

http://bugs.dwscoalition.org/show_bug.cgi?id=124

First pass at site statistics.

Patch by [personal profile] pauamma.

Files modified:
  • bin/upgrading/update-db-general.pl
  • bin/worker/stats-collection
  • cgi-bin/DW/StatData.pm
  • cgi-bin/DW/StatData/AccountsByType.pm
  • cgi-bin/DW/StatStore.pm
  • etc/stats-collection.conf
--------------------------------------------------------------------------------
diff -r 3c6a25f07976 -r 6911a9e8a2d2 bin/upgrading/update-db-general.pl
--- a/bin/upgrading/update-db-general.pl	Tue Apr 21 19:10:53 2009 +0000
+++ b/bin/upgrading/update-db-general.pl	Wed Apr 22 05:00:17 2009 +0000
@@ -428,6 +428,30 @@ CREATE TABLE schematables (
     des text,
 
     PRIMARY KEY  (tablename)
+)
+EOC
+
+register_tablecreate("statkeylist", <<'EOC');
+CREATE TABLE statkeylist (
+    statkeyid  int unsigned NOT NULL auto_increment,
+    name       varchar(255) default NULL,
+
+    PRIMARY KEY (statkeyid),
+    UNIQUE KEY (name)
+)
+EOC
+
+register_tablecreate("site_stats", <<'EOC');
+CREATE TABLE site_stats (
+    category_id INT UNSIGNED NOT NULL,
+    key_id INT UNSIGNED NOT NULL,
+    insert_time INT UNSIGNED NOT NULL,
+    value INT UNSIGNED NOT NULL,
+
+    -- FIXME: This is good for retrieving data for a single category+key, but
+    -- maybe not as good if we want all keys for the category, with a limit on
+    -- time (ie, last 5 entries, or last 2 weeks). Do we need an extra index?
+    INDEX (category_id, key_id, insert_time) 
 )
 EOC
 
diff -r 3c6a25f07976 -r 6911a9e8a2d2 bin/worker/stats-collection
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/worker/stats-collection	Wed Apr 22 05:00:17 2009 +0000
@@ -0,0 +1,83 @@
+#!/usr/bin/perl
+#
+# DW::Worker::StatsCollection - Collect statistics
+#
+# Authors:
+#      Afuna <coder.dw@afunamatata.com>
+#
+# Copyright (c) 2009 by Dreamwidth Studios, LLC.
+#
+# This program is free software; you may redistribute it and/or modify it under
+# the same terms as Perl itself. For a copy of the license, please reference
+# 'perldoc perlartistic' or 'perldoc perlgpl'.
+
+
+package DW::Worker::StatsCollection;
+
+=head1 NAME
+
+DW::Worker::StatsCollection - Collect statistics
+
+=cut
+
+use strict;
+use warnings;
+use lib "$ENV{LJHOME}/cgi-bin";
+use YAML;
+use DW::StatStore;
+
+use base 'LJ::Worker::Manual';
+
+my $conf_file = "$ENV{LJHOME}/etc/stats-collection.conf";
+
+# return 1 if we did work, false if not.
+sub work {
+    my $class = shift;
+
+    my $conf = YAML::LoadFile( $conf_file )
+        or die "Unable to load YAML formatted config: $conf_file\n";
+
+    my %module_categories;
+    foreach my $module ( LJ::ModuleLoader::module_subclasses( 'DW::StatData' ) ) {
+        eval "use $module";
+        $module_categories{$module->category} = [ $module, $module->keylist ];
+    }
+    
+    foreach my $job ( keys %{$conf} ) {
+        my $module = $module_categories{$job}->[0];
+        my $keylist = $module_categories{$job}->[1];
+
+        unless ( $module_categories{$job}->[0] ) {
+            warn "stats-collection: No stat data module matching '$job'\n";
+            next;
+        }
+
+        $keylist = $conf->{$job} eq '*' ? $module->keylist : $conf->{$job};
+        my $data = $module->collect( @$keylist );
+        DW::StatStore->add( $job, %$data )
+            or warn "stats-collection: can't store data collected for $job\n";
+    }
+
+    return 1;
+}
+
+############
+## Run once
+DW::Worker::StatsCollection->work;
+
+
+=head1 BUGS
+
+=head1 AUTHORS
+
+Afuna <coder.dw@afunamatata.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2009 by Dreamwidth Studios, LLC.
+
+This program is free software; you may redistribute it and/or modify it under
+the same terms as Perl itself. For a copy of the license, please reference
+'perldoc perlartistic' or 'perldoc perlgpl'.
+
+=cut
diff -r 3c6a25f07976 -r 6911a9e8a2d2 cgi-bin/DW/StatData.pm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgi-bin/DW/StatData.pm	Wed Apr 22 05:00:17 2009 +0000
@@ -0,0 +1,128 @@
+#!/usr/bin/perl
+#
+# DW::StatData - Abstract superclass for statistics modules
+#
+# Authors:
+#      Afuna <coder.dw@afunamatata.com>
+#
+# Copyright (c) 2009 by Dreamwidth Studios, LLC.
+#
+# This program is free software; you may redistribute it and/or modify it under
+# the same terms as Perl itself. For a copy of the license, please reference
+# 'perldoc perlartistic' or 'perldoc perlgpl'.
+
+package DW::StatData;
+
+=head1 NAME
+
+DW::StatData - Abstract superclass for statistics modules
+
+=head1 SYNOPSIS
+
+  # examples of function usage
+
+=cut
+
+use strict;
+use warnings;
+use Carp qw( confess );
+
+use fields qw( data );
+
+=head1 API
+
+=head2 C<< $self->category >>
+
+Returns the category of statistics handled by this module. Subclasses should override this.
+
+=cut
+
+sub category {
+    confess "'category' should be implemented by subclass";
+}
+
+
+=head2 C<< $self->name >>
+
+Returns the pretty name of this category. Subclasses should override this.
+
+=cut
+
+sub name {
+    confess "'name' should be implemented by subclass";
+}
+
+
+=head2 C<< $self->keylist >>
+
+Returns an array of available keys within this category. Subclasses should override this.
+
+=cut
+
+sub keylist {
+    confess "'keylist' should be implemented by subclass"; 
+}
+
+
+=head2 C<< $self->value( $key ) >>
+
+Given a key, returns a value.
+
+=cut
+
+sub value {
+    my ( $self, $key ) = @_;
+    return $self->data->{$key};
+}
+
+
+=head2 C<< $self->data >>
+
+Returns a hashref of the statistics data under this category.
+
+=cut
+
+sub data {
+    return $_[0]->{data};
+}
+
+=head2 C<< $class->collect( @keys ) >>
+
+Collects data from a specific table or set of tables for statistics under this
+category. @keys is the list of keys to collect statistics for. Returns a
+{ key => value } hashref, like the ->data object method. Subclasses must
+implement this.
+
+=cut
+
+sub collect {
+    confess "'collect' should be implemented by subclass"; 
+}
+
+=head2 C<< $class->new( $category, $name, $key1 => $value, ... ) >>
+
+Initialize
+
+=cut
+
+sub new {
+    return fields::new( $_[0] );
+}
+
+=head1 BUGS
+
+=head1 AUTHORS
+
+Afuna <coder.dw@afunamatata.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2009 by Dreamwidth Studios, LLC.
+
+This program is free software; you may redistribute it and/or modify it under
+the same terms as Perl itself. For a copy of the license, please reference
+'perldoc perlartistic' or 'perldoc perlgpl'.
+
+=cut
+
+1;
diff -r 3c6a25f07976 -r 6911a9e8a2d2 cgi-bin/DW/StatData/AccountsByType.pm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgi-bin/DW/StatData/AccountsByType.pm	Wed Apr 22 05:00:17 2009 +0000
@@ -0,0 +1,91 @@
+#!/usr/bin/perl
+#
+# DW::StatData::AccountsByType - Total number of accounts broken down by type
+#
+# Authors:
+#      Afuna <coder.dw@afunamatata.com>
+#
+# Copyright (c) 2009 by Dreamwidth Studios, LLC.
+#
+# This program is free software; you may redistribute it and/or modify it under
+# the same terms as Perl itself. For a copy of the license, please reference
+# 'perldoc perlartistic' or 'perldoc perlgpl'.
+
+package DW::StatData::AccountsByType;
+
+=head1 NAME
+
+DW::StatData::AccountsByType - Total number of accounts broken down by type
+
+=head1 SYNOPSIS
+
+  This module returns values for the following keys:
+    redirect => number of redirected accounts
+    identity => number of identity accounts
+    personal => number of personal accounts
+    syndicated => number of syndicated accounts
+    community => number of community accounts
+    total => total number of accounts
+
+=cut
+
+use strict;
+use warnings;
+
+use base 'DW::StatData';
+
+=head1 API
+
+=head2 C<< $class->collect >>
+ 
+=cut
+
+sub category { "accounts" }
+sub name     { "Accounts by Type" }
+sub keylist  { [ qw( redirect identity personal syndicated community total ) ] }
+
+sub collect {
+    my $class = shift;
+    my %opts = map { $_ => 1 } @_;
+
+    my %data;
+    my $dbslow = LJ::get_dbh( 'slow' ) or die "Can't get slow role";
+    
+    # FIXME: look into using a count(*) ... group by. Efficiency?
+    $data{redirect} = $dbslow->selectrow_array( "SELECT COUNT(*) FROM user WHERE journaltype='R'" ) if $opts{redirect};
+    $data{identity} = $dbslow->selectrow_array( "SELECT COUNT(*) FROM user WHERE journaltype='I'" ) if $opts{identity};
+    $data{personal} = $dbslow->selectrow_array( "SELECT COUNT(*) FROM user WHERE journaltype='P'" ) if $opts{personal};
+    $data{syndicated} = $dbslow->selectrow_array( "SELECT COUNT(*) FROM user WHERE journaltype='Y'" ) if $opts{syndicated};
+    $data{community} = $dbslow->selectrow_array( "SELECT COUNT(*) FROM user WHERE journaltype='C'" ) if $opts{community};
+    
+    return \%data;
+}
+
+=head2 C<< $class->data >>
+ 
+=cut
+
+sub data {
+    my $self = shift;
+    my $data = $self->data;
+    $data{total} += $data{$_} foreach keys $data;
+    return $data;
+}
+
+=head1 BUGS
+
+=head1 AUTHORS
+
+Afuna <coder.dw@afunamatata.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2009 by Dreamwidth Studios, LLC.
+
+This program is free software; you may redistribute it and/or modify it under
+the same terms as Perl itself. For a copy of the license, please reference
+'perldoc perlartistic' or 'perldoc perlgpl'.
+
+=cut
+
+1;
diff -r 3c6a25f07976 -r 6911a9e8a2d2 cgi-bin/DW/StatStore.pm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cgi-bin/DW/StatStore.pm	Wed Apr 22 05:00:17 2009 +0000
@@ -0,0 +1,113 @@
+#!/usr/bin/perl
+#
+# DW::StatStore
+#
+# Used for storing, loading, inserting, updating, etc stats.
+#
+# Authors:
+#      Mark Smith <mark@dreamwidth.org>
+#      Pau Amma <pauamma@cpan.org>
+#
+# Copyright (c) 2009 by Dreamwidth Studios, LLC.
+#
+# This program is free software; you may redistribute it and/or modify it under
+# the same terms as Perl itself.  For a copy of the license, please reference
+# 'perldoc perlartistic' or 'perldoc perlgpl'.
+
+=head1 NAME
+
+DW::StatStore -- Statistics store update and retrieval
+
+=head1 SYNOPSIS
+
+  # Add timestamped line to pony stats
+  DW::StatStore->add( 'ponies', total => 34738, sparkly => 45 )
+      or die "Some error happened";
+  # FIXME: define retrieval method(s)
+
+=cut
+
+package DW::StatStore;
+
+use strict;
+use warnings;
+use LJ::Typemap;
+
+=head1 API
+
+=head2 C<< $class->add( $category, $key1 => $value1, ... ) >>
+
+Adds key => value pairs to the statistics for $category, timestamped with the
+current date and time. Category and keys are strings, values are positive
+integers.
+
+=cut
+
+sub add {
+    my ( $class, $catkey, @stats ) = @_;
+    my $catkey_id = $class->to_id( $catkey )
+        or return undef;
+
+    my $dbh = LJ::get_db_writer()
+        or return undef;
+
+    while ( my ( $key, $val ) = splice( @stats, 0, 2 ) ) {
+        my $key_id = $class->to_id( $key )
+            or next;
+
+        # if this insert fails there's not much we can do about it, missing
+        # statistics is not the end of the world
+        $dbh->do(
+            q{INSERT INTO site_stats (category_id, key_id, insert_time, value)
+              VALUES (?, ?, UNIX_TIMESTAMP(), ?)},
+            undef, $catkey_id, $key_id, $val+0
+        );
+    }
+
+    return 1;
+}
+
+=head2 C<< $class->to_id( $key ) >>
+
+Internal: converts key to an id. Key can be either a cat key or a stat key.
+Autocreated on first reference.
+
+=cut
+
+sub to_id {
+    return $_[0]->typemap->class_to_typeid( $_[1] );
+}
+
+=head2 C<< $class->typemap >>
+
+Internal: returns typemap for storing cat keys and stat keys. Autovivified.
+
+=cut
+
+my $tm;
+sub typemap {
+    $tm ||= LJ::Typemap->new( table => 'statkeylist',
+                              classfield => 'name',
+                              idfield => 'statkeyid' );
+    return $tm;
+}
+
+1;
+
+=head1 BUGS
+
+There's no API for retrieving stat data.
+
+=head1 AUTHORS
+
+Mark Smith <mark@dreamwidth.org>
+
+Pau Amma <pauamma@cpan.org>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 2009 by Dreamwidth Studios, LLC.
+
+This program is free software; you may redistribute it and/or modify it under
+the same terms as Perl itself. For a copy of the license, please reference
+'perldoc perlartistic' or 'perldoc perlgpl'.
diff -r 3c6a25f07976 -r 6911a9e8a2d2 etc/stats-collection.conf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/etc/stats-collection.conf	Wed Apr 22 05:00:17 2009 +0000
@@ -0,0 +1,9 @@
+## collect and store all stats under the accounts category
+## hangs if you don't enclose the asterisk in quotes
+# accounts: "*"
+
+## collect and store only these stats under the accounts category
+# accounts: [ identity, personal, redirect ]
+
+accounts: "*"
+
--------------------------------------------------------------------------------

Post a comment in response:

This account has disabled anonymous posting.
If you don't have an account you can create one now.
HTML doesn't work in the subject.
More info about formatting

If you are unable to use this captcha for any reason, please contact us by email at support@dreamwidth.org