← Back to team overview

mvhub-dev team mailing list archive

[Merge] lp:~leegoodrich/mvhub/new_external_reports into lp:mvhub

 

Lee Goodrich has proposed merging lp:~leegoodrich/mvhub/new_external_reports into lp:mvhub.

Requested reviews:
  MVHub devs with commit rights (mvhub-commit)


Refactored run_external_reports to work with the new codebase. Added an "External Reports" section to reports.shtml on both sites. Added run_external_reports to mvhub-cron.
-- 
https://code.launchpad.net/~leegoodrich/mvhub/new_external_reports/+merge/28673
Your team MVHub Developers is subscribed to branch lp:mvhub.
=== modified file 'app-mvhub/DocumentRoot/static/mvh/html/reports.shtml'
--- app-mvhub/DocumentRoot/static/mvh/html/reports.shtml	2010-06-22 19:07:08 +0000
+++ app-mvhub/DocumentRoot/static/mvh/html/reports.shtml	2010-06-28 19:40:55 +0000
@@ -16,14 +16,25 @@
     <div id="contentwrapper">
       <div id="contentcolumn">
         <div class="innertube">
-        <h1>PDF Reports</h1>
-	   <p> <a href='/reports/MVHub.com_agencies_and_all_programs.pdf'>
+          <h1>External Reports</h1>
+	   <p> <a href='/reports/mvh/visitors.html'>
+	        Visitors Report
+	        </a> 
+	        This is a report of visitors.
+	   </p>
+	    <p> <a href='/reports/mvh/analog.html'>
+	        Analog Report
+	        </a> 
+	        This is a full web statistics report.
+	   </p>
+       <h1>PDF Reports</h1>
+	   <p> <a href='/reports/mvh/MVHub.com_agencies_and_all_programs.pdf'>
 	        All Agency and Program info
 	        </a> 
 	        formatted for printing (.pdf)
 	   </p>
 	   <p> 
-	       <a href='/reports/MVHub.com_agencies_only.pdf'>
+	       <a href='/reports/mvh/MVHub.com_agencies_only.pdf'>
 	           Agency info only
 	       </a> 
 	       formatted for printing (.pdf)

=== modified file 'app-mvhub/DocumentRoot/static/nsp/html/reports.shtml'
--- app-mvhub/DocumentRoot/static/nsp/html/reports.shtml	2010-06-22 19:00:02 +0000
+++ app-mvhub/DocumentRoot/static/nsp/html/reports.shtml	2010-06-28 19:40:55 +0000
@@ -16,14 +16,25 @@
     <div id="contentwrapper">
       <div id="contentcolumn">
         <div class="innertube">
-        <h1>PDF Reports</h1>
-	   <p> <a href='/reports/NorthShorePort.org_agencies_and_all_programs.pdf'>
-	        All Agency and Program info
-	        </a> 
-	        formatted for printing (.pdf)
+         <h1>External Reports</h1>
+	   <p> <a href='/reports/nsp/visitors.html'>
+	        Visitors Report
+	        </a> 
+	        This is a report of visitors.
+	   </p>
+	    <p> <a href='/reports/nsp/analog.html'>
+	        Analog Report
+	        </a> 
+	        This is a full web statistics report.
+	   </p>
+	  <h1>PDF Reports</h1>
+	   <p> <a href='/reports/nsp/NorthShorePort.org_agencies_and_all_programs.pdf'>
+	      All Agency and Program info
+		</a>
+	       formatted for printing (.pdf)
 	   </p>
 	   <p> 
-	       <a href='/reports/NorthShorePort.org_agencies_only.pdf'>
+	       <a href='/reports/nsp/NorthShorePort.org_agencies_only.pdf'>
 	           Agency info only
 	       </a> 
 	       formatted for printing (.pdf)

=== added file 'app-mvhub/bin/run_external_reports'
--- app-mvhub/bin/run_external_reports	1970-01-01 00:00:00 +0000
+++ app-mvhub/bin/run_external_reports	2010-06-28 19:40:55 +0000
@@ -0,0 +1,312 @@
+#!/usr/bin/perl
+
+# Generate web traffic reports.
+# Usage: run_external_reports [config_file]  (default: $MV_CONFIG_FILE)
+
+use strict;
+use warnings;
+
+use Carp;
+use File::Basename;
+use File::Copy;
+
+use MVHub::Utils::ConfigSimple;
+
+# config file: first command-line argument, else $ENV{MV_CONFIG_FILE}
+my $config_file = $ARGV[0] || $ENV{MV_CONFIG_FILE};
+my $CFG = MVHub::Utils::ConfigSimple::create_config_from($config_file);
+
+my $LOG_DIR = $CFG->param('ABSOLUTE_PATH.log_dir');
+my $SITE_URL = $CFG->param('SITE.website_name');
+
+# log for this script itself; analog also keeps a log for each site
+my $LOGFILE = "$LOG_DIR/run_external_reports.log";
+
+{    # main
+    # Pipeline: rotate the live Apache log, drop local traffic, resolve
+    # IPs to hostnames, then build both HTML reports from the result.
+    # Each step records its progress through logentry().
+
+    logentry("started run");
+
+    # Move the live log aside first: apache and jdresolve must never be
+    # working on the same file at the same time.
+    my $raw_log = "$LOG_DIR/combined.log.resolveme";
+    move_logfile( "$LOG_DIR/combined.log", $raw_log );
+
+    # Lines that start with a local 10.0.0.x address are dropped so they
+    # don't show up in the reports.
+    strip_local_ip_numbers( $raw_log, '^10\.0\.0\.' );
+
+    # jdresolve appends to the resolved log and keeps its DNS cache in
+    # the configured tmp dir.
+    my $resolved_log = "$LOG_DIR/combined.log.resolved";
+    my $tmp_dir      = $CFG->param('ABSOLUTE_PATH.tmp_dir');
+    replace_ip_with_hostnames( $raw_log, $resolved_log, $tmp_dir );
+
+    # Both reports are built from the resolved log and written into the
+    # configured reports directory.
+    my $reports_dir = $CFG->param('ABSOLUTE_PATH.reports_dir');
+    run_visitors_report( $resolved_log, "$reports_dir/visitors.html" );
+    run_analog_report( $resolved_log, "$reports_dir/analog.html" );
+
+    logentry("finished run");
+}
+
+# Move the live log ($source) aside to $destination for later processing
+# and leave a fresh, world-readable $source behind.  apache and jdresolve
+# can't both be writing to the same file at the same time.
+sub move_logfile {
+    my $source      = shift;
+    my $destination = shift;
+
+    logentry("moving $source to $destination");
+
+    # we don't want to clobber
+    # leftovers from past failure
+    if ( -e $destination ) {
+        logentry("$destination exists... bad..not clobbering");
+        return;
+    }
+
+    # if file doesn't exist, create it (list-form system: no shell involved)
+    ( system( '/bin/touch', $source ) == 0 )
+        or logentry("couldn't touch $source");
+
+    # apache will keep writing to renamed file
+    File::Copy::move( $source, $destination )
+        or fatal_logentry("couldn't move $source: $!\n");
+    ( system( '/bin/touch', $source ) == 0 )
+        or fatal_logentry("bad touch: $source");
+
+    ( system( '/bin/chmod', 'a+r', $source ) == 0 )
+        or fatal_logentry("bad chmod: $source");
+}
+
+sub strip_local_ip_numbers {
+    my $log_to_strip = shift;
+    my $strip_regex  = shift;
+
+    # Drops every line matching $strip_regex (a grep pattern) from the file.
+    # Pattern and paths are single-quoted so the shell passes them verbatim.
+
+    logentry("stripping local IPs from $log_to_strip");
+
+    # skip files that have no local ips (grep exits 1 when nothing matched)
+    my $cmd = "/bin/grep -l '$strip_regex' '$log_to_strip' > /dev/null";
+    system($cmd);
+    return 1 if ( ( $? >> 8 ) == 1 );
+
+    $cmd = "/bin/grep -v '$strip_regex' '$log_to_strip' > '$log_to_strip.striped'";
+    system($cmd);
+
+    if ( ( $? >> 8 ) == 2 ) {
+        fatal_logentry("grep error in $log_to_strip");
+    }
+
+    move( "$log_to_strip.striped", $log_to_strip )
+        or fatal_logentry("couldn't rename in strip_local_ip_number");
+}
+
+sub replace_ip_with_hostnames {
+    my $log_to_resolve = shift or croak 'missing param: $log_to_resolve';
+    my $resolved_log   = shift or croak 'missing param: $resolved_log';
+    my $tmp_dir        = shift or croak 'missing param $tmp_dir';
+
+    # jdresolve appends the resolved lines to $resolved_log and keeps its
+    # DNS cache in $tmp_dir; the input log is removed once it succeeds.
+    my $cmd = '/usr/bin/jdresolve --dbfirst -r -n';
+    $cmd .= " --database=$tmp_dir/jdresolve_dns_cache.db";
+    $cmd .= " $log_to_resolve >> $resolved_log";
+
+    logentry("resolving $log_to_resolve");
+
+    ( system($cmd) == 0 ) or fatal_logentry("command failed: $cmd");
+    unlink $log_to_resolve
+        or fatal_logentry("failed to remove $log_to_resolve");
+}
+
+sub run_analog_report {
+    my $in  = shift or croak 'missing param: input_file';
+    my $out = shift or croak 'missing param: output_file';
+
+    logentry("start analog report for $in");
+
+    _generate_analog_conf_file();    # (re)writes $LOG_DIR/analog.conf
+    my $cmd = "/usr/bin/analog -G +g$LOG_DIR/analog.conf +O$out $in 2>/dev/null";
+
+    # refuse to run while a leftover analog DNS lock file is present
+    my $lock_file = '/var/cache/analog/dnsfile.txt.Lock';
+    if ( -e $lock_file ) {
+        fatal_logentry("leftover analog lock file present: $lock_file");
+    }
+
+    # a failed run is only logged; $! is meaningful only if system() is -1
+    my $status = system($cmd);
+    my $why = $status == -1 ? $! : 'exit status ' . ( $status >> 8 );
+    logentry("Warning - couldn't run $cmd: $why") if $status != 0;
+    logentry("done analog report for $out");
+}
+
+# Write $LOG_DIR/analog.conf, filling in $LOG_DIR and $SITE_URL; croaks on I/O errors.
+sub _generate_analog_conf_file {
+    open my $outfile, ">", "$LOG_DIR/analog.conf" or croak "Cannot open $LOG_DIR/analog.conf: $!\n";
+    print {$outfile} <<"END";
+# Configuration file for analog 4.01
+HOSTNAME "Debian Linux System"
+OUTFILE index.html
+ERRFILE $LOG_DIR/analog_errors.txt
+
+PAGEINCLUDE *.htm 
+PAGEINCLUDE *.shtml 
+PAGEINCLUDE *.html 
+PAGEINCLUDE */
+
+# cache DNS lookups
+DNS write
+
+# to this file
+DNSFILE /var/cache/analog/dnsfile.txt
+DNSLOCKFILE /var/cache/analog/dnsfile.txt.Lock
+
+# keep DNS lookup results for this many hours
+DNSGOODHOURS 472
+
+
+LOGFILE $LOG_DIR/referer* 
+LOGFORMAT (COMBINED)
+LOGFILE $LOG_DIR/combined*
+
+
+
+#reports we want
+ALL ON
+VHOST OFF
+PROCTIME OFF # don't log data for this
+
+LINKINCLUDE pages
+REFLINKINCLUDE pages
+
+# people following links to from outside
+# sites to us are interesting 
+# our internal links are not interesting
+# exclude them from the report
+# 
+# replace the lctc.org entries 
+# with ones that are relevant to your site 
+REFREPEXCLUDE http://10.0.0.5/*
+REFREPEXCLUDE http://$SITE_URL/*
+REFREPEXCLUDE http://www.$SITE_URL/*
+REFREPEXCLUDE http://*.$SITE_URL/*
+
+UNCOMPRESS *.gz,*.Z "zcat"
+BROWOUTPUTALIAS Mozilla Netscape
+BROWOUTPUTALIAS "Mozilla (compatible)" "Netscape (compatible)"
+BROWOUTPUTALIAS IWENG AOL
+SEARCHENGINE http://*altavista.*/* q
+SEARCHENGINE http://*yahoo.*/* p
+SEARCHENGINE http://*google.*/* q
+SEARCHENGINE http://*lycos.*/* query
+SEARCHENGINE http://*aol.*/* query
+SEARCHENGINE http://*excite.*/* search
+SEARCHENGINE http://*go2net.*/* general
+SEARCHENGINE http://*metacrawler.*/* general
+SEARCHENGINE http://*msn.*/* MT
+SEARCHENGINE http://*hotbot.com/* MT
+SEARCHENGINE http://*netscape.*/* search
+SEARCHENGINE http://*looksmart.*/* key
+SEARCHENGINE http://*infoseek.*/* qt
+SEARCHENGINE http://*webcrawler.*/* search,searchText
+SEARCHENGINE http://*goto.*/* Keywords
+SEARCHENGINE http://*snap.*/* keyword
+SEARCHENGINE http://*dogpile.*/* q
+SEARCHENGINE http://*askjeeves.*/* ask
+SEARCHENGINE http://*ask.*/* ask
+SEARCHENGINE http://*aj.*/* ask
+SEARCHENGINE http://*directhit.*/* qry
+SEARCHENGINE http://*alltheweb.*/* query
+SEARCHENGINE http://*northernlight.*/* qr
+SEARCHENGINE http://*nlsearch.*/* qr
+SEARCHENGINE http://*dmoz.*/* search
+SEARCHENGINE http://*newhoo.*/* search
+SEARCHENGINE http://*netfind.*/* query,search,s
+SEARCHENGINE http://*/netfind* query
+SEARCHENGINE http://*/pursuit query
+SUBTYPE *.gz,*.Z
+TYPEOUTPUTALIAS .html    ".html [Hypertext Markup Language]"
+TYPEOUTPUTALIAS .htm     ".htm  [Hypertext Markup Language]"
+TYPEOUTPUTALIAS .ps      ".ps   [PostScript]"
+TYPEOUTPUTALIAS .gz      ".gz   [Gzip compressed files]"
+TYPEOUTPUTALIAS .html.gz ".html.gz [Gzipped HTML]"
+TYPEOUTPUTALIAS .ps.gz   ".ps.gz [Gzipped PostScript]"
+TYPEOUTPUTALIAS .xbm     ".xbm  [X11 bitmaps]"
+TYPEOUTPUTALIAS .tar.gz  ".tar.gz [Compressed archives]"
+TYPEOUTPUTALIAS .jpg     ".jpg  [JPEG graphics]"
+TYPEOUTPUTALIAS .jpeg    ".jpeg  [JPEG graphics]"
+TYPEOUTPUTALIAS .gif     ".gif  [GIF graphics]"
+TYPEOUTPUTALIAS .xbm     ".xbm  [X bitmap]"
+TYPEOUTPUTALIAS .txt     ".txt  [Plain text]"
+TYPEOUTPUTALIAS .class   ".class [Java class files]"
+TYPEOUTPUTALIAS .pdf     ".pdf  [Adobe Portable Document Format]"
+TYPEOUTPUTALIAS .zip     ".zip  [Zip archives]"
+TYPEOUTPUTALIAS .hqx     ".hqx  [Macintosh archives]"
+TYPEOUTPUTALIAS .exe     ".exe  [Executables]"
+TYPEOUTPUTALIAS .wav     ".wav  [WAV sound files]"
+TYPEOUTPUTALIAS .png     ".png  [PNG graphics]"
+TYPEOUTPUTALIAS .avi     ".avi  [AVI movies]"
+TYPEOUTPUTALIAS .arc     ".arc  [Compressed archives]"
+TYPEOUTPUTALIAS .mid     ".mid  [MIDI sound files]"
+TYPEOUTPUTALIAS .doc     ".doc  [Microsoft Word document]"
+TYPEOUTPUTALIAS .rtf     ".rtf  [Rich Text Format]"
+TYPEOUTPUTALIAS .mov     ".mov  [Quick Time movie]"
+TYPEOUTPUTALIAS .mpg     ".mpg  [MPEG movie]"
+TYPEOUTPUTALIAS .mpeg    ".mpeg  [MPEG movie]"
+END
+    close $outfile or croak "Cannot close $LOG_DIR/analog.conf: $!\n";
+}
+
+# Build the "visitors" HTML report from the log file $in and write it to
+# $out.  Croaks when either argument is missing; a failing visitors run
+# is reported with warn and logentry but does not abort the script.
+sub run_visitors_report {
+    my ( $in, $out ) = @_;
+    croak 'missing param: input_file'  unless $in;
+    croak 'missing param: output_file' unless $out;
+
+    logentry("start visitor report for $in");
+
+    # History (presumably David Siegal / Eric Adum, circa 2004): visitors
+    # .3a printed both status and error messages to STDERR and did not
+    # fail for some errors (e.g. an unreadable log file), so STDERR is
+    # captured and only shown when the exit status is non-zero.
+    # danm 2010-06-22: current visitors version is 0.7-4
+    my $visitors_cmd = "/usr/bin/visitors -A $in -o html --output-file $out";
+    my $captured     = `$visitors_cmd 2>&1`;
+
+    if ( $? != 0 ) {
+        my $failure = "$visitors_cmd failed. $captured.\n";
+        warn $failure;
+        logentry($failure);
+    }
+    logentry("done visitor report for $out");
+}
+
+# Append one timestamped line to $LOGFILE: "date | script | pid | msg".
+# kludge: no locking.  A log that can't be opened is warned about, not fatal.
+sub logentry {
+    my $msg = shift;
+    open( my $LOG, '>>', $LOGFILE )
+        or do { warn "bad open for append: $! : $LOGFILE\n"; return; };
+    my $timestamp = `date`;
+    chomp $timestamp;
+    print $LOG "$timestamp | $0 | $$ | $msg\n";
+    close($LOG) or warn "bad close:  $! : $LOGFILE\n";
+}
+
+# Log $msg with the fatal-error prefix, then die with that same text.
+sub fatal_logentry {
+    my $fatal = 'FATAL ERROR SCRIPT ENDED SUDDENLY: ' . shift;
+    logentry($fatal);
+    die "$fatal\n";
+}
+

=== modified file 'app-mvhub/project-tools/templates/template.conf'
--- app-mvhub/project-tools/templates/template.conf	2010-06-13 22:41:22 +0000
+++ app-mvhub/project-tools/templates/template.conf	2010-06-28 19:40:55 +0000
@@ -114,6 +114,7 @@
 setup_db_dir=link-to-live-code/app-mvhub/setup/database/sql/
 setup_etc_dir=link-to-live-code/app-mvhub/setup/etc/
 user_conf_dir=conf/
+log_dir=BAD_FIX_IN_CONF_FILE
 
 [COOKIES]
 # name of cookie used to store 

=== modified file 'app-mvhub/setup/etc/cron.d/mvhub-cron'
--- app-mvhub/setup/etc/cron.d/mvhub-cron	2010-05-21 17:31:47 +0000
+++ app-mvhub/setup/etc/cron.d/mvhub-cron	2010-06-28 19:40:55 +0000
@@ -20,5 +20,9 @@
     10    0    *             *     *           www-data   $BIN_DIR//generate_agency_program_pdf.pl $CONF_DIR/$MVH_CONF_FILE
     15    0    *             *     *           www-data   $BIN_DIR//generate_agency_program_pdf.pl $CONF_DIR/$NSP_CONF_FILE
 
+    # nightly traffic reports per site; the graceful restart afterwards presumably makes apache reopen its logs
+    20    0    *             *     *           www-data   $BIN_DIR/run_external_reports $CONF_DIR/$MVH_CONF_FILE
+    25    0    *             *     *           www-data   $BIN_DIR/run_external_reports $CONF_DIR/$NSP_CONF_FILE
+    30    0    *             *     *           root       /usr/sbin/apache2ctl graceful
     51    8    1,15          *     *           www-data   $BIN_DIR/notification_email.pl --config=$CONF_DIR/$MVH_CONF_FILE --execute
     59    8    1,15          *     *           www-data   $BIN_DIR/notification_email.pl --config=$CONF_DIR/$NSP_CONF_FILE --execute

=== modified file 'app-mvhub/t/debian_packages_installed.t'
--- app-mvhub/t/debian_packages_installed.t	2010-06-23 19:16:22 +0000
+++ app-mvhub/t/debian_packages_installed.t	2010-06-28 19:40:55 +0000
@@ -7,7 +7,10 @@
 
 use Test::More;
 my %required_packages = (
+    'analog'                          => [],
     'apache2-mpm-itk'                 => ['apache2-mpm-prefork'],
+    'graphviz'                        => [],
+    'jdresolve'                       => [],
     'libapache2-mod-macro'            => [],
     'libcgi-application-perl'         => [],
     'libcgi-application-plugins-perl' => [],
@@ -51,6 +54,7 @@
     'tetex-extra' => ['texlive-extra-utils'],
     'tidy'        => [],
     'wamerican'   => [],
+    'visitors'    => [],
 );
 
 {    # main

=== modified file 'lib-mvhub/lib/MVHub/Utils/Setup.pm'
--- lib-mvhub/lib/MVHub/Utils/Setup.pm	2010-06-25 14:51:40 +0000
+++ lib-mvhub/lib/MVHub/Utils/Setup.pm	2010-06-28 19:40:55 +0000
@@ -78,7 +78,10 @@
     $template_cfg->param( 'NOTIFICATION.dev_email', "$username\@thecsl.org" );
     $template_cfg->param( 'SITE.website_name',
         "$site_code.$username.testing123.net" );
-    $template_cfg->param( 'SITE.website_code', "$site_code" );
+    $template_cfg->param( 'SITE.website_code',     "$site_code" );
+    $template_cfg->param( 'RELATIVE_PATH.log_dir', "log/$site_code/" );
+    $template_cfg->param( 'RELATIVE_PATH.reports_dir',
+        "reports/$site_code/" );
 
     return $template_cfg;
 }

=== modified file 'lib-mvhub/t/conf/all.conf'
--- lib-mvhub/t/conf/all.conf	2010-06-16 17:56:56 +0000
+++ lib-mvhub/t/conf/all.conf	2010-06-28 19:40:55 +0000
@@ -96,6 +96,7 @@
 setup_db_dir=link-to-live-code/app-mvhub/setup/database/sql/
 setup_etc_dir=link-to-live-code/app-mvhub/setup/etc/
 user_conf_dir=conf/
+log_dir=log/nsp/
 
 [COOKIES]
 # name of cookie used to store 


Follow ups