Cookie Notice

As far as I know, and as far as I remember, nothing in this page does anything with Cookies.

2011/03/24

Now There IS An App For That

I had to do something. This was something. So I did this.

This program collects the functions by module in a directory full of modules and checks a code base against it. This tells you which functions you are actually using. Which isn't quite what I wanted, but close.

I can see some additions I could want. Setting the library directory and code directories via Getopt::Long would be the first one. And it doesn't quite tell me what I want to know in all cases. If I use a function in a program that never gets called, it still gives me a result. But this is a place to start.

And because of this, I now know that, within the stack of previously-invented wheels called CPAN, there's a module called Regexp::Common that holds a stack of established regular expressions. I wanted to pull out all comments out of my programs for testing, so that a commented-out function doesn't count.

#!/usr/bin/perl

use 5.010 ;
use strict ;
use warnings ;

use Cwd 'abs_path' ;
use Regexp::Common qw /comment/ ;

use subs qw{
    check_programs
    module_list

    decomment

    drop_pm
    get_module_subs
    pull_package_name
    pull_module_name
    pull_sub_name
    } ;

my $modules     = module_list '/path/to/my/lib' ;
my $directories =  [
    '/code/directory/one',
    '/code/directory/two',
    '/code/directory/three', ] ;

my $data = check_programs( $directories, $modules ) ;

for my $mod ( sort keys %$data ) {
    my $module = $data->{ $mod } ;
    say $mod ;
    for my $sub ( sort keys %$module ) {
        my $subroutine = $module->{ $sub } ;
        say join "\t", '',
            ( $subroutine->{ count } ? $subroutine->{ count } : 0 ) ,
            $sub,
            ;
        }
    }

exit ;

########## ######### ######### ######### ######### #########
########## #########     Subroutines     ######### #########
########## ######### ######### ######### ######### #########

#--------- --------- --------- --------- --------- --------- ---------
# The core of the program
sub check_programs {
        my ( $directories, $modules ) = @_ ;
        my $data ;
        for my $program_dir ( @$directories ) {
            my $program_directory = abs_path $program_dir ;
            chdir $program_directory ;

            #say $program_directory ;
            my @directory = glob '*.cgi *.pl *.pm' ;

            my $programs ;
            @$programs = map {
                { $_ => get_program( $_ ) }
                } @directory ;
            for my $program ( @$programs ) {
                my $k ;
                ( $k ) = keys %$program ;
                my $v = $program->{ $k } ;

                #say join "\t", '', $k ;
                for my $module ( @$modules ) {
                    my $mk ;
                    ( $mk ) = keys %$module ;
                    my $mv = $module->{ $mk } ;

                    #say join "\t", '', '', $mk ;
                    for my $sub ( @$mv ) {
                        my $result = $v =~ /$sub/ ? 1 : 0 ;

                        #$result
                        #    and say join "\t", '', '', '', $result, $sub ;
                        $data->{ $mk }->{ $sub }->{ exists } = 1 ;
                        if ( $result ) {
                            $data->{ $mk }->{ $sub }->{ count }++ ;
                            push @{ $data->{ $mk }->{ $sub }->{ used } },
                                abs_path $k ;
                                }
                        }
                    }
                }
            }
        return $data ;
    }

#--------- --------- --------- --------- --------- --------- ---------
# returns the contents of a filename, if it contains 'perl' in the top
sub get_program {
        my ( $filename ) = @_ ;
        if ( -f $filename ) {
            if ( open my $fh, '<', $filename ) {
                my @lines =
                    map { decomment $_ } <$fh> ;

                #return 0 if $lines[0] !~ m/perl/ ;
                return join '', @lines ;
                close $fh ;
                }
            }
        return 0 ;
    }

#--------- --------- --------- --------- --------- --------- ---------
# removes Perl comments
sub decomment {
        my ( $code ) = @_ ;

        #chomp $code ;
        $code =~ s/$RE{comment}{Perl}// ;
        return $code ;
    }

#--------- --------- --------- --------- --------- --------- ---------
# returns an array ref of module names with an array of subroutines
# the module contains
sub module_list {
        my ( $dir ) = @_ ;
        my $directory = abs_path $dir ;
        chdir $directory ;

        my $output ;
        my @directory = glob '*.pm' ;

        @$output = map {
            {
                ( pull_module_name join '/', $directory, $_ . '.pm' ) =>
                    ( get_module_subs join '/', $directory, $_ . '.pm' )
                    }
            }
            map { drop_pm $_ } @directory ;

        return $output ;
    }

#--------- --------- --------- --------- --------- --------- ---------
# returns an array ref to all the functions (minus internal functions
# whose name starts with _) within a given module
sub get_module_subs {
        my ( $mod_path ) = @_ ;
        my @output ;
        if ( -f $mod_path ) {
            if ( open my $fh, '<', $mod_path ) {
                my @lines = <$fh> ;
                push @output, grep { !/^_/ }
                    map  { pull_sub_name $_ }
                    grep { /^\s*sub / } @lines ;
                close $fh ;
                }
            }
        @output = sort @output ;
        return \@output ;
    }

#--------- --------- --------- --------- --------- --------- ---------
# return the package name of a module
sub pull_module_name {
        my ( $mod_path ) = @_ ;
        my @output ;
        if ( -f $mod_path ) {
            if ( open my $fh, '<', $mod_path ) {
                my @lines = <$fh> ;
                push @output, map { pull_package_name $_ }
                    grep { /^\s*package / } @lines ;
                close $fh ;
                }
            }
        return $output[ 0 ] ;
    }

#--------- --------- --------- --------- --------- --------- ---------
# return the package name of a 'package Package::Name ;' string
sub pull_package_name {
        my ( $in ) = @_ ;
        chomp $in ;
        $in = ( split m{\s*package\s*}, $in )[ 1 ] ;
        $in = ( split m/\s*;\s*/,       $in )[ 0 ] ;
        return $in ;
    }

#--------- --------- --------- --------- --------- --------- ---------
# return only the subroutine name from a 'sub sub_name { ' string
sub pull_sub_name {
        my ( $in ) = @_ ;
        chomp $in ;
        $in = ( split m{\s*sub\s*}, $in )[ 1 ] ;
        $in = ( split m/\s*{\s*/,   $in )[ 0 ] ;
        return $in ;
    }

#--------- --------- --------- --------- --------- --------- ---------
# remove '.pm' from end of module file names
sub drop_pm {
        my ( $in ) = @_ ;
        $in =~ s/\.pm$// ;
        return $in ;
    }


1 comment:

  1. I'll confess that I wish I wrote it more Higher Order, but that can be changed later.

    ReplyDelete