#!/usr/bin/env perl
#  Copyright (C) 2000, 2001 Eazel, Inc.
#  Copyright (C) 2002, 2003 Apple Computer, Inc.
#  Copyright (C) 2006 Mathieu Lacage
#
#  This is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public
#  License as published by the Free Software Foundation; either
#  version 2 of the License, or (at your option) any later version.
#
#  This is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public
#  License along with this program; if not, write to the Free
#  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# This script can be used to gather author-based changeset information 
# from cvs repositories. It was written by Mathieu Lacage. The code to
# parse and extract function names from c, c++ and java files was
# written by Darin Adler and Maciej Stachowiak, first at Eazel, Inc
# and then at Apple for a script called prepare-ChangeLog which 
# could automatically generate pre-filled ChangeLog entries with
# the list of functions changes by a given commit.
#
# Darin Adler <darin@bentspoon.com>, started 20 April 2000
# Java support added by Maciej Stachowiak <mjs@eazel.com>
# Objective-C, C++ and Objective-C++ support added by Maciej Stachowiak <mjs@apple.com>
# 

use Date::Manip;
use File::Find;


sub change_new
{
    my $change = {};
    $change->{'log'} = [];
    # initialize ref to empty array.
    return $change;
}

sub change_print
{
    my ($change) = @_;
    print "change by: " .
	$change->{'author'} .
	" on: " . $change->{'working_file'} .
	" in: " . $change->{'module'} .
	" rev: " . $change->{'prev-revision'} . " -> " . $change->{'revision'} .
	" at: " . $change->{'year'} . "/" .
	$change->{'month'} . "/" .
	$change->{'day'} . " " .
	$change->{'hour'} . ":" .
	$change->{'minute'} . ":" .
	$change->{'second'} . " " .
	$change->{'seconds_since_1970'} . " secs\n";
}

sub change_print_log
{
    my ($change) = @_;
    for $log_line (@{$change->{'log'}}) {
	print $log_line;
    }
}
sub change_set_rcs_file
{
    my ($change, $rcs_file) = @_;
    $change->{'rcs_file'} = $rcs_file;
}
sub change_set_working_file
{
    my ($change, $working_file) = @_;
    $change->{'working_file'} = $working_file;
}
sub change_set_revision
{
    my ($change, $revision) = @_;
    $change->{'revision'} = $revision
}
sub change_set_prev_revision
{
    my ($change, $revision) = @_;
    $change->{'prev-revision'} = $revision
}
sub change_set_date
{
    my ($change, $year, $month, $day,
	$hour, $minute, $second) = @_;
    $change->{'year'} = $year;
    $change->{'month'} = $month;
    $change->{'day'} = $day;
    $change->{'hour'} = $hour;
    $change->{'minute'} = $minute;
    $change->{'second'} = $second;
    $change->{'seconds_since_1970'} = Date_SecsSince1970 ($month, $day, $year,
							  $hour, $minute, $second);
}
sub change_set_author
{
    my ($change, $author) = @_;
    $change->{'author'} = $author;
}
sub change_set_module
{
    my ($change, $module) = @_;
    $change->{'module'} = $module;
}
sub change_append_log_line
{
    my ($change, $line) = @_;
    push @{$change->{'log'}}, $line;
}
sub change_is_log_equal
{
    my ($change,$other) = @_;

    my @log = @{$other->{'log'}};
    my $line;
    for $line (@{$change->{'log'}}) {
	my $other_line = shift @log;
	if (! $line eq $other_line) {
	    return 0;
	}
    }
    return 1;
}
sub change_get_log
{
    my ($change) = @_;
    return @{$change->{'log'}};
}
sub change_get_seconds_since_1970
{
    my ($change) = @_;
    return $change->{'seconds_since_1970'};
}
sub change_get_author
{
    my ($change) = @_;
    return $change->{'author'};
}
sub change_get_module
{
    my ($change) = @_;
    return $change->{'module'};
}
sub change_get_revision
{
    my ($change) = @_;
    return $change->{'revision'};
}
sub change_get_prev_revision
{
    my ($change) = @_;
    return $change->{'prev-revision'};
}
sub change_get_file
{
    my ($change) = @_;
    return $change->{'working_file'};
}
sub change_is_code
{
    my ($change) = @_;
    my $filename = $change->{'working_file'};
    if ($filename =~ /\.(h|c|cpp|hpp|java)/) {
	return 1;
    }
    return 0;
}

sub changed_range_new
{
    my ($start, $end) = @_;
    my $changed_range = [$start, $end];
    return $changed_range;
}
sub changed_range_get_start
{
    my ($changed_range) = @_;
    return $changed_range->[0];
}
sub changed_range_get_end
{
    my ($changed_range) = @_;
    return $changed_range->[1];
}

sub change_get_changed_ranges
{
    my ($change, $repo_dir, $module) = @_;

    my $rcsdiff_command = 'rcsdiff -q ' . 
	'-r' . $change->{'prev-revision'} . 
	' -r' . $change->{'revision'} . ' ' .
	$repo_dir . '/' . $change->{'module'} . '/' . $change->{'rcs_file'} . ' |';
    
    open (DIFF, $rcsdiff_command) || die "Could not open \"$rcsdiff_command\" -- $!";
    my @changed_ranges;
    while (<DIFF>) {
	## regex stolen from prepare-Changelog
	if (/^\d+(,\d+)?[acd](\d+)(,(\d+))?/) {
	    push @changed_ranges, changed_range_new ($2, $4 || $2 );
	}
    }
    close (DIFF);

    #foreach my $r (@changed_ranges) {
    #print "start=" . changed_range_get_start ($r) . ", end=" . 
    #changed_range_get_end ($r) . "\n";
    #}

    return @changed_ranges;
}

sub function_range_new
{
    my $function_range = {};
    return $function_range;
}
sub function_range_get_start
{
    my ($function_range) = @_;
    return $function_range->{'start'};
}
sub function_range_get_end
{
    my ($function_range) = @_;
    return $function_range->{'end'};
}
sub function_range_get_name
{
    my ($function_range) = @_;
    return $function_range->{'name'};
}
sub function_range_set_start
{
    my ($function_range, $start) = @_;
    $function_range->{'start'} = $start;
}
sub function_range_set_end
{
    my ($function_range, $end) = @_;
    $function_range->{'end'} = $end;
}
sub function_range_set_name
{
    my ($function_range, $name) = @_;
    $function_range->{'name'} = $name;
}

# Read a file and get all the line ranges of the things that look like C functions.
# A function name is the last word before an open parenthesis before the outer
# level open brace. A function starts at the first character after the last close
# brace or semicolon before the function name and ends at the close brace.
# Comment handling is simple-minded but will work for all but pathological cases.
#
# Result is a list of triples: [ start_line, end_line, function_name ].

sub get_c_function_ranges
{
    my ($file_name) = @_;
    #print "parsing \"$file_name\"\n";
    open (FILE, $file_name) || die "unable to open \"$file_name\"";

    my @ranges;

    my $in_comment = 0;
    my $in_macro = 0;
    my $in_method_declaration = 0;
    my $in_parentheses = 0;
    my $in_braces = 0;
    my $brace_start = 0;
    my $brace_end = 0;
    my $skip_til_brace_or_semicolon = 0;

    my $word = "";
    my $interface_name = "";

    my $potential_method_char = "";
    my $potential_method_spec = "";

    my $potential_start = 0;
    my $potential_name = "";

    my $start = 0;
    my $name = "";

    my $next_word_could_be_namespace = 0;
    my $potential_namespace = "";
    my @namespaces;

    while (<FILE>)
      {
        # Handle continued multi-line comment.
        if ($in_comment)
          {
            next unless s-.*\*/--;
            $in_comment = 0;
          }

        # Handle continued macro.
        if ($in_macro)
          {
            $in_macro = 0 unless /\\$/;
            next;
          }

        # Handle start of macro (or any preprocessor directive).
        if (/^\s*\#/)
          {
            $in_macro = 1 if /^([^\\]|\\.)*\\$/;
            next;
          }

        # Handle comments and quoted text.
        while (m-(/\*|//|\'|\")-) # \' and \" keep emacs perl mode happy
          {
            my $match = $1;
            if ($match eq "/*")
              {
                if (!s-/\*.*?\*/--)
                  {
                    s-/\*.*--;
                    $in_comment = 1;
                  }
              }
            elsif ($match eq "//")
              {
                s-//.*--;
              }
            else # ' or "
              {
                if (!s-$match([^\\]|\\.)*?$match--)
                  {
                    warn "mismatched quotes at line $. in $file_name\n";
                    s-$match.*--;
                  }
              }
          }


        # continued method declaration
        if ($in_method_declaration) 
          {
              my $original = $_;
              my $method_cont = $_;

              chomp $method_cont;
              $method_cont =~ s/[;\{].*//;
              $potential_method_spec = "${potential_method_spec} ${method_cont}";

              $_ = $original;
              if (/;/) 
                {
                  $potential_start = 0;
                  $potential_method_spec = "";
                  $potential_method_char = "";
                  $in_method_declaration = 0;
                  s/^[^;\{]*//;
                } elsif (/{/) {
                  my $selector = method_decl_to_selector ($potential_method_spec);
                  $potential_name = "${potential_method_char}\[${interface_name} ${selector}\]";
                  
                  $potential_method_spec = "";
                  $potential_method_char = "";
                  $in_method_declaration = 0;
  
                  $_ = $original;
                  s/^[^;{]*//;
                } else {
                  next;
                }
          }

        
        # start of method declaration
        if ((my $method_char, my $method_spec) = m&^([-+])([^0-9;][^;]*)$&)
          {
            my $original = $_;

            if ($interface_name) 
              {
                chomp $method_spec;
                $method_spec =~ s/\{.*//;
            
                $potential_method_char = $method_char;
                $potential_method_spec = $method_spec;
                $potential_start = $.;
                $in_method_declaration = 1;
              } else { 
                warn "declaring a method but don't have interface on line $. in $file_name\n";
              }
            $_ = $original;
            if (/\{/) {
              my $selector = method_decl_to_selector ($potential_method_spec);
              $potential_name = "${potential_method_char}\[${interface_name} ${selector}\]";
              
              $potential_method_spec = "";
              $potential_method_char = "";
              $in_method_declaration = 0;
              $_ = $original;
              s/^[^{]*//;
            } else {
              next;
            }
          }


        # Find function, interface and method names.
        while (m&((?:[[:word:]]+::)*operator(?:[ \t]*\(\)|[^()]*)|[[:word:]:~]+|[(){}:;])|\@(?:implementation|interface)\s+(\w+)[^{]*&g)
          {
            # interface name
            if ($2) 
              {
                $interface_name = $2;
                next;
              }

            # Open parenthesis.
            if ($1 eq "(")
              {
                $potential_name = $word unless $in_parentheses || $skip_til_brace_or_semicolon;
                $in_parentheses++;
                next;
              }

            # Close parenthesis.
            if ($1 eq ")")
              {
                $in_parentheses--;
                next;
              }

            # C++ constructor initializers
            if ($1 eq ":")
              {
                  $skip_til_brace_or_semicolon = 1 unless ($in_parentheses || $in_braces);
              }

            # Open brace.
            if ($1 eq "{")
              {
                $skip_til_brace_or_semicolon = 0;

                if ($potential_namespace) {
                    push @namespaces, $potential_namespace;
                    $potential_namespace = "";
                    next;
                }

                # Promote potential name to real function name at the
                # start of the outer level set of braces (function body?).
                if (!$in_braces and $potential_start)
                  {
                    $start = $potential_start;
                    $name = $potential_name;
                    if (@namespaces && (length($name) < 2 || substr($name,1,1) ne "[")) {
                        $name = join ('::', @namespaces, $name);
                    }
                  }

                $in_method_declaration = 0;

                $brace_start = $. if (!$in_braces);
                $in_braces++;
                next;
              }

            # Close brace.
            if ($1 eq "}")
              {
                if (!$in_braces && @namespaces) {
                    pop @namespaces;
                    next;
                }

                $in_braces--;
                $brace_end = $. if (!$in_braces);

                # End of an outer level set of braces.
                # This could be a function body.
                if (!$in_braces and $name)
                  {
		      my $tmp = {};
		      my $function_range = function_range_new ();
		      function_range_set_name ($function_range, $name);
		      function_range_set_start ($function_range, $start);
		      function_range_set_end ($function_range, $.);
		      push @ranges, $function_range;
		      $name = "";
                  }

                $potential_start = 0;
                $potential_name = "";
                next;
              }

            # Semicolon.
            if ($1 eq ";")
              {
                $skip_til_brace_or_semicolon = 0;
                $potential_start = 0;
                $potential_name = "";
                $in_method_declaration = 0;
                next;
              }

            # Ignore "const" method qualifier.
            if ($1 eq "const") {
                next;
            }

            if ($1 eq "namespace" || $1 eq "class" || $1 eq "struct") {
                $next_word_could_be_namespace = 1;
                next;
            }

            # Word.
            $word = $1;
            if (!$skip_til_brace_or_semicolon) {
              if ($next_word_could_be_namespace) {
                $potential_namespace = $word;
                $next_word_could_be_namespace = 0;
              } elsif ($potential_namespace) {
                $potential_namespace = "";
              }

              if (!$in_parentheses) {
                $potential_start = 0;
                $potential_name = "";
              }
              if (!$potential_start) {
                $potential_start = $.;
                $potential_name = "";
              }
            }
          }
      }

    warn "missing close braces in $file_name (probable start at $brace_start)\n" if ($in_braces > 0);
    warn "too many close braces in $file_name (probable start at $brace_end)\n" if ($in_braces < 0);

    warn "mismatched parentheses in $file_name\n" if $in_parentheses;

    close (FILE);

    return @ranges;
  }



sub get_java_function_ranges
{
    my ($file_name) = @_;
    #print "parsing \"$file_name\"\n";
    open (FILE, $file_name) || die "unable to open \"$file_name\"";

    my @current_scopes;

    my @ranges;

    my $in_comment = 0;
    my $in_macro = 0;
    my $in_parentheses = 0;
    my $in_braces = 0;
    my $in_non_block_braces = 0;
    my $class_or_interface_just_seen = 0;

    my $word = "";

    my $potential_start = 0;
    my $potential_name = "";
    my $potential_name_is_class_or_interface = 0;

    my $start = 0;
    my $name = "";
    my $current_name_is_class_or_interface = 0;

    while (<FILE>)
      {
        # Handle continued multi-line comment.
        if ($in_comment)
          {
            next unless s-.*\*/--;
            $in_comment = 0;
          }

        # Handle continued macro.
        if ($in_macro)
          {
            $in_macro = 0 unless /\\$/;
            next;
          }

        # Handle start of macro (or any preprocessor directive).
        if (/^\s*\#/)
          {
            $in_macro = 1 if /^([^\\]|\\.)*\\$/;
            next;
          }

        # Handle comments and quoted text.
        while (m-(/\*|//|\'|\")-) # \' and \" keep emacs perl mode happy
          {
            my $match = $1;
            if ($match eq "/*")
              {
                if (!s-/\*.*?\*/--)
                  {
                    s-/\*.*--;
                    $in_comment = 1;
                  }
              }
            elsif ($match eq "//")
              {
                s-//.*--;
              }
            else # ' or "
              {
                if (!s-$match([^\\]|\\.)*?$match--)
                  {
                    warn "mismatched quotes at line $. in $file_name\n";
                    s-$match.*--;
                  }
              }
          }

        # Find function names.
        while (m-(\w+|[(){};])-g)
          {
            # Open parenthesis.
            if ($1 eq "(")
              {
                if (!$in_parentheses) {
                    $potential_name = $word;
                    $potential_name_is_class_or_interface = 0;
                }
                $in_parentheses++;
                next;
              }

            # Close parenthesis.
            if ($1 eq ")")
              {
                $in_parentheses--;
                next;
              }

            # Open brace.
            if ($1 eq "{")
              {
                # Promote potential name to real function name at the
                # start of the outer level set of braces (function/class/interface body?).
                if (!$in_non_block_braces
                    and (!$in_braces or $current_name_is_class_or_interface)
                    and $potential_start)
                  {
                    if ($name)
                      {
			  my $function_range = function_range_new ();
			  function_range_set_start ($function_range, $start);
			  function_range_set_end ($function_range, $. - 1);
			  function_range_set_name ($function_range, join ('.', @current_scopes));
                          push @ranges, $function_range;
                      }


                    $current_name_is_class_or_interface = $potential_name_is_class_or_interface;

                    $start = $potential_start;
                    $name = $potential_name;

                    push (@current_scopes, $name);
                  } else {
                      $in_non_block_braces++;
                  }

                $potential_name = "";
                $potential_start = 0;

                $in_braces++;
                next;
              }

            # Close brace.
            if ($1 eq "}")
              {
                $in_braces--;

                # End of an outer level set of braces.
                # This could be a function body.
                if (!$in_non_block_braces)
                  {
                    if ($name)
                      {
			  my $function_range = function_range_new ();
			  function_range_set_start ($function_range, $start);
			  function_range_set_end ($function_range, $.);
			  function_range_set_name ($function_range, join ('.', @current_scopes));

			  push @ranges, $function_range;

                        pop (@current_scopes);

                        if (@current_scopes)
                          {
                            $current_name_is_class_or_interface = 1;

                            $start = $. + 1;
                            $name =  $current_scopes[$#current_scopes-1];
                          }
                        else
                          {
                            $current_name_is_class_or_interface = 0;
                            $start = 0;
                            $name =  "";
                          }
                    }
                  }
                else
                  {
                    $in_non_block_braces-- if $in_non_block_braces;
                  }

                $potential_start = 0;
                $potential_name = "";
                next;
              }

            # Semicolon.
            if ($1 eq ";")
              {
                $potential_start = 0;
                $potential_name = "";
                next;
              }

            if ($1 eq "class" or $1 eq "interface")
              {
                $class_or_interface_just_seen = 1;
                next;
              }

            # Word.
            $word = $1;
            if (!$in_parentheses)
              {
                if ($class_or_interface_just_seen) {
                    $potential_name = $word;
                    $potential_start = $.;
                    $class_or_interface_just_seen = 0;
                    $potential_name_is_class_or_interface = 1;
                    next;
                }
              }
            if (!$potential_start)
              {
                $potential_start = $.;
                $potential_name = "";
              }
            $class_or_interface_just_seen = 0;
          }
      }

    warn "mismatched braces in $file_name\n" if $in_braces;
    warn "mismatched parentheses in $file_name\n" if $in_parentheses;

    close (FILE);

    return @ranges;
}

sub change_get_function_ranges
{
    my ($change, $repo_dir, $module_dir) = @_;
    my $rcs_command = "co -p -q" . 
	$change->{'revision'} . " " .
	$repo_dir . '/' .
	$change->{'module'} . '/' .
	$change->{'rcs_file'} . ' |';
    #print $rcs_command . "\n";
    my $filename = $change->{'working_file'};
    my @function_ranges;
    if ($filename =~ /\.(c|cpp|h|m|mm)$/) {
	@function_ranges = get_c_function_ranges ($rcs_command);
    } elsif ($filename =~ /\.java$/) {
	@function_ranges = get_java_function_ranges ($rcs_command);
    }
    return @function_ranges;
}

sub change_get_changed_functions
{
    my ($change, $repo_dir, $module_dir) = @_;

    my @changed_ranges = change_get_changed_ranges ($change, $repo_dir);
    my @function_ranges = change_get_function_ranges ($change, $repo_dir, $module_dir);

    # merge logic stolen from prepare-ChangeLog
    my @functions;
  FUNCTION: 
    foreach my $function_range (@function_ranges) {
	# Advance to successive change ranges.
	foreach my $change_range (@changed_ranges) {
	    # If past this function, move on to the next one.
	    if (changed_range_get_start ($change_range) > function_range_get_end ($function_range)) {
		next FUNCTION;
	    }

	    # If an overlap with this function range, record the function name.
	    if (changed_range_get_end ($change_range) >= function_range_get_start ($function_range) &&
		changed_range_get_start ($change_range) <= function_range_get_end ($function_range)) {
		push @functions, function_range_get_name ($function_range);
		next FUNCTION;
	    }
	}
    }
  done:
    @sorted = sort @functions;
    
    return @sorted;
}

sub changeset_new
{
    my ($change) = @_;
    my $changeset = {};
    push @{$changeset->{'changes'}},$change;
    $changeset->{'low'} = change_get_seconds_since_1970 ($change);
    $changeset->{'high'} = change_get_seconds_since_1970 ($change);
    return $changeset;
}
sub changeset_contains_change
{
    my ($changeset, $change, $threshold) = @_;
    my $low = $changeset->{'low'};
    my $high = $changeset->{'high'};
    my $delta_low = change_get_seconds_since_1970 ($change) - $low;
    my $delta_high = change_get_seconds_since_1970 ($change) - $high;
    if ($delta_low < 0) {
	$delta_low = - $delta_min;
    }
    if ($delta_high < 0) {
	$delta_high = - $delta_high;
    }
    if ($delta_high < $threshold || $delta_high < $threshold) {
	# now, check the log message.
	if (change_is_log_equal ($changeset->{'changes'}[0], $change)) {
	    return 1;
	}
    }
    return 0;
}
sub changeset_add_change
{
    my ($changeset, $change) = @_;
    push @{$changeset->{'changes'}},$change;
    my $seconds = change_get_seconds_since_1970 ($change);
    if ($seconds < $changeset->{'low'}) {
	$changeset->{'low'} = $seconds;
    } elsif ($seconds < $changeset->{'high'}) {
	$changeset->{'high'} = $seconds;
    }
}
sub changeset_get_author_name
{
    return change_get_author (@{$changeset->{'changes'}}[0]);
}
sub changeset_get_module_name
{
    return change_get_module (@{$changeset->{'changes'}}[0]);
}
sub changeset_get_changes
{
    return @{$changeset->{'changes'}};
}
sub changeset_get_n_changes
{
    my $n = @{$changeset->{'changes'}};
    return $n;
}

sub changeset_print
{
    my ($changeset) = @_;
    my $size = @{$changeset->{'changes'}};
    print "changeset with $size elements:\n";
    for $change (@{$changeset->{'changes'}}) {
	change_print ($change);
    }
}
sub author_new
{
    my ($name) = @_;
    my $author = {};
    $author->{'name'} = $name;
}
sub author_get_name
{
    my ($author) = @_;
    return $author->{'name'};
}
# note: each change should be added in the 
# correct time-based order.
sub author_add_change
{
    my ($author, $change, $threshold) = @_;
    # can we find a change which contains the same 
    # log message and roughly the same date ?
    my $changeset;
    my @changesets = reverse (@{$author->{'changesets'}});
    for $changeset (@changesets) {
	if (changeset_contains_change ($changeset, $change, $threshold)) {
	    changeset_add_change ($changeset, $change);
	    return;
	}
    }
    $changeset = changeset_new ($change);
    # make sure the change timestamp is correctly increasing.
    if ($author->{'last_change'} > change_get_seconds_since_1970 ($change)) {
	print "changes not inserted in the right order !\n";
	exit (1);
    }
    $author->{'last_change'} = change_get_seconds_since_1970 ($change);
    push @{$author->{'changesets'}}, $changeset;
}
sub author_get_changesets
{
    return @{$author->{'changesets'}};
}
sub author_print_changesets
{
    my ($author) = @_;
    for $changeset (@{$author->{'changesets'}}) {
	changeset_print ($changeset);
    }
}

sub authors_new
{
    my $authors = {};
    return $authors;
}
sub authors_lookup_by_name
{
    my ($authors, $name) = @_;
    if ($authors->{$name}) {
	return $authors->{$name};
    }
    $authors->{$name} = author_new ($name);
    return $authors->{$name};
}
sub authors_get
{
    my ($authors) = @_;
    return values %{$authors};
}
sub authors_print
{
    my ($authors) = @_;
    for $author (keys %{$authors}) {
	print $author . "\n";
    }
}
sub authors_print
{
    my ($authors) = @_;
    for $author (keys %{$authors}) {
	print $author . "\n";
    }
}





sub print_help 
{
    print "usage: analyze-cvs-repo.pl [options] --repo-dir=[local repository dir]\n";
    print "  options: \n";
    print "    --changeset-threshold=[time-based threshold to detect changesets.]\n";
}

sub get_module_list
{
    my $repo_dir = shift @_;
    my @modules = ();
    opendir (DIR, $repo_dir) or die "can't opendir $repo_dir: $!";
    while (defined($file = readdir(DIR))) {
	if (!($file =~ /CVSROOT/) && 
	    !($file =~ /\./) &&
	    !($file =~ /\.\./)) {
	    push @modules, $file;
	}
    }
    closedir (DIR);
    return @modules;
}


sub delta_tree_read_from_file
{
    my ($filename) = @_;
    my $delta_tree = {};

    open (RCS, $filename) || die "Could not open \"$filename\" -- $!";

    #print "reading $filename\n";

    my $line;
    my $rev;
    my $state = 0;
    while ($line = <RCS>) {
	if ($state == 0) {
	    if ($line =~ /head[ \t]+[0-9\.]+;/) {
		$state++;
	    }
	} elsif ($state == 1) {
	    if ($line =~ /access[^;]*;$/) {
		$state++;
	    }
	} elsif ($state == 2) {
	    if ($line =~ /symbols[^;]*;$/) {
		$state++;
	    } elsif ($line =~ /;$/) {
		$state++;
	    }
	} elsif ($state == 3) {
	    if ($line =~ /locks[^;]*;.*strict.*;$/) {
		$state++;
	    }
	} elsif ($state == 4) {
	    if ($line =~ /([0-9\.]+)$/) {
		$rev = $1;
		$state++;
	    } elsif ($line =~ /^desc$/) {
		goto done;
	    }
	} elsif ($state == 5) {
	    if ($line =~ /^branches[ \t]*;$/) {
		$state+=2;
	    } elsif ($line =~ /^branches[ \t]*([0-9\.]+);$/) {
		my $branch = $1;
		$delta_tree->{$branch} = $rev;
		$state+=2;
	    } elsif ($line =~ /^branches$/) {
		$state++;
	    }
	} elsif ($state == 6) {
	    if ($line =~ /^[ \t]+([0-9\.]+)$/) {
		my $branch = $1;
		$delta_tree->{$branch} = $rev;
	    } elsif ($line =~ /^[ \t]+([0-9\.]+);$/) {
		my $branch = $1;
		$delta_tree->{$branch} = $rev;
		$state++;
	    }
	} elsif ($state == 7) {
	    if ($line =~ /^next[ \t]+([0-9\.]+);$/) {
		my $next = $1;
		my @revs = split (/\./, $next);
		my $n_revs = @revs;
		if (($n_revs / 2) >= 2) {
		    $delta_tree->{$next} = $rev;
		    #print "$next -> $rev\n";
		} else {
		    $delta_tree->{$rev} = $next;
		    #print "$rev -> $next\n";
		}
		$state = 4;
	    }
	}
    }

  done:

    close (RCS);
    
    return $delta_tree;
}

sub delta_tree_get_prev
{
    my ($delta_tree, $current) = @_;
    return $delta_tree->{$current};
}


my @find_list_result_cb = ();
sub find_cb
{
    if ($_ =~ /,v$/) {
	push @find_list_result_cb, $File::Find::name;
    }
}

sub read_log
{
    my ($repo_dir, $module) = @_;
    my @changes = ();
    find (\&find_cb, ($repo_dir . '/' . $module));
    my @files = @find_list_result_cb;
  FILE: 
    foreach my $file (@files) {
	my $rlog_command = "rlog $file |";
	my $delta_tree = delta_tree_read_from_file ($file);
	open (RLOG, $rlog_command) || die "Could not open \"$rlog_command\" -- $!";
	my $line;
	my $state = 0;
	my $current_change;
	my $working_file;
	my $rcs_file;
	while ($line = <RLOG>) {
	    if ($state == 0) {
		if ($line =~ /RCS file: $repo_dir\/$module\/(.*,v)$/) {
		    $rcs_file = $1;
		    $state++;
		}
	    } elsif ($state == 1) {
		if ($line =~ /Working file: (.*)$/) {
		    $working_file = $1;
		    $state++;
		}
	    } elsif ($state == 2) {
		if ($line =~ /total revisions: ([0-9]+);[ \t]*selected revisions: ([0-9]+)/) {
		    $state++;
		}
	    } elsif ($state == 3) {
		if ($line =~ /----------------------------/) {
		    $state++;
		}
	    } elsif ($state == 4) {
		if ($line =~ /revision ([0-9\.]+)/) {
		    $current_change = change_new ();
		    change_set_rcs_file ($current_change, $rcs_file);
		    change_set_working_file ($current_change, $working_file);
		    change_set_module ($current_change, $module);
		    change_set_revision ($current_change, $1);
		    change_set_prev_revision ($current_change, delta_tree_get_prev ($delta_tree, $1));
		    $state++;
		}
	    } elsif ($state == 5) {
		if ($line =~ /date: ([0-9]+)\/([01][0-9])\/([0123][0-9]) ([012][0-9]):([0-5][0-9]):([0-5][0-9]);[ \t]+author: ([^;]+);/) {
		    my $year = $1;
		    my $month = $2;
		    my $day = $3;
		    my $hour = $4;
		    my $minute = $5;
		    my $second = $6;
		    my $author = $7;
		    change_set_date ($current_change, 
				 $year, $month, $day, 
				     $hour, $minute, $second);
		    change_set_author ($current_change, $author);
		    $state++;
		}
	    } elsif ($state == 6) {
		if ($line =~ /----------------------------$/) {
		    $state = 4;
		    #change_print ($current_change);
		    push @changes, $current_change;
		} elsif ($line =~ /=============================================================================$/) {
		    $state = 0;
		    next FILE;
		} elsif ($line =~ /branches:/) {
		} else {
		    change_append_log_line ($current_change, $line);
		}
	    }
	}
	close (RLOG);
    }

    return @changes;
}

sub by_date
{
    $a->{'seconds_since_1970'} <=> $b->{'seconds_since_1970'}
}

sub gather_author_changesets
{
    my ($threshold, @changes) = @_;
    my $authors = authors_new ();

    $old_handle = select (STDOUT);
    $| = 1; # perform flush after each write to STDOUT

    print "gather author changesets ";

    # using this sorted array to process each change is
    # a heuristic optimization: it increases the probability
    # of processing the changes of a given changeset together
    # which decreases the cost of author_add_change.
    my @sorted = sort by_date @changes;

    my $n = @changes;
    my $step = $n / 100;
    if ($step == 0) {
	$step = 1;
    }
    my $i = 0;
    for $change (@sorted) {
	my $author = authors_lookup_by_name ($authors, change_get_author ($change));
	author_add_change ($author, $change, $threshold);
	if ($i % $step == 0) {
	    print ".";
	}
	$i++;
    }
    print "\n";

    select ($old_handle);

    return $authors;
}



my $repo_dir;
my $changeset_threshold = 30;
my $n = 0;

while (@ARGV) {
    my $tmp = shift @ARGV;
    if ($tmp =~ /--repo-dir=(.*)/) {
	$repo_dir = $1;
	$repo_dir =~ s/\/$//;
	$n++;
    } elsif ($tmp =~ /--changeset-threshold=(.*)/) {
	$changeset_threshold = $1;
    } else {
	print_help ();
	exit (1);
    }
}

if ($n < 1) {
    print_help ();
    exit (1);
}

my @modules = get_module_list ($repo_dir);
my @changes = ();
for $module (@modules) {
    print "read log $module...\n";
    my @module_changes = read_log ($repo_dir, $module);
    @changes = (@changes, @module_changes);
}
my $authors = gather_author_changesets ($changeset_threshold, @changes);
for $author (authors_get ($authors)) {
    for $changeset (author_get_changesets ($author)) {
	print "changeset changes=" . changeset_get_n_changes ($changeset) .
	    " by=" . changeset_get_author_name ($changeset) . 
	    " module=" . changeset_get_module_name ($changeset) 
	    . "\n";
	for $change (changeset_get_changes ($changeset)) {
	    print "  change " .
		" file=" . change_get_file ($change) .
		" revision=" . change_get_prev_revision ($change) . '->' . change_get_revision ($change);
	    if (change_is_code ($change)) {
		my @functions = change_get_changed_functions ($change, $repo_dir);
		my $n_functions = @functions;
		print ", changed=" . $n_functions . "\n";
		if ($n_functions > 0) {
		    my $i = 0;
		    print "    functions=";
		    foreach my $function (@functions) {
			print $function;
			$i++;
			if ($i < $n_functions) {
			    print ', ';
			}
		    }
		    print "\n";
		}
	    } else {
		print "\n";
	    }
	}
    }
}

