#!/usr/bin/env perl

use strict;

sub inst_new
{
    my $address   = shift @_;
    my $bytes     = shift @_;
    my $opcode    = shift @_;
    my $arguments = shift @_;
    my $inst_ref = {'address' => $address,
		    'bytes'   => $bytes,		    
		    'opcode'  => $opcode,
		    'arguments' => $arguments};
    return $inst_ref;
}

sub inst_get_address
{
    my $inst_ref = shift @_;
    return $inst_ref->{'address'};
}

sub inst_get_size
{
    my $inst_ref = shift @_;
    my @bytes = split (/ /, $inst_ref->{'bytes'});
    return scalar @bytes;
}

sub inst_is_jump
{
    my $inst_ref = shift @_;
    if ($inst_ref->{'opcode'} =~ /^j.+/) {
	return 1;
    } else {
	return 0;
    }
}

sub inst_is_ret
{
    my $inst_ref = shift @_;
    if ($inst_ref->{'opcode'} =~ /^ret/) {
	return 1;
    } else {
	return 0;
    }
}

sub inst_get_jump_target
{
    my $inst_ref = shift @_;
    $inst_ref->{'arguments'} =~ /([0-9a-f]+)[ \t]+\<([^\>]+)>/;
    my $jump_target = hex ($1);
    return $jump_target;
}

sub inst_print
{
    my $inst_ref = shift @_;
    print $inst_ref->{'opcode'} . " " . $inst_ref->{'arguments'} . "\n";
}

sub bb_new
{
   my $bb_ref = {'instructions' => []};
   return $bb_ref;
}

sub bb_append_inst
{
    my $bb_ref = shift @_;
    my $inst_ref = shift @_;

    #print "append to bb: ";
    #inst_print ($inst_ref);
    #bb_print (\%bb);

    push @{$bb_ref->{'instructions'}}, $inst_ref;
}

sub bb_split
{
    my $bb_ref = shift @_;
    my $split_location = shift @_;
    my $insts_ref = $bb_ref->{'instructions'};
    my $length = scalar @{$insts_ref};
    my $saved = 0;

    if ($length <= 1) {
	printf ("Should not happen at %x\n", $split_location);
	bb_print ($bb_ref);
    }

    for (my $i = 0; $i < length; $i++) {
	my $inst_ref = $insts_ref->[$i];
	if (inst_get_address ($inst_ref) >= $split_location) {
	    $saved = $i;
	    last;
	}
    }
    my @new_bb_insts = splice @{$insts_ref}, $saved, $length-$saved;
    my $new_bb_ref = {'instructions' => \@new_bb_insts};
    return $new_bb_ref;
}

sub bb_print
{
    my $bb_ref = shift @_;

    printf ("bb %x/%x\n", bb_get_start_address ($bb_ref),bb_get_end_address ($bb_ref));
    #print "bb " .  . "\n";

    for my $inst_ref (@{$bb_ref->{'instructions'}}) {
	inst_print ($inst_ref);
    }
}

sub bb_get_start_address
{
    my $bb_ref = shift @_;
    my @instructions = @{$bb_ref->{'instructions'}};
    my $instruction_ref = $instructions[0];
    return $instruction_ref->{'address'};
}
sub bb_get_end_address
{
    my $bb_ref = shift @_;
    my @instructions = @{$bb_ref->{'instructions'}};
    my $n_instructions = scalar @instructions;
    my $inst_ref = $instructions[$n_instructions-1];
    my $end_address = $inst_ref->{'address'} + inst_get_size ($inst_ref);
    return $end_address;
}

sub bb_is_empty
{
    my $bb_ref = shift @_;
    my @instructions = @{$bb_ref->{'instructions'}};
    my $n_instructions = scalar @instructions;
    if ($n_instructions == 0) {
	return 1;
    } else {
	return 0;
    }
}

sub function_new
{
    my $address = shift @_;
    my $name = shift @_;
    my $function_ref = {
	'name' => $name,
	'address' => $address,
	'jump_target' => [],
	'jump_target_left' => [],
	'bb' => []
    };
    #print "created " . $function_ref . " " . function_get_name ($function_ref) . "\n";
    return $function_ref;
}

sub function_get_name
{
    my %function = %{shift @_};
    return $function{'name'};
}

sub function_append_bb
{
    my $function_ref = shift @_;
    my $bb_ref = shift @_;

    #print "append to function ";
    #bb_print ($bb_ref);
    push @{$function_ref->{'bb'}}, $bb_ref;
}

sub function_append_jump_target
{
    my $function_ref = shift @_;
    my $jump_target = shift @_;

    push @{$function_ref->{'jump_target'}}, $jump_target;
}

sub function_print
{
    my $function_ref = shift @_;

    print function_get_name ($function_ref) . "\n";

    for my $bb_ref (@{$function_ref->{'bb'}}) {
	print "--- ";
	bb_print ($bb_ref);
    }
    print "--------------------------------\n";
}

sub function_insert_bb
{
    my $function_ref = shift @_;
    my $new_bb_ref = shift @_;
    my @bbs = @{$function_ref->{'bb'}};

    push @bbs, $new_bb_ref;
    my @sorted = sort { bb_get_start_address ($a) <=> bb_get_start_address ($b) } @bbs;
    $function_ref->{'bb'} = \@sorted;
}

sub function_get_bbs
{
    my $function_ref = shift @_;
    return @{$function_ref->{'bb'}};
}

sub function_get_start_address
{
    my $function_ref = shift @_;
    return bb_get_start_address ($function_ref->{'bb'}[0]);
}

sub function_get_end_address
{
    my $function_ref = shift @_;
    my $n = scalar @{$function_ref->{'bb'}};
    return bb_get_end_address ($function_ref->{'bb'}[$n-1]);
}

sub function_get_jump_target_left
{
    my $function_ref = shift @_;
    return @{$function_ref->{'jump_target_left'}};
}

sub function_split_bbs
{
    my $function_ref = shift @_;
    my $split_location = shift @_;
    my $fct_start = function_get_start_address ($function_ref);
    my $fct_end = function_get_end_address ($function_ref);
    if ($fct_start > $split_location ||
	$fct_end < $split_location) {
	return 0;
    }

    for my $bb_ref (@{$function_ref->{'bb'}}) {
	if (bb_get_end_address ($bb_ref) > $split_location) {
	    if (bb_get_start_address ($bb_ref) > $split_location) {
		# We found the target bb: we need to split it.
		my $new_bb_ref = bb_split ($bb_ref, $split_location);
		if (bb_is_empty ($new_bb_ref)) {
		    print "new bb empty\n";
		} 
		if (bb_is_empty ($bb_ref)) {
		    print "old bb empty\n";
		}
		function_insert_bb ($function_ref, $new_bb_ref);
	    }
	    last;
	}
    }

    return 1;
}

sub function_finish
{
    my $function_ref = shift @_;

    my $jump_target = shift @{$function_ref->{'jump_target'}};
    while ($jump_target) {
	if (!function_split_bbs ($function_ref, $jump_target)) {
	    push @{$function_ref->{'jump_target_left'}}, $jump_target;
	}
	$jump_target = shift @{$function_ref->{'jump_target'}};
    }
    #function_print ($function_ref);
}

sub functions_parse
{
    my $first_function = 1;
    my $current_function;
    my $current_bb;
    my @functions = ();

    while (<>) {
	if (/([0-9a-f]+):[ \t]+(([0-9a-f]{2}[ \t])+)[ \t]+([a-z,]+)[ \t]+(.*)$/) {
	    my $address = hex ($1);
	    my $bytes = $2;
	    my $opcode = $4;
	    my $arguments = $5;
	    my $inst = inst_new ($address, $bytes, $opcode, $arguments);
	    bb_append_inst ($current_bb, $inst);
	    if (inst_is_jump ($inst)) {
		function_append_bb ($current_function, $current_bb);
		function_append_jump_target ($current_function, 
					     inst_get_jump_target ($inst));
		$current_bb = bb_new ();
	    } elsif (inst_is_ret ($inst)) {
		function_append_bb ($current_function, $current_bb);
		$current_bb = bb_new ();
	    }
	} elsif (/([0-9a-fA-F]+) \<([^>]*)\>:/) {
	    if (!$first_function) {
		if (!bb_is_empty ($current_bb)) {
		    function_append_bb ($current_function, $current_bb);
		}
		function_finish ($current_function);
		push @functions, $current_function;
	    } else {
		$first_function = 0;
	    }
	    my $address = hex ($1);
	    my $name = $2;
	    $current_function = function_new ($address, $name);
	    $current_bb = bb_new ();
	}
    }
    if (!bb_is_empty ($current_bb)) {
	function_append_bb ($current_function, $current_bb);
    }
    function_finish ($current_function);
    push @functions, $current_function;

    return @functions;
}

sub functions_finish
{
    my $functions_ref = shift @_;
    # now, we need to gather all the jumps which left the scope 
    # of a single function to split the bbs of other functions.
    my %jump_target_left_hash = ();
    for my $function_ref (@{$functions_ref}) {
	for my $jump_target (function_get_jump_target_left ($function_ref)) {
	    $jump_target_left_hash{$jump_target} = 1;
	}
    }
    my @jump_target_left = sort keys %jump_target_left_hash;

    my $jump_target = shift @jump_target_left;
    while ($jump_target) {
	for my $function_ref (@{$functions_ref}) {
	    if (function_split_bbs ($function_ref, $jump_target)) {
		last;
	    }
	}
	$jump_target = shift @jump_target_left;
    }
}

my $output_debug = 0;
my $output_stats = 0;

while (scalar @ARGV) {
    my $arg = shift @ARGV;

    if ($arg =~ /--print-debug/) {
	$output_debug = 1;
	next;
    }
    if ($arg =~ /--print-stats/) {
	$output_stats = 1;
	next;
    }
}

my @functions = ();
@functions = functions_parse ();
functions_finish (\@functions);



if ($output_debug) {
    for my $function_ref (@functions) {
	function_print ($function_ref);
    }
}

if ($output_stats) {
    my %statistics = ();
    my $n_bb_bigger_than_5 = 0;
    my $n_bytes_bigger_than_5 = 0;
    my $total_bytes = 0;
    my $total_bb = 0;
    for my $function_ref (@functions) {
	for my $bb_ref (function_get_bbs ($function_ref)) {
	    my $bb_size = bb_get_end_address ($bb_ref) - bb_get_start_address ($bb_ref);
	    if ($bb_size >= 5) {
		$n_bb_bigger_than_5++;
		$n_bytes_bigger_than_5+=$bb_size;
	    }
	    $total_bb++;
	    $total_bytes+=$bb_size;
	    $statistics{$bb_size}++;
	}
    }
    
    for my $size (keys %statistics) {
	printf ("size: %04d n: %d\n", $size, $statistics{$size});
    }

    printf ("percentage of basic blocks bigger than 5 bytes: %.2f\n", 
	    $n_bb_bigger_than_5 / $total_bb * 100);
    printf ("bytes percentage of basic blocks bigger than 5 bytes: %.2f\n", 
	    $n_bytes_bigger_than_5 / $total_bytes * 100);   
}
