!/usr/bin/perl
$|=1;

# convert a graph in the form of an image to x,y format data
# Rob Izzard, 14 July 2005 (Grenouille Day)

# usage: a2data <image file>
# where the image file is something that Image Magick can read (most things!)

# You may use this for whatever you like but I take no responsibility for
# its use or misuse. Remember copyright!  

use strict; 
use Image::Magick; # you need this module (comes with most linuxes)
use Term::ANSIColor; # and this one (should be standard)

# name of input file
my $infile=$ARGV[0];
if(!($infile)){help();}

# global variables
my $width;
my $height;
my $mod;
my @bitmap; # bitmap to store the image
my $exit_boolean=0; # set to 1 when we want to exit
my $axis_fill_threshold;
my @xaxes; # the x axis range in pixels
my @yaxes; # the y axis range in pixels
my @xrange; # the x axis range in actual units
my @yrange; # the y axis range in actual units
my @data; # guessed data, in x,y pixel format
my @outdata; # guessed data, in a suitable output format
my $ndatapoints; # number of guessed data points
my $tic_threshold; # fraction of the graph to be ignored near borders
my $dumpfile="/tmp/dump"; # output file
my $number_of_datalines=1;
my $resolution;
my @image_info;
my @labels=('Width','Height','# Colours','File Size','Height?','Width?','x-resolution','y-resolution');

print "Loading previous settings...\x0d";
reset_to_defaults();
load_settings();

# use image magick to load in the file (NB the file might
# be non-postscript)
my $postscript_image=Image::Magick->new;
$postscript_image->Set(density=>$resolution);

read_image();

while($exit_boolean==0)
{
    menu();
}
save_settings();
exit;

############################################################################
# subroutines...
############################################################################

sub save_settings
{
    open(SET,">".$ENV{'HOME'}.'/.a2data');
    print SET join(' ',@xaxes),"\n";
    print SET join(' ',@yaxes),"\n";
    print SET join(' ',@xrange),"\n";
    print SET join(' ',@yrange),"\n";
    print SET join(' ',($axis_fill_threshold,$dumpfile,$tic_threshold,$number_of_datalines,$resolution)),"\n";
    close SET;
}

sub load_settings
{
    if(-s $ENV{'HOME'}.'/.a2data' < 10)
    {
	reset_to_defaults();
	save_settings;
    }
    else
    {
	open(SET,"<".$ENV{'HOME'}.'/.a2data');
	@xaxes=split(/\s/,<SET>);
	@yaxes=split(/\s/,<SET>);
	@xrange=split(/\s/,<SET>);
	@yrange=split(/\s/,<SET>);
	($axis_fill_threshold,$dumpfile,$tic_threshold,$number_of_datalines,$resolution)=split(/\s/,<SET>);
	close SET;
    }
}

sub reset_to_defaults
{
    # set default parameters
    $tic_threshold=0.02;
    $ndatapoints=0;
    $mod;
    $exit_boolean=0; 
    $axis_fill_threshold=0.6;
    @xaxes=(0,$width); 
    @yaxes=(0,$height);
    @xrange=(0,1);
    @yrange=(0,1);
    $dumpfile="/tmp/dump"; # output file
    $number_of_datalines=1;
    $resolution=72; # DPI
    @bitmap=();
    @data=();
    @outdata=();
}

sub read_image
{
    print "Loading image at $resolution DPI...\x0d";
    
    # clear and reload image
    @$postscript_image=();
    $postscript_image->Set(density=>$resolution);
    $postscript_image->Read($infile);
    @image_info=$postscript_image->Get('base-columns',
				       'base-rows',
				       'colors',
				       'filesize',
				       'height',
				       'width',
				       'x-resolution',
				       'y-resolution');
    $width=$image_info[0];
    $height=$image_info[1];
    $mod=int($width/20.0);
    print "Done                    \x0d";
    @bitmap=(); # clear
    @data=();
    @outdata=();
}

sub menu
{
    # hash: key=display name, value=subroutine to call
    my %h=("1 Display Image",'display_image',
	   "2 Make Bitmap",'make_bitmap',
	   "3 Guess Axis Locations (@xaxes, @yaxes)",'guess_axes_wrapper',
	   "4 Manually Enter Axis Locations (@xaxes, @yaxes)",'enter_axes',
	   "5 Display Guessed Axes",'display_guessed_axes',
	   "6 Set Axis Fill Threshold ($axis_fill_threshold)",'set_axis_fill_threshold', 
	   
	   "7 Enter Axis Data Ranges (X:@xrange, Y:@yrange)",'enter_axis_ranges',
	   "8 Guess Data",'guess_data',
	   "9 Display Data Guess",'draw_data',
	   "10 Dump Data",'dump_data',
	   "11 Dump Datafile ($dumpfile)",'set_dumpfile',
	   "12 Set Tic threshold ($tic_threshold)",'set_tic_threshold',
	   "13 Set Number of Data Curves ($number_of_datalines)",'set_num_datalines',
	   "14 Show image information",'image_info',
	   "15 Quit",'exit_program',
	   "16 Reset",'reset_to_defaults',
	   "17 Reload image",'read_image',
	   "18 Set Import Resolution ($resolution DPI)",'change_import_resolution',
	   );
    my @k= sort anum keys %h;
    map
    {
	s/^(\d+)//o;
	$k[$1]=$_;
    }keys %h;

    # output menu items
    for(my $count=1;$count<=$#k;$count++)
    {
	printf "%s% 3d: %s%s\n",color('bold red'),($count),color('reset yellow'),$k[$count];
    }
    print color('white');
    
    my $answer=stdin();
    if($answer ne '')
    {
	my $k=$answer.$k[$answer];
	#print "Answer was $answer -> k = $k[$answer] key $k->",$h{$k},"\n";
	if($k[$answer])
	{
	    my $cmd=$h{$k}.'();';
	    #print "EVAL $cmd\n";
	    eval $cmd;
	}
	save_settings();
    }

}



sub enter_axis_ranges
{
    print "Current X axis range $xrange[0] to $xrange[1], please enter a new X minimum\n";
    my $i=stdin();
    if($i ne '')
    {
	$xrange[0]=$i;
    }
    print "Please enter a new X maximum:\n";
    $i=stdin();
    if($i ne '')
    {  
	$xrange[1]=$i;
    }
    print "Current Y axis range $yrange[0] to $yrange[1], please enter a new Y minimum\n";
    $i=stdin();
    if($i ne '')
    {  
	$yrange[0]=$i;
    }
    print "Please enter a new Y maximum:\n";
    $i=stdin();
    if($i ne '')
    {  
	$yrange[1]=$i;
    }
}
sub enter_axes
{
    print "Current X axes at $xaxes[0] and $xaxes[1], please enter a new X1\n";
    $xaxes[0]=stdin();
    print "Please enter a new X2\n";
    $xaxes[1]=stdin();
    print "Current Y axes at $yaxes[0] and $yaxes[1], please enter a new Y1\n";
    $yaxes[0]=stdin();
    print "Please enter a new Y2\n";
    $yaxes[1]=stdin();
}
sub stdin
{
    my $i=<STDIN>;
    chomp $i;
    return $i;
}

sub set_axis_fill_threshold
{
    print "Current the axis fill threshold is $axis_fill_threshold, what would you like to set it to?\n";
    my $i=stdin();
    if($i ne '')
    {
	$axis_fill_threshold=$i;
    }
    return;
}

sub exit_program
{
    $exit_boolean=1;
}



sub image_info
{
    # output some information about the image
    for(my $i=0;$i<=$#labels;$i++)
    {
	print $labels[$i],': ',$image_info[$i],"\n";
    }
}

sub guess_axes_wrapper
{
    # guess axes

    if($bitmap[$width][$height] eq '')
    {
	make_bitmap();
    }
    my @xfills;
    for(my $y=0;$y<=$height;$y++)
    {
	my $fill_frac=row_fill_fraction($y);
	if($fill_frac>$axis_fill_threshold)
	{
	    #print "Row $y is full: possible x-axis\n";
	    push(@xfills,$fill_frac.' at '.$y);
	}
    }
    
    # guess x axis range from given values
    @xfills=reverse sort @xfills;
    @xaxes=guess_axes($width,@xfills);

    # output info
    print color('green'), "X axes at rows $xaxes[0] and $xaxes[1]\n",color('white');

    # repeat for y-axis
    my @yfills;
    for(my $x=0;$x<=$width;$x++)
    {
	my $fill_frac=col_fill_fraction($x);
	if($fill_frac>$axis_fill_threshold)
	{
	    #print "Col $x is $fill_frac full: possible y-axis\n";
	    push(@yfills,$fill_frac.' at '.$x);
	}
    }
    
    @yfills=reverse sort @yfills;
    @yaxes=guess_axes($height,@yfills);
    
    # output info
    print color('green'),"Y axes at columns $yaxes[0] and $yaxes[1]\n",color('white');

}


sub make_bitmap
{
    # get pixel locations and save anything that is black
    
    $bitmap[$width][$height]=0; # pre-create array to save time
    print "Making bitmap... (width $width height $height)\n";
    my $x;
    my $y;
    my $pixel;
    for($x=0;$x<=$width;$x++)
    {
	if(($x%$mod)==0){printf "Row $x/$width (%2.2f %%)\x0d",(100.0*$x/$width);}
	
	for($y=0;$y<=$height;$y++)
	{
	    $pixel=$postscript_image->Get("pixel[$x,$y]");
	    
	    if($pixel ne '65535,65535,65535,0')
	    {
		$bitmap[$x][$y]=1;
		#print "X $x Y $y -> $pixel\n";
	    }
	}
    }
    print "\n";
}

sub display_guessed_axes
{
    
    # display the image with the axes drawn in
    #my $guess_image=Image::Magick->new;
    my $guess_image=$postscript_image->Clone(); # first copy image
    
    # x2 axis
    $guess_image->Draw(stroke=>'blue',
		       primitive=>'line',
		       points=>"$yaxes[0],$xaxes[0] $yaxes[1],$xaxes[0]");
    
    # y axis
    $guess_image->Draw(stroke=>'red',
		       primitive=>'line',
		       points=>"$yaxes[0],$xaxes[0] $yaxes[0],$xaxes[1]");

    # x axis
    $guess_image->Draw(stroke=>'blue',
		       primitive=>'line',
		       points=>"$yaxes[0],$xaxes[1] $yaxes[1],$xaxes[1]");

    # y2 axis
    $guess_image->Draw(stroke=>'red',
		       primitive=>'line',
		       points=>"$yaxes[1],$xaxes[0] $yaxes[1],$xaxes[1]");
    #$guess_image->Display();
    display($guess_image);

}


sub row_fill_fraction
{
    # return the fraction of a row which is filled
    my $row=shift;
    my $fillcount=0;

    for(my $i=0;$i<=$width;$i++)
    {
	if($bitmap[$i][$row]==1)
	{
	    $fillcount++;
	}
    }

    return(1.0*$fillcount/$width);
}

sub anum
{
    # sorter
    $a=~/^(\d+)/o; my $a2=$1;
    $b=~/^(\d+)/o; my $b2=$1;
    return($a <=> $b);
}

sub col_fill_fraction
{
    # return the fraction of a column which is filled
    my $col=shift;
    my $fillcount=0;

    for(my $i=0;$i<=$height;$i++)
    {
	if($bitmap[$col][$i]==1)
	{
	    # up the count for fill
	    $fillcount++;
	}
    }

    return(1.0*$fillcount/$height);
}

sub guess_axes
{
    # given a list of possible axes, determine using some clever algorithm
    # which are really the axes
    my $maxval=shift; # maximum row/column value
    $maxval*=1.0; # convert to floating point
    my $axis1=-1;
    my $axis2=-1;
    my $i=0; # counter
    map
    {
	if($axis1==-1)
	{
	    # no first axis defined yet, could this be it?
	    /(\S+) at (\S+)/o;
	    if($2/$maxval<0.5) # must be in left/bottom half of graph
	    {
		$axis1=$2;
	    }
	}
	else
	{
	    # we have the first axis, seek the second
	    /(\S+) at (\S+)/o;
	    if($2/$maxval>=0.5) # must be in right/top half of graph
	    {
		$axis2=$2;
	    }
	}
    }@_;
    return($axis1,$axis2);
}


sub guess_data
{
    if($bitmap[$width][$height] eq '')
    {
	make_bitmap();
    }
    # loop through the data (over the axis ranges) and
    # decide which data to save and which to ignore
    @outdata=();
    @data=();

    print "Guessing data...\n"; #xaxes @xaxes: yaxes @yaxes\n";
    my $dticx=int($tic_threshold*$height);
    my $dticy=int($tic_threshold*$width);
    #print "dticx = $dticx, dticy = $dticy\n";
    
    for(my $x=$yaxes[0]+1+$dticx;$x<=$yaxes[1]-1-$dticx;$x++)
    {
	$ndatapoints=0;
	for(my $y=$xaxes[0]+1+$dticy;$y<=$xaxes[1]-1-$dticy;$y++)
	{
	    
	    if($bitmap[$x][$y]==1)
	    {
		if($bitmap[$x][$y-1]!=1)
		{
		    #print "Data at $x $y\n";
		    $data[$x][$y]=1; # data for replotting
		    $outdata[$ndatapoints++][$x]=$y;
		    
		    if($ndatapoints>=$number_of_datalines)
		    {
			# we have found them all
			#$y=$xaxes[1]; # force end of loop
			$y=$xaxes[1]+10;
		    }
		}
	    }
	}    
    }
    print "Done data guesswork\n";
}

sub draw_data
{
    # draw the best data guess pixel by pixel and then display
    my $copy_image=$postscript_image->Clone();
    my $dticx=int($tic_threshold*$height);
    my $dticy=int($tic_threshold*$width);
    my $min=$xaxes[0]+1+$dticy;
    my $max=$xaxes[1]-1-$dticy;

    for(my $x=$yaxes[0]+1+$dticx;$x<=$yaxes[1]-1-$dticx;$x++)
    {
	#my $d=0;
	for(my $y=$min;$y<=$max;$y++)
	{
	 #   if($d==0)
	#    {
	#	if(rand()>0.95)
	#	{
	#	    $copy_image->Set("pixel[$x,$y]"=>sprintf("\#0000%lx",int(255.0*($y-$min)/($max-$min))));
	#	}
	#    }
	    if($data[$x][$y]==1)
	    {
		#$d=1;
		$copy_image->Set("pixel[$x,$y]"=>'magenta');
	    }
	}    
    }
    # display the image
    display($copy_image);
    #$copy_image->Display();
}

sub display_image
{
    display($postscript_image);
}

sub display
{
    print "Waiting for display window to close...";
    $_[0]->Display();
    print "ok\n";
}

sub dump_data
{
    # dump previously guessed data as x,y points
 # draw the best data guess pixel by pixel and then display
    my $copy_image=$postscript_image->Clone();
    my $dticx=int($tic_threshold*$height);
    my $dticy=int($tic_threshold*$width);

    my $dx=$xrange[1]-$xrange[0];
    my $dy=$yrange[1]-$yrange[0];

    # open dump file
    open(DUMP,">".$dumpfile)||print "Could not open file $dumpfile for output\n";

    my $dypixels=$yaxes[1]-$yaxes[0];
    my $dxpixels=$xaxes[1]-$xaxes[0];

    for(my $x=$yaxes[0]+1+$dticx;$x<=$yaxes[1]-1-$dticx;$x++)
    {
	# check for data at this $x...
	if($outdata[0][$x] ne '')
	{
	    # we have data at this x, output for all datapoints
	    for(my $i=0;$i<$ndatapoints;$i++)
	    {
		my $y=$outdata[$i][$x];
		if($i==0)
		{
		    # first item: always x coord
		    print DUMP $xrange[0]+$dx*(($x-$yaxes[0])/$dypixels),' ';
		}
		# always dump data
		if($y ne '')
		{
		    print DUMP $yrange[0]+($yrange[1]-$dy*($y-$xaxes[0])/$dxpixels),' ';
		}
		if($i==$ndatapoints-1)
		{
		    # last item, output newline
		    print DUMP "\n";			
		}
	    }
	    
	}    
    }
    close DUMP;
    print "Data dumped to $dumpfile\n";
}

sub set_tic_threshold
{
    print "Please enter new tic threshold (as a fraction e.g. 0.05, currently it is $tic_threshold)\n";
    my $i=stdin();
    if($i ne '')
    {
	$tic_threshold=$i;
    }
}
sub set_dumpfile
{
    print "Please filename for data dump (currently $dumpfile)\n";
    my $i=stdin();
    if($i ne '')
    {
	$dumpfile=$i;
    }
}
sub set_num_datalines
{
    print "Please enter the number of curves of data there are on the graph\n";
    my $i=stdin();
    if($i ne '')
    {
	$number_of_datalines=$i;
    }
}

sub help
{
    print "a2data: Anything to Data\n";
    print "Usage: a2data <image>\n";
    print "a2data will try to guess the location of image data for you in a semi-intelligent way.\n";
    exit;
}

sub change_import_resolution
{
   print "Please enter the resolution for image import in DPI\n";
   my $i=stdin();
   if($i ne '')
   {
       $resolution=$i;
   }
}
