#!/usr/bin/perl
use strict;
use warnings;

# ---------------------------------------------------------------------------
# Command-line driver: validates the file named by the first argument.
#
# The file must contain the required header lines PFRMAT QA, TARGET, AUTHOR
# and MODEL, in that order; METHOD/REMARK/PARENT/STOICH lines may be
# interleaved with them. Every non-blank line after the MODEL header is handed
# to validate_data_line() until a line starting with END, or end of file, is
# reached.
#
# Validation messages are printed to STDOUT. The script exits with status 0
# when no errors were counted and 1 otherwise; a missing argument or an
# unreadable file makes it die.
# ---------------------------------------------------------------------------
my $file = $ARGV[0] or die "Usage: $0 <filename>\n";
my $error_count = 0;    # running total, incremented by validate_data_line()
my $max_errors = 9;     # validate_data_line() aborts once this many are counted
my %seen_models;        # model name => line number on which it was last seen

# Required headers, in the order they must appear.
my @required_order = (
    qr/^PFRMAT QA/,
    qr/^TARGET \S+/,
    qr/^AUTHOR \S+/,
    qr/^MODEL/
);

# Keywords that may appear anywhere inside the header block (case-insensitive).
my $allowed_optional = qr/^(METHOD|REMARK|PARENT|STOICH)/i;

open(my $fh, '<', $file) or die "# ERROR! Could not open '$file': $!";

# Single pass over the file. While $req_idx is below the number of required
# headers we are still in the header block; afterwards every line is data.
# Handling both phases in one loop guarantees that the END check is applied to
# every data-phase line, including the very first one after the headers.
my $req_idx = 0;    # index of the next required header still to be matched
while (my $line = <$fh>) {
    chomp($line);
    $line =~ s/\s+$//;          # also strips a trailing CR and stray blanks
    next if $line !~ /\S/;      # skip blank lines

    if ($req_idx < scalar @required_order) {
        if ($line =~ $required_order[$req_idx]) {
            $req_idx++;
            next;
        }

        next if $line =~ $allowed_optional;

        # The line looks like a data row (leading T/R/H/M or contains "pdb",
        # the same test validate_data_line() applies to model names) or like a
        # later header keyword, so the expected header has been skipped.
        if ($line =~ /^[TRHM]/ || $line =~ /pdb/i) {
            print "# ERROR! Missing required header matching $required_order[$req_idx] before data started.\n";
            exit 1;
        }

        print "# ERROR! Line $.: Unknown or misplaced keyword in header block: '$line'\n";
        exit 1;
    }

    # Data phase: END terminates the data; anything after it is ignored.
    last if $line =~ /^END/;
    validate_data_line($line, $.);
}

if ($req_idx < scalar @required_order) {
    print "# ERROR! File ended before all required headers (PFRMAT, TARGET, AUTHOR, MODEL) were found.\n";
    exit 1;
}

close($fh);

if ($error_count > 0) {
    print "\nValidation finished with $error_count error(s).\n";
    exit 1;
} else {
    print "\nSuccess: File is well-formatted.\n";
    exit 0;
}

# validate_data_line($line, $line_num)
#
# Checks one data row and prints a "# ERROR!" message to STDOUT for every
# problem found, incrementing the file-level $error_count each time.
#
# Row layout (whitespace separated):
#   column 1   model name: must start with T, R, H or M, or contain "pdb"
#              (any case); must not repeat a name recorded in %seen_models
#   column 2   overall score: unsigned decimal in the range 0-1
#   column 3+  interface scores: joined without a separator and then split on
#              commas; each entry must be two alphanumerics, a colon and an
#              unsigned decimal in the range 0-1 (e.g. "AB:0.75")
#
# Parameters:
#   $line     - text of the row; the caller has already trimmed the right end
#   $line_num - line number used in messages and stored in %seen_models
#
# Returns nothing. Exits the program with status 1 if $error_count has already
# reached $max_errors when the sub is entered.
sub validate_data_line {
    my ($line, $line_num) = @_;

    # The cap is checked once per row, so a single row may push the total
    # beyond $max_errors before validation stops.
    if ($error_count >= $max_errors) {
        print "# ERROR! Too many errors. Stopping validation.\n";
        exit 1;
    }

    # Note: a row with leading whitespace yields an empty first field here,
    # which is then reported below as an invalid model name.
    my @columns = split(/\s+/, $line);
    if (scalar @columns < 3) {
        print "# ERROR! Line $line_num - Missing columns. Each line needs: name, score, interface scores - see Example 5\n";
        $error_count++;
        return;
    }

    my ($name, $overall_score, @interface_columns) = @columns;

    # Joining with '' lets "AB:0.5, CD:0.6" (blank after the comma) be read as
    # one comma-separated list.
    my $interface_blob = join('', @interface_columns);

    if ($seen_models{$name}) {
        print "# ERROR! Line $line_num - Duplicate model name: '$name' was already on line $seen_models{$name}.\n";
        $error_count++;
    }
    $seen_models{$name} = $line_num;

    unless ($name =~ /^[TRHM]/ || $name =~ /pdb/i) {
        print "# ERROR! Line $line_num - Invalid model name '$name'.\n";
        $error_count++;
    }

    # The pattern test comes first so the numeric comparisons only ever see
    # strings that really are numbers.
    if ($overall_score !~ /^\d*\.?\d+$/ || $overall_score < 0 || $overall_score > 1) {
        print "# ERROR! Line $line_num - Invalid overall score '$overall_score' (Range: 0-1).\n";
        $error_count++;
    }

    # LIMIT -1 keeps trailing empty fields. Without it split drops them, so a
    # dangling comma ("AB:0.5,") or a column of nothing but commas (",") would
    # pass without its empty entries ever being checked.
    my @pairs = split(/,/, $interface_blob, -1);
    foreach my $pair (@pairs) {
        my ($pair_score) = $pair =~ /^[A-Za-z0-9]{2}:(\d*\.?\d+)$/;

        if (!defined $pair_score) {
            print "# ERROR! Line $line_num - Malformed interface entry '$pair'.\n";
            $error_count++;
            next;
        }

        if ($pair_score < 0 || $pair_score > 1) {
            print "# ERROR! Line $line_num - Interface score $pair_score in '$pair' is out of range.\n";
            $error_count++;
        }
    }

    return;
}
