#!/usr/bin/env python

"""
Command-line program for organizing and viewing subsets of the
features in the results files produced by pdtb_competition.py.
"""

__author__ = "Christopher Potts"
__copyright__ = "Copyright 2011, Christopher Potts"
__credits__ = []
__license__ = "Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License: http://creativecommons.org/licenses/by-nc-sa/3.0/"
__version__ = "1.0"
__maintainer__ = "Christopher Potts"
__email__ = "See the author's website"

######################################################################

import os
import re
import sys
import getopt
from collections import defaultdict

######################################################################

def format_top_features(filename, n=5, keep_neg=False):
    """Print the first `n` matching features of each class found in `filename`.

    Every line of the file is searched for the pattern
    "<feature> and label is '<Class>'", where <Class> is one of
    Comparison, Contingency, Expansion or Temporal.  The <feature>
    text is grouped by class in the order it appears in the file.
    A separator line and the filename are printed first, followed by
    each class name (without quotes) and the first `n` features
    collected for it.  Lines that do not match are ignored.

    Arguments:
    filename -- path of the results file to read
    n -- number of features printed per class; used as a slice bound
         (default: 5)
    keep_neg -- if False (the default), features whose text starts
                with optional whitespace followed by "-" are skipped;
                if True, every matching feature is kept

    Returns None; the output goes to standard output.
    """
    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("======================================================================")
    print(filename)
    # The context manager guarantees the file handle is closed, even
    # if reading fails part-way through.
    with open(filename) as results_file:
        lines = results_file.read().splitlines()
    cats = ["'Comparison'", "'Contingency'", "'Expansion'", "'Temporal'"]
    cats_re = re.compile(r"^(.+?) and label is (%s)" % '|'.join(cats))
    # Compiled once here rather than being looked up for every line.
    neg_re = re.compile(r"^\s*-")
    finds = defaultdict(list)
    for line in lines:
        match = cats_re.search(line)
        if not match:
            continue
        feat, cat = match.group(1, 2)
        if keep_neg or not neg_re.search(feat):
            finds[cat].append(feat)
    # Iterate over `cats` (not `finds`) so that every class heading is
    # printed, in a fixed order, even when it has no features.
    for cat in cats:
        print(cat.strip("'"))
        for feat in finds[cat][:n]:
            print(feat)

######################################################################
# COMMAND-LINE INTERFACE

def usage():
    """Print the command-line help text to standard output."""
    help_lines = (
        "Usage: [-hfnk] filename",
        "  -h --help      Prints this help message.",
        "  -f --filename  Filename to process.",
        "  -n --number    Number of features from each class to view.",
        "  -k --keep_neg  Keep negative weights.",
    )
    # Single-argument print(...) behaves identically as a Python 2
    # print statement and as the Python 3 print function.
    for help_line in help_lines:
        print(help_line)
        
def main(argv=None):
    """Parse command-line options and print the requested features.

    Arguments:
    argv -- list of command-line arguments, excluding the program
            name; defaults to sys.argv[1:]

    Recognized options are -h/--help, -f/--filename, -n/--number and
    -k/--keep_neg.  If no filename is given with -f, the first
    positional argument is used instead.  When no filename is supplied
    at all, the usage message is printed.

    Exits with status 2 on an unrecognized option or when help is
    requested, and with an error message when the file does not exist.
    """
    if argv is None:
        argv = sys.argv[1:]
    try:
        opts, args = getopt.getopt(
            argv, "hf:n:k", ["help", "filename=", "number=", "keep_neg"])
    except getopt.GetoptError as err:
        # Stop here: `opts` is unbound when parsing fails, so carrying
        # on would only crash with a NameError further down.
        print(str(err))
        usage()
        sys.exit(2)
    filename = None
    n = 5
    keep_neg = False
    for opt, opt_val in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(2)
        elif opt in ("-f", "--filename"):
            filename = opt_val
        elif opt in ("-n", "--number"):
            try:
                n = int(opt_val)
            except ValueError:
                # Only a failed integer conversion is expected here;
                # anything else should propagate.
                n = 5
                print("\nWarning: Value for -n (--number) must be an integer. Proceeding with 5.")
        elif opt in ("-k", "--keep_neg"):
            keep_neg = True
    # The usage line advertises a positional filename, so honour one
    # when -f/--filename was not supplied.
    if not filename and args:
        filename = args[0]
    if not filename:
        usage()
        return
    if not os.path.exists(filename):
        # sys.exit with a string writes it to stderr and exits with a
        # non-zero status.
        sys.exit("Couldn't find %s" % filename)
    format_top_features(filename, n=n, keep_neg=keep_neg)


if __name__ == '__main__':
    main()
            

