#!/usr/bin/env python

"""
Code for running the competition described at
http://compprag.christopherpotts.net/pdtb-competition.html
"""

__author__ = "Christopher Potts"
__copyright__ = "Copyright 2011, Christopher Potts"
__credits__ = []
__license__ = "Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License: http://creativecommons.org/licenses/by-nc-sa/3.0/"
__version__ = "1.0"
__maintainer__ = "Christopher Potts"
__email__ = "See the author's website"

######################################################################

import pickle
import numpy
from collections import defaultdict
import pdtb
import pdtb_classifier
from pdtb_experiment_predict_implicit import word_pair_features, inquirer_features
from pdtb_competition_team_banana_slugs import banana_slugs_feature_function
from pdtb_competition_team_banana_wugs import banana_wugs_feature_function
from pdtb_competition_team_compprag import compprag_feature_function

######################################################################

def pdtb_predict_implicit_competition_run(feature_function, train_set_indices, test_set_indices, print_features='all'):
    """
    Train a classifier on the given split, test it, and print a report.

    Arguments:

    feature_function -- callable taking a datum object as its sole argument
    and returning a dictionary mapping feature names to values.
    train_set_indices -- the set of training data indices
    test_set_indices -- the set of test data indices
    print_features -- how many features to list at the end of the report:
    an int lists that many, 'all' lists every feature (default: 'all'),
    and a false value (e.g. 0 or None) skips the listing.

    Nothing is returned; the whole report is written to standard output.
    """
    # Visual separator printed between the sections of the report.
    rule = "======================================================================"
    # Build the classifier over the corpus read from 'pdtb2.csv':
    model = pdtb_classifier.PdtbClassifier(
        pdtb.CorpusReader('pdtb2.csv'),
        feature_function,
        train_set_indices=train_set_indices,
        test_set_indices=test_set_indices)
    # Fit on the training indices, then evaluate on the test indices:
    model.train()
    model.test()
    # Section 1: confusion matrix.
    print(rule)
    print("Confusion matrix")
    model.print_cm()
    # Section 2: precision, recall, and F1, with averages.
    print(rule)
    print("Effectiveness")
    model.print_effectiveness()
    # Section 3: accuracy on the test set and on the training set.
    print(rule)
    model.print_accuracy()
    model.print_train_set_accuracy()
    # Section 4: feature information.
    print(rule)
    print("Feature information")
    model.print_feature_count()
    print("\n")
    if not print_features:
        return
    # 'all' is shorthand for "as many features as the model has".
    if print_features == 'all':
        how_many = model.feature_count()
    else:
        how_many = print_features
    model.classifier.show_most_informative_features(n=how_many, show='all')

######################################################################

implicit_train_picklename = 'pdtb-competition-implicit-train-indices.pickle'
implicit_test_picklename = 'pdtb-competition-implicit-test-indices.pickle'
# Load the train/test index sets shared by the experiment functions in this
# module. The files are opened in binary mode (pickle data is a byte stream)
# and inside `with` blocks so the handles are closed as soon as loading ends.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open(implicit_train_picklename, 'rb') as train_file:
    train_set_indices = pickle.load(train_file)
with open(implicit_test_picklename, 'rb') as test_file:
    test_set_indices = pickle.load(test_file)

def potts_baselines():
    """
    Run the three baseline experiments on the competition train/test split.

    In order: verbal word pairs, Inquirer features, and the combination of
    word-pair and Inquirer features. Each experiment is run through
    pdtb_predict_implicit_competition_run, which prints its own report.
    Nothing is returned.
    """
    #-------------------------------------------------
    # Verbal word pairs:
    def verb_pairs_feature_function(datum):
        # Word pairs restricted to the 'md' and 'v' tags.
        return word_pair_features(datum, tags=['md', 'v'])
    pdtb_predict_implicit_competition_run(verb_pairs_feature_function, train_set_indices, test_set_indices)
    #-------------------------------------------------
    # Inquirer:
    # Open in binary mode and close the handle once the data is loaded.
    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    with open('harvard_inquirer.pickle', 'rb') as inquirer_file:
        inq = pickle.load(inquirer_file)
    def inquirer_feature_function(datum):
        return inquirer_features(datum, inq)
    pdtb_predict_implicit_competition_run(inquirer_feature_function, train_set_indices, test_set_indices)
    #-------------------------------------------------
    # Combo:
    def combo_feature_function(datum):
        # word_pair_features is called without `tags` here, so this is not
        # simply the verbal-pairs baseline plus Inquirer features.
        # Merging via copy + update does not depend on items() returning a
        # list; on a feature-name clash the Inquirer value wins, exactly as
        # with the original concatenation of the two item lists.
        features = dict(word_pair_features(datum))
        features.update(inquirer_feature_function(datum))
        return features
    pdtb_predict_implicit_competition_run(combo_feature_function, train_set_indices, test_set_indices)

def implicit_competition():
    """
    Run the competition experiment once for each team's feature function.

    The entries are evaluated in a fixed order (banana_wugs, banana_slugs,
    compprag) on the shared train/test split; each run prints its own report.
    """
    team_feature_functions = (
        banana_wugs_feature_function,
        banana_slugs_feature_function,
        compprag_feature_function,
    )
    for team_feature_function in team_feature_functions:
        pdtb_predict_implicit_competition_run(team_feature_function, train_set_indices, test_set_indices)
    
    
# potts_baselines()
# implicit_competition()
