package Net::Amazon::MechanicalTurk::Command::LoadHITs;
use strict;
use warnings;
use Carp;
use Data::Dumper;
use IO::File;
use Net::Amazon::MechanicalTurk::BulkSupport;
use Net::Amazon::MechanicalTurk::DataStructure;
use Net::Amazon::MechanicalTurk::RowData;
use Net::Amazon::MechanicalTurk::Properties;
use Net::Amazon::MechanicalTurk::Template;
use Net::Amazon::MechanicalTurk::Template::ReplacementTemplate;
use Net::Amazon::MechanicalTurk::DelimitedWriter;

our $VERSION = '1.01_01';

=head1 NAME

Net::Amazon::MechanicalTurk::Command::LoadHITs - Bulk Loading support for Amazon Mechancial Turk.

This module adds the loadHITs method to the Net::Amazon::MechanicalTurk class.

=head1 SYNOPSIS

    # See the sample loadHITs from the source distribution.

    sub questionTemplate {
        my %params = %{$_[0]};
        return <<END_XML;
    <?xml version="1.0" encoding="UTF-8"?>
    <QuestionForm xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2005-10-01/QuestionForm.xsd">
      <Question>
        <QuestionIdentifier>1</QuestionIdentifier>
        <QuestionContent>
          <Text>$params{question}</Text>
        </QuestionContent>
        <AnswerSpecification>
          <FreeTextAnswer/>
        </AnswerSpecification>
      </Question>
    </QuestionForm>
    END_XML
    }
    
    my $properties = {
        Title       => 'LoadHITs Perl sample',
        Description => 'This is a test of the bulk loading API.',
        Keywords    => 'LoadHITs, bulkload, perl',
        Reward => {
            CurrencyCode => 'USD',
            Amount       => 0.01
        },
        RequesterAnnotation         => 'test',
        AssignmentDurationInSeconds => 60 * 60,
        AutoApprovalDelayInSeconds  => 60 * 60 * 10,
        MaxAssignments              => 3,
        LifetimeInSeconds           => 60 * 60
    };
    
    my $mturk = Net::Amazon::MechanicalTurk->new;
    
    $mturk->loadHITs(
        properties => $properties,
        input      => "loadhits-input.csv",
        question   => \&questionTemplate,
        progress   => \*STDOUT,
        success    => "loadhits-success.csv",
        fail       => "loadhits-failure.csv"
    );

=head1 C<loadHITs>

loadHITs

Bulk loads many hits of the same hit type into mechanical turk.
The method takes a set of properties used to create a HITType and its
associated HITs.  To generate questions for HITs, rows of data are
pulled from an input source which is merged against a question template
to generate the question xml.  For each row in an input source, 1 HIT
is generated. Note: The source distribution of the Mechanical Turk Perl SDK
contains samples using this method.

loadHITs takes a hash reference or a hash with the following parameters:

  
    properties - (required) Either a hash reference or the name of a file,
                 containing the properties to use for generating a HITType
                 and the associated HITs.  When the properties are read from
                 a file, the method
                 Net::Amazon::MechanicalTurk::Properties->readNestedData is
                 used.
                 
    input      - (required) The input source for row data.
                 This parameter may be of the following types:
                    - Net::Amazon::MechanicalTurk::RowData
                    - An array of hashes.
                      (This is internally converted into an object of type:
                       Net::Amazon::MechanicalTurk::RowData::ArrayHashRowData)
                    - A reference to a subroutine.  When the loadHITs method
                      asks for row data, the subroutine will be called and
                      passed a subroutine reference, which should be called
                      for every row generated by the input.  The generated row
                      should be a hash reference.
                      (This is internally converted into an object of type
                       Net::Amazon::MechanicalTurk::RowData::SubroutineRowData)
                    - The name of a file.  The file should be either a CSV or
                      tab delimited file.  If the file name ends with '.csv',
                      it will read as a CSV, otherwise it is assumed to be
                      tab delimited. The first row in the file should contain
                      the column names.  Each subsequent row becomes a hash
                      reference based on the column names.
                      (This is internally converted into an object of type
                       Net::Amazon::MechanicalTurk::RowData::DelimitedRowData)
              
    question   - (required) The question template used to generate questions.
                 This parameter may be of the following types:
                    - An object of type Net::Amazon::MechanicalTurk::Template.
                    - A subroutine.  The subroutine will be given a hash
                      reference representing the current input row.
                      (This is internally converted into an object of type
                       Net::Amazon::MechanicalTurk::Template::SubroutineTemplate)
                    - A filename ending in .rt or .question. This is a text
                      file which contains variables, which will be substituted
                      from the input row.  Variables in the text file have
                      the syntax ${var_name}.
                    - A filename ending in .pl.  This is a perl script, which
                      has 2 variables set named %params and $out.  %params are
                      the parameters representing the input row and $out is
                      the IO::Handle the question should be written to. Before
                      this script is invoked, the $out handle is selected as
                      the default handle, so calls to print and printf without
                      a handle, will go to $out.
                      Note: Use of this type of question, requires the
                      IO::String module.
             
    preview    - (optional) If preview is specified, a HITType and no HITs
                 will be created, instead, the preview parameter will be
                 given the parameters that would be used create the HIT.
                 This parameter may be of the following types:
                    - A subroutine. The subroutine is called with the
                      CreateHIT parameters.
                    - An IO::Handle. Each question from the CreateHIT
                      parameters will be printed to the handle.
                    - The name of a file. Each question from the CreateHIT
                      parameters will be printed to the file.
                    
    progress   - (optional) Used to display progress messages.  This
                 parameter may be of the following types:
                    - A subroutine. The subroutine is called with 1 parameter,
                      a message to be displayed.
                    - An IO::Handle. The progress message is written to the
                      handle.
    
    success    - (optional) Used to handle a successfully created hit.  This
                 parameter may be of the following types:
                    - A filename. HITId's and HITTypeId's will be written to
                      this file.  The file will be in a delimited format,
                      with the first row containing column headers. If the
                      filename ends in ".csv" the file format will be CSV,
                      otherwise it will be tab delimited.
                    - A subroutine. The subroutine is called when a hit is
                      created and passed a hash with the following parameters:
                          - mturk      - A handle to the mturk client.
                          - fields     - An array reference of the field names
                                         for the input row.
                          - row        - The input row the hit was created
                                         from.
                          - parameters - The parameters given to CreateHIT.
                          - HITId      - The HITId created.
                          - HITTypeId  - The HITTypeId of the hit created.
                    
    fail       - (optional) Used to handle a hit which failed creation.  If
                 this value is not specified and a hit fails creation, an
                 error will be raised. This value may be of the following
                 types:
                    - A filename. The input row will be written back to the
                      file in a delimited format. If the file name ends with
                      ".csv", then the file will be in CSV format, otherwise
                      it will be in a tab delimited format.
                    - A subroutine.  The subroutine will be called back with
                      a hash containing the following values:
                          - mturk      - A handle to the mturk client.
                          - fields     - An array reference of the field names
                                         for the input row.
                          - row        - The input row the hit was created
                                         from.
                          - parameters - The parameters given to CreateHIT.
                          - HITTypeId  - The HITTypeId that was used in the
                                         CreateHIT call.
                          - error      - The error message associated with
                                         the failure.
                   
    maxHits    - (optional) If this value is greater than 0, than at most
                 maxHits will be created.
    
    entityEscapeInput - (optional) If this value is a true value then the
                        input row will have certain values encoded as xml
                        entities, before being passed to the template.
                        The unescaped values will be accessible as <key>_raw.
                        The characters escaped are >, <, &, ' and ".
                        This parameter is on by default.

=cut


sub loadHITs {
    my $mturk = shift;
    my %params = @_;
    
    foreach my $required (qw{ properties input question }) {
        if (!exists $params{$required}) {
            Carp::croak("Missing required parameter $required.");            
        }
    }
    
    my $preview  = previewBlock($params{preview});
    my $progress = Net::Amazon::MechanicalTurk::BulkSupport::progressBlock($params{progress});
    my $success  = Net::Amazon::MechanicalTurk::BulkSupport::successBlock($params{success});
    my $fail     = Net::Amazon::MechanicalTurk::BulkSupport::failBlock($params{fail});
    my $maxHits  = (exists $params{maxHits}) ? $params{maxHits} : -1;
    
    $params{entityEscapeInput} = (exists $params{entityEscapeInput}) ? $params{entityEscapeInput} : 1;
    
    if ($progress) {
        $progress->("--[Initializing] " . scalar localtime() . " ---");
        $progress->("  URL:        " . $mturk->serviceUrl);
        $progress->("  Properties: $params{properties}");
        $progress->("  Input:      $params{input}");
        $progress->("  Question:   $params{question}");
    }
    
    my $properties = Net::Amazon::MechanicalTurk::Properties->toProperties($params{properties});
    my $input      = Net::Amazon::MechanicalTurk::RowData->toRowData($params{input});
    my $question   = Net::Amazon::MechanicalTurk::Template->toTemplate($params{question});
    
    my $createHITTypeProperties = Net::Amazon::MechanicalTurk::BulkSupport::getCreateHITTypeProperties($properties);
    my $createHITProperties     = Net::Amazon::MechanicalTurk::BulkSupport::getCreateHITProperties($properties);

    my $hitTypeId  = -1;
    my $exitedEach = 0;
    my $rowNumber  = 0;
    my $hitsLoaded = 0;
    my $failures   = 0;
    my $start      = time();
    my $lastHITId;
    
    eval {
    
        # Create HITType    
        if (!$preview) {
            $progress->("--[Creating HITType] " . scalar localtime() . " ---") if ($progress);
            $hitTypeId = Net::Amazon::MechanicalTurk::BulkSupport::createHITType($mturk, $createHITTypeProperties, $properties, $progress);
            $progress->("  Created HITType (HITTypeId: $hitTypeId)") if ($progress);
        }
        
        $progress->("--[Loading HITs] " . scalar localtime() . " ---") if ($progress);
        $input->each(sub {
            my ($_self, $row) = @_;
            $rowNumber++;
     
            if ($maxHits >= 0 and $rowNumber > $maxHits) {
                $exitedEach = 1;
                die "Exiting each loop";
            }
            
            # Create hit params
            my $hitProps = { HITTypeId => $hitTypeId };
            while (my ($htProp, $htTempl) = each %$createHITProperties) {
                $hitProps->{$htProp} = $htTempl->execute($row);
            }
            
            # Get a merged copy of row and properties
            my $templateParams;
            if ($params{entityEscapeInput}) {
                # Entity encodes values in the hash
                # and makes copies of the original values
                # with the key name <key>_raw.
                $templateParams = xmlEntityEscapeHashValues($row);
            }
            else {
                $templateParams = {%{$row}}; # makes a copy
            }
                
            while (my ($pKey, $pVal) = each %$properties) {
                if (!exists $templateParams->{$pKey}) {
                    $templateParams->{$pKey} = $pVal;
                }
            }
            
            $hitProps->{Question} = $question->execute($templateParams);
            
            if ($preview) {
                $preview->($hitProps);
            }
            else { # CreateHIT
                eval {
                    $lastHITId = $mturk->CreateHIT($hitProps)->{HITId}[0];
                    $progress->("  Created HIT $rowNumber (HITId: $lastHITId).") if ($progress);
                    $hitsLoaded++;
                    $success->(
                        mturk      => $mturk,
                        fields     => $input->fieldNames,
                        row        => $row,
                        parameters => $hitProps,
                        HITId      => $lastHITId,
                        HITTypeId  => $hitTypeId
                    );
                };
                if ($@) {
                    $failures++;
                    $progress->("  $@") if $progress;
                    $fail->(
                        mturk      => $mturk,
                        fields     => $input->fieldNames,
                        row        => $row,
                        parameters => $hitProps,
                        HITTypeId  => $hitTypeId,
                        error      => $@
                    );
                }
            } # End CreateHIT
            
        }); # End each
    };
    if ($@ and !$exitedEach) {
        my $message = "\nAn error occurred while loading a HIT.\n";
        $message .= "\n$@\n";
        if ($mturk->response) {
            if ($mturk->response->errorCode) {
                $message .= "\nError Code: " . $mturk->response->errorCode . "\n";
                $message .= "Error Message: " . $mturk->response->errorMessage . "\n";
            }
        }
        if ($rowNumber > 0) {
            $message .= "\nFailed on row $rowNumber in input $params{input}.\n";
        }
        if ($mturk->request) {
            $message .= "\nLast operation called " . $mturk->request->{Operation} . ".\n";
            $message .= "\nDump of call parameters:\n" .
                Dumper($mturk->request, 4) . "\n";
        }
        if ($mturk->response) {
            $message .= "\nDump of response:\n" .
                Dumper($mturk->response->fullResult, 4) . "\n";
        }
        Carp::croak($message);
    }
    
    if ($progress) {
        $progress->("  Failed to load $failures hits.");
        $progress->("  Loaded $hitsLoaded hits.");
        $progress->("--[Done Loading HITs] " . scalar localtime() . " ---");
        $progress->("  Total load time: " . (time() - $start) . " seconds.");
        $progress->("  You may see your HITs here: " . $mturk->getHITTypeURL($hitTypeId));
    }
    
    return { loaded => $hitsLoaded, failed => $failures, HITTypeId => $hitTypeId };
}

sub previewBlock {
    my ($preview) = @_;
    if (!defined($preview)) {
        return $preview;
    }
    elsif (UNIVERSAL::isa($preview, "CODE")) {
        return $preview;
    }
    elsif (UNIVERSAL::isa($preview, "GLOB")) {
        return sub {
            my $hitProps = shift;
            print $preview $hitProps->{Question}, "\n";
        };
    }
    else {
        my $out;
        return sub {
            my $hitProps = shift;
            if (!$out) {
                $out = IO::File->new($preview, "w");
                if (!$out) {
                    die "Couldn't open file $preview - $!.";
                }
            }
            print $out $hitProps->{Question}, "\n";
        };
    } 
}

sub xmlEntityEscapeHashValues {
    my $hash = shift;
    my $newHash = {};
    while (my ($key,$value) = each %$hash) {
        if (!exists $newHash->{"${key}_raw"}) {
            $newHash->{"${key}_raw"} = $hash->{$key};
        }
        $newHash->{$key} = xmlEntityEscape($hash->{$key});
    }
    return $newHash;
}

sub xmlEntityEscape {
    my $text = shift;
    return $text unless defined($text);
    $text =~ s/[&'"<>]/xmlCharacterEscape($&)/egs;
    return $text;
}

sub xmlCharacterEscape {
    if ($_[0] eq "<")     { return "&lt;"; }
    elsif ($_[0] eq ">")  { return "&gt;"; }
    elsif ($_[0] eq "&")  { return "&amp;"; }
    elsif ($_[0] eq "\"") { return "&quot;"; }
    elsif ($_[0] eq "'")  { return "&apos;"; }
    return $_[0];
}

return 1;
