#!/usr/bin/perl

BEGIN {unshift@INC,('../../');}
BEGIN {unshift@INC,('../');}

use strict;
require W3C::Rdf::RdfApp;
require W3C::Util::Exception;

require RDF::RDFWeb::Node;
require RDF::RDFWeb::MemDB;

package RDF::RDFWeb::XRDFDataSource;

# This class extends the W3C perllib application class.
@RDF::RDFWeb::XRDFDataSource::ISA = ('W3C::Rdf::RdfApp');

# Diagnostic switches. $verbose gates the progress/error prints in render()
# and _cget(); $debug gates the banner printed by new().
my $verbose = 1;
my $debug   = 1;

# File-scoped state shared by ALL instances (todo: move into the object hash).
#   @data - triple accumulator; emptied at the start of every get() call and
#           filled by the tripler branch of get() and by _storefromcara().
#   $rs   - random string regenerated by every get() call and spliced into
#           genid / online: labels by _storefromcara().
my @data;
my $rs;

# Registry of known parser URIs, mapped to a human-readable description.
# new() refuses any URI that is not a key of this hash. Note that get() only
# has branches for the Perllib, Cara and tripler entries.
# Todo: how to deal with parameterised parser setups?
my %parsers = (
  'http://cara.sourceforge.net/'               => 'The Cara parser',
  'http://www.w3.org/Perllib/'                 => 'EricP perllib parser',      # works ok
  'http://www.redland.opensource.ac.uk/'       => 'Redland/Rapier',            # not yet
  'http://ilrt.org/discovery/2001/04/tripler/' => "simple triple file parser.",
);

# True value handed back to require().
1;


# Constructor.
#
#   $parseruri - URI naming the parser to use; must be a key of %parsers
#   $outds     - optional data store that getMemDB() will load triples into
#
# Returns the new object. Dies when $parseruri is not a registered parser
# (the parent constructor has already been run at that point).
sub new {
  my ($this, $parseruri, $outds) = @_;
  my $class = ref($this) || $this;

  # Build on whatever the parent class constructs, re-blessed into $class.
  my $self = bless($class->SUPER::new(), $class);
  $self->{'PARSERURI'} = $parseruri;
  $self->{'OUTDS'}     = \$outds;   # kept as a scalar ref; read by getMemDB

  die ("can't find parser $parseruri\n") if !$parsers{$parseruri};

  if ($debug) {
    print "XRDFDataSource for parser $parseruri ".$parsers{$parseruri}."\n";
  }
  return $self;
}

# Accessor: returns whatever is stored under the object's 'DATA' key
# (get(), render() and _cget() store an array ref of triple hashes there).
sub data {
  my ($self) = @_;
  my $triples = $self->{'DATA'};
  return $triples;
}

# Load every triple held in $self->{'DATA'} into a data store and return it.
#
# The store is the 'outds' handed to new() when there was one, otherwise a
# freshly built RDF::RDFWeb::MemDB (todo: should persist this and build it
# only once).
#
# Returns a REFERENCE TO A SCALAR holding the store, so callers must
# dereference it with ${ ... }.
sub getMemDB {
  my $self = shift;
  my $out = ${ $self->{'OUTDS'} };
  if ($out) {
    # Was "print STERR ...": a misspelt handle, so the message was silently
    # written to an unopened filehandle and never appeared.
    print STDERR "debug GetMemDB: found an outds: ", $out ,"\n";
  }
  my $mem = $out || RDF::RDFWeb::MemDB->new;
  # todo: we don't need 'DATA' any more... should just use the 'outds'
  foreach my $t ( @{ $self->{'DATA'} } ) {
    my ($p, $s, $o) = @{$t}{qw(p s o)};
    # Argument order is predicate, subject, object.
    $mem->assert($p->value, $s->value, $o->value);
  }
  return ( \$mem );
}



#########################################################################
#
# get data using parser of choice... (todo: default=w3c)
#
#   $uri     - location of the data to parse (the tripler branch opens it
#              as a local file)
#   $baseuri - optional; when neither the object nor the caller has one,
#              $uri is recorded as $self->{'baseuri'}
#
# Resets the file-scoped @data accumulator and $rs random string, runs the
# parser selected in new(), then returns the result of getMemDB(), i.e. a
# reference to a scalar holding the data store.
#
# danger! this is poorly organised...
#
#########################################################################
sub get {
  my ($self, $uri, $baseuri) = @_;
  if ( (!$self->{'baseuri'}) && (!$baseuri) ) {
    $baseuri = $uri;
    $self->{'baseuri'} = $baseuri;
  }

  @data = ();          # reset the (GLOBAL! todo! needed due to cara) datadump
  $rs = randstring();  # get a random pseudo URI for use in genids

  my $parser = $self->{'PARSERURI'};
  print "PARSER: $parser \n";

  if ($parser eq 'http://www.w3.org/Perllib/') {

    # NOTE(review): execute() is not defined in this file; presumably it is
    # inherited and ends up calling render() -- confirm.
    my @todo = ($uri);
    $self->execute(\@todo);

  }
  elsif ($parser eq 'http://cara.sourceforge.net/') {

    $self->_cget($uri);

  }
  elsif ($parser eq 'http://ilrt.org/discovery/2001/04/tripler/') {

    # Point DATA at the (just emptied) accumulator once, up front, instead
    # of re-assigning it for every input line.
    $self->{'DATA'} = \@data;

    # Three-argument open with a lexical handle: the name is always treated
    # as a plain file to read, never as a mode prefix or a command pipe.
    if (open(my $in, '<', $uri)) {
      # A lexical line variable keeps the caller's $_ intact.
      while (my $line = <$in>) {
        # Expected line shape: rdf('PREDICATE','SUBJECT','OBJECT').
        my ($pred_text, $subj_text, $obj_text) = split(/','/, $line, 3);
        $pred_text =~ s/rdf\('//;
        $obj_text  =~ s/'\)\.//;
        chomp $pred_text; chomp $subj_text; chomp $obj_text;
        $obj_text =~ s/\r//g;

        my $sub  = RDF::RDFWeb::Node->new($subj_text);
        my $pred = RDF::RDFWeb::Node->new($pred_text);
        my $obj  = RDF::RDFWeb::Node->new($obj_text); # how do we do genid etc? --todo
        push(@data, { 's' => $sub, 'p' => $pred, 'o' => $obj });
      }
      close($in);
    }
    else {
      warn "Can't load triple file $uri\n";
    }
  }

  my $out = $self->getMemDB();
  return $out;
}


##########################################################################
## render() gets called when the RDF datasource finishes
## doing its thing (per the original note; the caller is not in this file).
##
## Reads every statement out of $self->{RDF_DB}, converts each one into a
## hash of RDF::RDFWeb::Node objects under the keys 's', 'p' and 'o', and
## stores the list (possibly empty) as an array ref in $self->{'DATA'}.
## Nothing is read from the database when the parser reports no root
## attribution.
##
sub render {
  my ($self) = @_;
  my $attrib = $self->{RDF_PARSER}->getRootAttribution;
  # NOTE(review): $sysID is never used; the call is kept only in case
  # getSystemId has side effects -- confirm and remove.
  my $sysID = $self->{RDF_PARSER}->getSystemId;
  my @t;

  if ($attrib) {			   ### if this works, I get triples
    my $view;
    my $flags = {};
    my @statements = $self->{RDF_DB}->triplesMatching($view, [[undef,undef, undef]], $flags);

    if (@statements) {
      print STDERR "triplecount [perllib: ",$W3C::Rdf::Parser::REVISION, " ]  ",
        scalar @statements,"\n" if $verbose;

      foreach my $statement (@statements) {
        my $pred_text = $statement->getPredicate->show;
        my $subj_text = $statement->getSubject->show;
        my $obj       = $statement->getObject;
        my $obj_text  = $obj->show;        ## uri/string? thing- @@todo
        print "DEBUG-Perllib: p=$pred_text s=$subj_text o=$obj_text obj=$obj\n" if $verbose;

        # NOTE(review): the truthiness guards below mean a value of "0" or ""
        # yields no node, and the triple is still pushed without that key.
        my %triple;
        $triple{'s'} = RDF::RDFWeb::Node->newResource($subj_text) if $subj_text;
        $triple{'p'} = RDF::RDFWeb::Node->newProperty($pred_text) if $pred_text;

        if ($obj->isa('W3C::Rdf::Uri')) {
          $triple{'o'} = RDF::RDFWeb::Node->newResource($obj_text) if $obj_text;
        }
        elsif ($obj->isa('W3C::Rdf::String')) {
          $triple{'o'} = RDF::RDFWeb::Node->newLiteral($obj_text) if $obj_text;
        }
        elsif ($obj->isa('W3C::Rdf::GenId')) {
          # Flag the node only when one was really created. Previously the
          # flag was set unconditionally, which autovivified a plain
          # unblessed hash as the object when the text was empty.
          if ($obj_text) {
            my $node = RDF::RDFWeb::Node->newResource($obj_text);
            $node->{'ISGENID'} = 1;
            $triple{'o'} = $node;
          }
        }
        else {
          #die??? ('unknown node type from parser');
        }

        # (A textual "triple(...)" string used to be assembled here but was
        # never used, so it has been dropped.)
        push (@t, \%triple);
      }
    }
  }

  $self->{'DATA'} = \@t;
}







# Parse $file with the Cara parser and collect its arcs.
#
# The Cara (IWI::*) modules and IO::String are loaded lazily inside an eval,
# so a missing module or a parse failure is reported on STDERR (when
# $verbose is set) instead of being fatal. Each arc of the parsed graph is
# handed to _storefromcara(), which appends to the file-scoped @data;
# $self->{'DATA'} is then pointed at @data whether or not parsing worked.
sub _cget {
  my ($self, $file) = @_;

  eval {
    require IWI::RDF::Graph::JGraph::StorableJGraph;
    require IWI::RDF::Writer::Graph::Triple;
    require IWI::RDF::Parser;
    require IO::String;

    my $graph = IWI::RDF::Graph::JGraph::StorableJGraph->new();
    eval {
      my $parser = IWI::RDF::Parser->new(graph => $graph, showBags => 0,
        filename => $file);
      # The return values are not used; the call stays in list context as
      # before. Later code reads the $graph passed to the parser above.
      my ($parsed_graph, %schema) = $parser->parse;
    };
    if ($@) {
      die "Cara Parser error: $@ -\n";
    }

    # NOTE(review): the serialised output written to $string is never read
    # afterwards -- confirm whether this write is still needed.
    my $string;
    my $out    = IO::String->new($string);
    my $writer = IWI::RDF::Writer::Graph::Triple->new(graph=>$graph,out=>$out);
    $writer->write();

    $graph->foreachArc( \&_storefromcara );
  };
  if ($@) {
    # Was join(/\n/, $@): that ran a regex match against $_ to obtain the
    # separator. With a single list element the output is simply $@.
    print STDERR "Error loading/running Cara parser code\n", $@, "\n" if $verbose;
  }

  $self->{'DATA'} = \@data;
}


# Callback handed to the Cara graph's foreachArc() by _cget().
#
# Converts one arc into a hash of RDF::RDFWeb::Node objects (keys 's', 'p',
# 'o') and appends it to the file-scoped @data. Only the first argument (the
# arc) is read. The former ($$) prototype has been dropped: it described two
# arguments, and prototypes are ignored for calls through a code reference.
sub _storefromcara {
  my $arc = shift;

  # Subject: use its label, or fall back to a genid built from its ID.
  my $s = $arc->getSubject->getLabel;
  $s = "genid:".$arc->getSubject->{ID} if $s eq "";

  # Object: resources are treated like subjects; anything else is taken to
  # be a string and wrapped in "" (Todo: decide a better way).
  my $object = $arc->getObject;
  my $o;
  if ($object->isa('IWI::RDF::Node::Resource')) {
    $o = $object->getLabel;
    $o = "genid:".$object->{ID} if $o eq "";
  }
  else {
    $o = "\"".$object->getLabel."\"";
  }

  my $p = $arc->getPredicate();

  # Turn a leading / trailing single quote on the object into a double quote.
  $o =~ s/^'/"/;
  $o =~ s/'$/"/;

  # Splice the per-get() random string $rs into genid: labels, and turn
  # online: labels into genid:online: ones (cara doesn't understand "").
  # No genids for predicates (we hope).
  $o =~ s{^genid:}{genid:$rs};
  $s =~ s{^genid:}{genid:$rs};
  $o =~ s{^online:}{genid:online:$rs};
  $s =~ s{^online:}{genid:online:$rs};

  my $sub  = RDF::RDFWeb::Node->new($s);
  my $pred = RDF::RDFWeb::Node->new($p);
  my $obj  = RDF::RDFWeb::Node->new($o);

  # Kept as the final statement so the sub's implicit return value (the
  # result of push) is unchanged.
  push(@data, { 's' => $sub, 'p' => $pred, 'o' => $obj });
}


## Build a random string of the form "dddd-dddd-dddd/" (three groups of four
## decimal digits joined by '-', plus a trailing slash). Used to uniq'ify
## genids, and online: from Cara.
## todo: move to Util class; deal with 'online'
##

sub randstring {
  my $seed = shift;   # accepted from the caller but not used

  # Three groups, each made of four independently drawn digits 0-9.
  my @groups = map {
    join('', map { int(rand(10)) } 1 .. 4)
  } 1 .. 3;

  return join('-', @groups) . "/";
}
