<?
# CSV-SPARQLer
# (c) 2005-2014 Morten Frederiksen
# License: http://www.gnu.org/licenses/gpl

define('REDLANDURI','mysql://mysql/redland');
include_once('service.php');
include_once('curl.php');

# Initialization: create the service object, supplying default values for
# the optional request parameters (header, distinct, nulls, output-type).
$service=new Service(array(
    'defaults'=>array(''=>array(
        'header'=>'',
        'distinct'=>'true',
        'nulls'=>'false',
        'output-type'=>'text/html; charset=utf-8'))));

# Convert (only csv parameter is required)?
# The conversion is skipped when the request carries execute=no.
if ($service->match('',array('csv'))
        && (!array_key_exists('execute',$_REQUEST) || 'no'!=$_REQUEST['execute'])) {
    # Read the request parameters.  The two flags are compared against the
    # string 'true' and turned into booleans here.
    $csv=$service->input('','csv');
    $header=$service->input('','header');
    $distinct=('true'==$service->input('','distinct'));
    $nulls=('true'==$service->input('','nulls'));
    $prepend=$service->input('','prepend');
    $outputxslt=$service->input('','output-xslt');
    $outputtype=$service->input('','output-type');

    # Parameters handed to the XSLT transformation: three built-in values,
    # then every request parameter (a request parameter with the same name
    # overrides a built-in, as it is assigned afterwards).
    $xsltparms=array(
        # ISO 8601 timestamp.  date('c') renders the UTC offset correctly for
        # negative, zero and non-whole-hour offsets, which the previous
        # hand-built offset did not.
        '_now'=>date('c'),
        '_id'=>md5($csv),
        # Request URI, or 'N/A' when $_SERVER['argv'] is set.  A port other
        # than 80 is appended with its ':' separator.
        '_uri'=>isset($_SERVER['argv'])?'N/A':'http://'.$_SERVER['SERVER_NAME'].
            ($_SERVER['SERVER_PORT']!=80?':'.$_SERVER['SERVER_PORT']:'').
            $_SERVER['REQUEST_URI']);
    # foreach instead of each(), which no longer exists in PHP 8.
    foreach ($_REQUEST as $name=>$value)
        $xsltparms[$name]=$value;

    # NOTE(review): presumably xslt() reports failures through this global;
    # confirm against its definition (not visible in this file).
    global $xslt_error_message;
    $xslt_error_message='';

    # Check remote XSLT: anything that is not an http:// URI is ignored.
    if (!preg_match('|^http://|',$outputxslt))
        $outputxslt='';

    # GET CSV data and convert...
    if (!preg_match('|^https?://|',$csv))
        $service->error('400 Illegal CSV URI [CSV URI must be <code>http:</code> or <code>https:</code>.]');
    elseif (!($csvdata=curl_get($csv,$service->uri)))
        $service->error('502 Bad Gateway [Unable to retrieve CSV data from remote server.]');
    elseif (!($xml=csv2xml($csvdata,$header,$prepend,$distinct,$nulls)))
        $service->error('500 Internal Server Error [Unable to convert CSV data from remote server.]');
    # Precedence made explicit: (stylesheet given AND transformation returned
    # a false value) OR an error message was recorded.
    # NOTE(review): the stylesheet is fetched from a caller-supplied URI with
    # warnings suppressed; consider restricting the allowed hosts.
    elseif (($outputxslt
            && !($result=xslt(array('/_xsl'=>@join('',@file($outputxslt)),'/_xml'=>$xml),$xsltparms)))
            || $xslt_error_message)
        $service->error('500 Internal Server Error [Unable to transform generated XML with XSLT ('.$xslt_error_message.').]');
    elseif ($outputxslt) {
        # Transformed output, served with the requested content type.
        header('Content-Type: '.$outputtype);
        print $result;
        $service->free();
    } else {
        # Untransformed XML output.
        # NOTE(review): header() replaces an earlier header of the same name,
        # so the text/plain line below is the one that takes effect.  Both
        # lines are kept as found; confirm which content type is intended.
        header('Content-Type: application/sparql-results+xml; charset=utf-8');
        header('Content-Type: text/plain; charset=utf-8');
        print $xml;
        # Trailing XML comment naming the source; '--' is broken up because
        # it is not allowed inside an XML comment.
        print "\n".'<!-- generated from '.str_replace('--','- -',$csv).' -->';
        $service->free();
    }
}

# Finish.
global $db;
$service->finish($db->debug);

# Convert CSV to DAWG XML (SPARQL query results XML).
#
# Parameters:
#   $csvdata  - CSV text; its first non-empty line holds the field names
#               unless $header is given.
#   $header   - optional header line, placed in front of $csvdata when it is
#               not empty.
#   $prepend  - optional string placed in front of every data line before the
#               line is split into fields (not applied to the header line).
#   $distinct - when true, a result row identical to an earlier row is
#               output only once.
#   $nulls    - when true, a field holding exactly NULL is output as unbound.
#
# Returns the XML document as a string, or '' when the input has no lines.
function csv2xml($csvdata,$header='',$prepend='',$distinct=false,$nulls=false) {
    # Split CSV data into lines, skipping empty lines.  CR counts as a line
    # break too, so blank lines in CRLF data are skipped as well.
    if (''!=$header)
        $csvdata=$header."\n".$csvdata;
    $lines=preg_split('|[\r\n]+|',$csvdata,-1,PREG_SPLIT_NO_EMPTY);

    # First line is required header, field names...
    if (!$lines)
        return '';

    # Output root element and one variable declaration per header field.
    $xml='<?xml version="1.0"?>'."\n"
        .'<sparql xmlns="http://www.w3.org/2005/sparql-results#">'."\n"
        ."<head>\n";
    $bindnames=array();
    foreach (csvsplit(array_shift($lines)) as $field) {
        # Everything but word characters, '.', ':' and '-' becomes '_'.  The
        # hyphen is last in the class so it is a literal, not a range.
        $bindname=htmlspecialchars(preg_replace('|[^\w.:-]|','_',$field));
        $bindnames[]=$bindname;
        $xml.='  <variable name="'.$bindname.'"/>'."\n";
    }
    $xml.="</head>\n";

    # Output bindings for each line.  foreach is used so that a line holding
    # just "0" (a false value in PHP) does not end the loop early.
    $xml.="<results>\n";
    $seen=array();
    foreach ($lines as $line) {
        $result="  <result>\n";
        foreach (csvsplit($prepend.$line) as $i=>$field) {
            # Fields are matched to header names by position.
            # NOTE(review): a row with more fields than the header gets
            # bindings with an empty name, as before; confirm whether such
            # fields should be dropped instead.
            $name=isset($bindnames[$i])?$bindnames[$i]:'';
            $result.='    <binding name="'.$name.'">';
            if (preg_match('|^\w+:\S+$|',$field))
                # Looks like scheme:rest without whitespace: output as URI.
                $result.='<uri>'.htmlspecialchars($field).'</uri>';
            elseif (''!=$field && (!$nulls || 'NULL'!=$field))
                $result.='<literal>'.htmlspecialchars($field).'</literal>';
            else
                # Empty field, or NULL while $nulls is on.
                $result.='<unbound/>';
            $result.="</binding>\n";
        }
        $result.="  </result>\n";

        if (!$distinct)
            $xml.=$result;
        elseif (!isset($seen[$result])) {
            # First occurrence of this exact row text.
            $seen[$result]=1;
            $xml.=$result;
        }
    }
    $xml.="</results>\n";

    # End document and return.
    return $xml.'</sparql>';
}

# Split a line with data into its fields.
#
# The line is trimmed and split on commas and tabs.  A field that starts
# with a double quote is treated as quoted: following pieces are joined back
# onto it, with the delimiter that separated them, until the joined text
# ends with a double quote; the surrounding quotes are then removed.
# Doubled quotes inside a field are left as they are.
#
# Parameters:
#   $line - one line of CSV text.
#
# Returns an array of field strings, in order.
function csvsplit($line) {
    # Capture the delimiters too, so a tab inside a quoted field is put back
    # as a tab and not as a comma.  Result: field, delim, field, delim, ...
    $parts=preg_split('|([,\t])|',trim($line),-1,PREG_SPLIT_DELIM_CAPTURE);
    $newfields=array();
    $last='';
    $count=count($parts);
    for ($i=0;$i<$count;$i+=2) {
        $field=$parts[$i];
        if ($last!=='') {
            # Inside a quoted field: re-attach the delimiter and this piece.
            $last.=$parts[$i-1].$field;
            if (substr($last,-1)=='"') {
                $newfields[]=(string)substr($last,1,-1);
                $last='';
            }
        } elseif ($field!=='' && $field[0]=='"') {
            # A quoted field that is already complete (closing quote present
            # and an even number of quotes) is finished right away, so it
            # cannot swallow the fields after it; otherwise start collecting.
            if (strlen($field)>1 && substr($field,-1)=='"'
                    && substr_count($field,'"')%2==0)
                $newfields[]=(string)substr($field,1,-1);
            else
                $last=$field;
        } else
            $newfields[]=$field;
    }
    # A quoted field still open at the end of the line: strip the quotes if
    # it ends with one, otherwise keep the text as it is.
    if ($last!=='')
        $newfields[]=(substr($last,-1)=='"')?(string)substr($last,1,-1):$last;
    return $newfields;
}

// EOF