Validation for league CSV


I love league/csv.

However I often want to do validation for the files people are uploading or my site is scraping. To make this easier I decided to write a class to make this faster to implement instead of rewriting it over and over.

I included in it a few common validators are included such as date check, numeric value check and a regex match – but what I really like is that I can pass through more complicated checks as variable functions. The function returns true for a pass, and any other returns get passed as an exception message. As an example if I wanted to make sure that the value is between 5 and 7 I could pass this:


$checkFiveToSeven = function($x) {
	if (!is_numeric($x)){ return 'Value not numeric'; }
	if ($x<5){ return 'Value less than 5'; }
	if ($x>7){ return 'Value higher than 7'; }
	return true;
};
$ImportCSV->add_requiredCol(name:'columnName', functionpass:$checkFiveToSeven);

We have to have league/csv in the directory this class is in (in a namespace in this example but of course can be altered to meet other needs) in the same format as used in this composer.json:

{
    "config": {
        "vendor-dir": "leaguecsvVendor"
    },
    "require": {
	"league/csv": "^9.0"
    }
}

Once you have league/csv ready this is the code I use to get the validation working with it.

<?php 
namespace NAMESPACE\CSVImport;
use PDO;
use \Exception;
use League\Csv\Reader;
use League\Csv\Statement;

/*
EXAMPLE USAGE:
	try{
		$ImportCSV = new NAMESPACE\CSVImport\CSVImport;
		$ImportCSV->add_requiredCol(name:'Fname',
									type:'text',
									maxlength:50,
									minlength:1,
								   );
		$ImportCSV->add_requiredCol(name:'Lname',
									type:'text',
									maxlength:50,
									minlength:1,
								   );
		$ImportCSV->add_requiredCol(name:'DOB',
									type:'date',
									functionpass:$functionVar,
									regex:"/\d{4}\-\d{1,2}-\d{1,2}/",
								   );
		$ImportCSV->add_requiredCol(name:'Sex',
									inarray:['M','F','D'],
								   );
		$csvFile = realpath('test.csv');
		$ImportCSV->loadFile($csvFile)->validate();
		foreach ($ImportCSV->CSV as $rs){
			print_r(['rs',$rs]);
		}
	} catch(Exception $e) {
		echo "<p><mark>EXCEPTION!! <pre>";
		echo $e->getMessage();
		echo "</pre></mark></p>";
	}

*/


class CSVImport
{
	public $CSV;
	
	public $ColumnNames;
	protected $requiredCols = [];
	
	public $errors = [];

    public function __construct()
    {
		require_once("leaguecsvVendor/autoload.php");
    }
	
	public function loadFile($filePath,$HeaderOffset=0)
	{
		if (!file_exists($filePath)) {
			throw new Exception(__METHOD__." passed file not found");
		}
		$this->CSV = Reader::createFromPath($filePath, 'r');
		if (isset($HeaderOffset)) {
			$this->CSV->setHeaderOffset($HeaderOffset);
		}
		$this->ColumnNames = $this->CSV->getHeader();
		return $this;
	}
	
	public function validate()
	{
		$this->validate_RequiredColumnNames();
		$this->validate_FileData();
	}
	
	public function getErrors()
	{
		return $this->errors;
	}
	
	public function validate_FileData()
	{ // go through each record and verify if the contents are as required
		$line= 1;
		foreach ($this->CSV as $rs) {
			foreach ($this->requiredCols as $req) {
				// EACH VALIDATION TYPE
				if (isset($req['maxlength'])) { // maximum char length
					if (strlen($rs[$req['name']])>$req['maxlength']) {
						$this->errors[] = 'Line '.$line.', Column "'.$req['name'].'" ('.$rs[$req['name']].') exceeded max length of '.$req['maxlength'];
					}
				}
				if (isset($req['minlength'])) { // minimum char length
					if (strlen($rs[$req['name']])<$req['minlength']) {
						$this->errors[] = 'Line '.$line.', Column "'.$req['name'].'" ('.$rs[$req['name']].') is under minimum length of '.$req['minlength'];
					}
				}
				if (isset($req['regex'])) { // using a regex to match
					if (!preg_match($req['regex'],$rs[$req['name']])) {
						$this->errors[] = 'Line '.$line.', Column "'.$req['name'].'" ('.$rs[$req['name']].') did not match required format: '.$req['regex'];
					}
				}
				if (isset($req['methodcall'])) { // an existing method in this class
					if (!call_user_func([$this,$req['methodcall']],$rs[$req['name']])) {
						$this->errors[] = 'Line '.$line.', Column "'.$req['name'].'" ('.$rs[$req['name']].') failed validation method: '.$req['methodcall'];
					}
				}
				if (isset($req['functionpass'])) { // passing a function as a variable. 
					// Custom functions must accept either ($value) or ($value,$record)
					$reflection = new \ReflectionFunction($req['functionpass']);
					if ($reflection->getNumberOfRequiredParameters()==2) {
						$fpVal = $req['functionpass']($rs[$req['name']],$rs);
					} else {
						$fpVal = $req['functionpass']($rs[$req['name']]);
					}
					if ($fpVal!==true) {
						$this->errors[] = 'Line '.$line.', Column "'.$req['name'].'" ('.$rs[$req['name']]
							.') custom validation function returned: '.$fpVal;
					}
				}
				if (isset($req['inarray'])) { // passing an array of allowed values
					if (!in_array($rs[$req['name']],$req['inarray'])) {
						$this->errors[] = 'Line '.$line.', Column "'.$req['name'].'" ('.$rs[$req['name']].') not found in list of valid options: ['.implode(', ',$req['inarray']).']';
					}
				}
			}
			$line++;
		}
		if (count($this->errors)) {
			throw new Exception("File has the validation error(s)");
		}
	}
	
	public function validate_RequiredColumnNames()
	{ // see if the required column names are in this file
		foreach ($this->requiredCols as $req) {
			if (!in_array($req['name'],$this->ColumnNames)) {
				$this->errors[] = 'Missing required column: '.$req['name'];
			}
		}
		if (count($this->errors)) {
			throw new Exception("File is missing required column(s)");
		}
	}
	
	public function add_requiredCol(string $name,
									string $type=null,
								    int $maxlength=null,
								    int $minlength=null,
									string $regex=null,
									string $methodcall=null,
									object $functionpass=null,
									array $inarray=[],
								   )
	{
		$arr = [];
		$arr['name'] = $name;
		if (isset($type)) {
			// preset type variables :: 
			// NOTE these will be overwritted by additional vars included as this 
			// 	intentionally runs before the other var sets
			switch ($type) {
				case 'date':
					$arr['methodcall'] = 'isDateFormat';
					break;
				case 'numeric':
					$arr['methodcall'] = 'isNumericFormat';
					break;
			}
		}
		if (isset($maxlength)) {
			$arr['maxlength'] = $maxlength;
		}
		if (isset($minlength)) {
			$arr['minlength'] = $minlength;
		}
		if (isset($regex)) {
			$arr['regex'] = $regex;
		}
		if (isset($methodcall)) {
			$arr['methodcall'] = $methodcall;
		}
		if (isset($functionpass)) {
			$arr['functionpass'] = $functionpass;
		}
		if (isset($inarray)&&count($inarray)) {
			$arr['inarray'] = $inarray;
		}
		$this->requiredCols[$name] = $arr;
	}

	public function isDateFormat($value)
	{
		if(strtotime($value)){
			return true;
		}
		return false;
	}

	public function isNumericFormat($value)
	{
		if (is_numeric($value)) {
			return true;
		}
		return false;
	}
	
}