<?php
/** csv_pair_file Example file
*
* Example script showing how to use the csv_pair_file class to manage paired CSV files
*
* Normally used for Neural Networks, Deep Learning, Machine Learning, Artificial Intelligence, ...
*
*
*
* @author Rafael Martin Soto
* @author {@link http://www.inatica.com/ Inatica}
* @since July 2021
* @version 1.0.0
* @license GNU General Public License v3.0
*/
require_once( 'csv_pair_file_class.php' ); // For manage csv files
// Example: randomize a paired (original, desired) CSV dataset and split it
// into Train / Test / Validation pairs.
//
// Resulting variables:
//   $csv_train_dataset      - pair built from the first part of the first split
//   $csv_test_dataset       - pair built from the first part of the second split
//   $csv_validation_dataset - pair built from the second part of the second split

// Pair made of the input samples file and the expected-outputs file.
$csv_original_dataset = new csv_pair_file('original_dataset.csv', 'desired_dataset.csv');

echo 'Randomize original and desired Datasets'.PHP_EOL;
$csv_original_dataset->randomize();

$perc_Train     = 70; // Percentage of the whole dataset used for Train
$perc_rest_Test = 50; // Percentage of the remaining data used for Test (the rest is Validation)

echo 'Splitting Randomized Dataset in '.$perc_Train.'% for Train and '.(100-$perc_Train).'% for Test & Validation'.PHP_EOL;

// Open the randomized pair: index 0 and index 1 of the returned names are
// passed as the first and second file of the new pair.
$RandomizedName = $csv_original_dataset->get_csv_randomized_file_names();
$csv            = new csv_pair_file( $RandomizedName[0], $RandomizedName[1] );

// split() is used here as returning two pairs of file names:
// [0] => first part, [1] => remaining part (each one a [file, file] pair).
// NOTE(review): return shape inferred from how it is indexed below - confirm against csv_pair_file::split().
$SplittedNames = $csv->split( $perc_Train );

// We have the new Train data files (original & desired)
$csv_train_dataset = new csv_pair_file( $SplittedNames[0][0], $SplittedNames[0][1] );

// The rest needs to be split in 2 parts (Test & Validation data)
echo 'Splitting Rest '.(100-$perc_Train).'% Dataset in 2 files of '.$perc_rest_Test.'% for Test and '.(100-$perc_rest_Test).'% for Validation'.PHP_EOL;

$csv = new csv_pair_file( $SplittedNames[1][0], $SplittedNames[1][1] );

// The remaining pair holds (100 - 70)% = 30% of the global data. Splitting it
// at 50% yields 15% for Test and 15% for Validation (70% + 15% + 15% = 100%).
$SplittedNames = $csv->split( $perc_rest_Test );

// We have the new Test & Validation data files.
// Plain variables are used on purpose: this is a top-level script, so there is
// no object context and `$this` would raise "Using $this when not in object context".
$csv_test_dataset       = new csv_pair_file( $SplittedNames[0][0], $SplittedNames[0][1] );
$csv_validation_dataset = new csv_pair_file( $SplittedNames[1][0], $SplittedNames[1][1] );
?>
|