Forráskód Böngészése

augmentation (Frame shifting) now done separately in each dataset

Sebastian Vendt 6 éve
szülő
commit
9cac42e93b
2 módosított fájl, 57 hozzáadás és 36 törlés
  1. 37 30
      MATLAB/generate.m
  2. 20 6
      MATLAB/generateTrainingDataFromCSV.m

+ 37 - 30
MATLAB/generate.m

@@ -4,10 +4,33 @@ frameoffset = 25;
 maxFrameShift = 10;
 splitRatio = [70, 20, 10]; % percentage of training, validation, testing needs to add up to 100!!!
 file = '2019_08_16_1856';
-filepointer = csvread(strcat('AppData/', file, '.csv'), 2, 0);
-% get the training data and the labels
-[dataset, lbls] = generateTrainingDataFromCSV(filepointer, framesize, frameoffset, maxFrameShift);
+% read the csv and start in the second row, first column
+csvData = csvread(strcat('AppData/', file, '.csv'), 2, 0);
 
+% split into separate sets and save them as mat (normalization will be done
+% in julia)
+[train_data, train_lbls] = generateTrainingDataFromCSV(csvData, 0, splitRatio(1), framesize, frameoffset, maxFrameShift);
+[validation_data, validation_lbls] = generateTrainingDataFromCSV(csvData, splitRatio(1), splitRatio(2), framesize, frameoffset, maxFrameShift);
+[test_data, test_lbls] = generateTrainingDataFromCSV(csvData, splitRatio(2) + splitRatio(1), splitRatio(3), framesize, frameoffset, maxFrameShift);
+
+% shuffle the datasets (MATLAB passes arrays by value, so the shuffled outputs must be captured)
+[train_data, train_lbls] = shuffleSet(train_data, train_lbls);
+[validation_data, validation_lbls] = shuffleSet(validation_data, validation_lbls);
+[test_data, test_lbls] = shuffleSet(test_data, test_lbls);
+
+data = train_data;
+labels = train_lbls;
+save(strcat('TrainingData/', file, '_TRAIN.mat'),'data', 'labels')
+
+data = validation_data;
+labels = validation_lbls;
+save(strcat('TrainingData/', file, '_VAL.mat'),'data', 'labels')
+
+data = test_data;
+labels = test_lbls;
+save(strcat('TrainingData/', file, '_TEST.mat'),'data', 'labels')
+
+dataset = cat(3, train_data, validation_data, test_data);
 % gathering some statistics
 % 1. pointerDownTime equals the time a finger is residing on the screen 
 % 2. timeBetweenTouchEvents indicating the frequency of the taps
@@ -16,18 +39,17 @@ POINTERDOWNTIME1 = 14;
 POINTERDOWNTIME2 = 15;
 TIMEBETWEENTOUCHEVENTS = 16;
 
-indicesPDT1 = find(filepointer(:, POINTERDOWNTIME1) ~= 0);
-indicesPDT2 = find(filepointer(:, POINTERDOWNTIME2) ~= 0);
-indicesTBT = find(filepointer(:, TIMEBETWEENTOUCHEVENTS) ~= 0);
-pointerDownTimes = filepointer(indicesPDT1, POINTERDOWNTIME1);
-pointerDownTimes = cat(1, pointerDownTimes, filepointer(indicesPDT1, POINTERDOWNTIME1));
-TimeBetweenTouchEvents = filepointer(indicesTBT(2:end), TIMEBETWEENTOUCHEVENTS);
+indicesPDT1 = find(csvData(:, POINTERDOWNTIME1) ~= 0); % rows with a non-zero down time in column POINTERDOWNTIME1
+indicesPDT2 = find(csvData(:, POINTERDOWNTIME2) ~= 0); % rows with a non-zero down time in column POINTERDOWNTIME2
+indicesTBT = find(csvData(:, TIMEBETWEENTOUCHEVENTS) ~= 0); % rows with a non-zero time between touch events
+pointerDownTimes = csvData(indicesPDT1, POINTERDOWNTIME1);
+pointerDownTimes = cat(1, pointerDownTimes, csvData(indicesPDT2, POINTERDOWNTIME2)); % append column 2 (previously column 1 was appended twice)
+TimeBetweenTouchEvents = csvData(indicesTBT(2:end), TIMEBETWEENTOUCHEVENTS); % the first non-zero entry is skipped
 
 maxGYRO = [max(dataset(:,:,1)')', max(dataset(:,:,2)')', max(dataset(:,:,3)')'];
 maxACC = [max(dataset(:,:,4)')', max(dataset(:,:,5)')', max(dataset(:,:,6)')'];
 
 fig1 = figure;
-% TODO Something is still wrong with these times...
 histogram(pointerDownTimes ./ 1000000) % Divide from ns to ms 
 title('Histogram of the Duriation of touch events in ms')
 fig2 = figure;
@@ -54,23 +76,8 @@ subplot(2,3,6)
 histogram(maxACC(:, 3), nbins)
 title('Hist Max ACC Z')
 
-% shuffle the dataset
-shuffle = randperm(size(dataset, 3));
-dataset = dataset(:, :, shuffle);
-lbls = lbls(:, shuffle);
-
-% split into seperate sets and save them as mat (normalization will be done
-% in julia)
-splitTrain = floor(size(dataset, 3) * splitRatio(1) / 100 )
-data = dataset(:, :, 1:splitTrain);
-labels = lbls(:, 1:splitTrain);
-save(strcat('TrainingData/', file, '_TRAIN_.mat'),'data', 'labels')
-
-splitVal = floor(size(dataset, 3) * (splitRatio(2) + splitRatio(1)) / 100 )
-data = dataset(:, :, splitTrain+1:splitVal);
-labels = lbls(:, splitTrain+1:splitVal);
-save(strcat('TrainingData/', file, '_VAL_.mat'),'data', 'labels')
-
-data = dataset(:, :, splitVal+1:end);
-labels = lbls(:, splitVal+1:end);
-save(strcat('TrainingData/', file, '_TEST_.mat'),'data', 'labels')
+function [dataset, labels] = shuffleSet(data, lbls)
+    order = randperm(size(data, 3)); % one random permutation shared by data and labels
+    dataset = data(:, :, order);     % reorder along the 3rd dimension
+    labels = lbls(:, order);         % reorder the label columns identically
+end

+ 20 - 6
MATLAB/generateTrainingDataFromCSV.m

@@ -1,21 +1,35 @@
-function [data, labels] = generateTrainingDataFromCSV(file, framesize, frameoffset, maxFrameShift)
-% generateTrainingDataFromCSV(file, framesize, frameoffset, maxFrameShift)
+function [data, labels] = generateTrainingDataFromCSV(file, offset, len, framesize, frameoffset, maxFrameShift)
+% generateTrainingDataFromCSV(file, offset, len, framesize, frameoffset, maxFrameShift)
 % generating frames with fixed number of samples, parametrized by framesize
 % and frameoffset. frameoffset determines the number of sample to include
 % before the actual touch event
 % maxFrameShift determines the jitter in both directions when determining
 % the origin of the frame based on the postion of the touch event
+% finally, offset and len are given in percent (0 to 100) of the dataset to be used
+% offset + len cannot be larger than 100
+%
+% Returns two matrices
+% data: 
+% framesize x 6 x N (Where N denotes the total number of samples)
+%
+% labels:
+% 2 x N 
 
   X_COORD = 10;
   Y_COORD = 11;
-  
-  % read the csv and start in the second row, first column
 
-  sizeData = size(file);
-  fprintf("read %d sensor samples\n", sizeData(1));
   
   % find the action down events
   indices = find(file(:,9) == 1);
+  events = length(indices);
+ 
+  
+  offset_index = floor(length(indices) * offset / 100) + 1;
+  len_index = floor(length(indices) * len / 100) + offset_index - 1;
+  
+  indices = indices(offset_index:len_index);
+  fprintf("%d touch events in %d sensor samples, using %d:%d\n", events, size(file, 1),offset_index, len_index);
+  
   data = []; % zeros(framesize, 6, size(indices,1) * (2 * maxFrameShift + 1));
   labels = []; % zeros(2, size(indices,1) * (2 * maxFrameShift + 1));
   % create the frames and labels