% generate.m
  % GENERATE (part 1) Split one recorded CSV session into training,
  % validation and test sets, shuffle each set and save it as a MAT-file.
  %
  % The frame size and frame offset need to be determined beforehand.
  framesize = 60; % was 48
  frameoffset = 28;
  maxFrameShift = 10;

  % Percentage of training, validation and testing data.
  splitRatio = [70, 20, 10];
  % The three percentages are used as consecutive slices of the recording,
  % so they have to add up to 100.
  assert(sum(splitRatio) == 100, 'splitRatio must add up to 100.');

  file = '2019_09_09_1658';

  % Read the CSV with a row offset of 2 and a column offset of 0.
  % NOTE(review): csvread offsets are zero-based, so this starts reading at
  % the third row of the file, not the second - confirm that skipping two
  % rows is intended.
  csvData = csvread(strcat('AppData/', file, '.csv'), 2, 0);

  % Split into separate sets (normalization will be done in Julia).
  % generateTrainingDataFromCSV is defined outside this section; the second
  % and third arguments are presumably the start position and the length of
  % the slice, both in percent - verify against its definition.
  [train_data, train_lbls] = generateTrainingDataFromCSV(csvData, 0, splitRatio(1), framesize, frameoffset, maxFrameShift);
  [validation_data, validation_lbls] = generateTrainingDataFromCSV(csvData, splitRatio(1), splitRatio(2), framesize, frameoffset, maxFrameShift);
  [test_data, test_lbls] = generateTrainingDataFromCSV(csvData, splitRatio(2) + splitRatio(1), splitRatio(3), framesize, frameoffset, maxFrameShift);

  % Shuffle the datasets. shuffleSet returns the permuted arrays instead of
  % modifying its inputs, so the results have to be assigned back.
  [train_data, train_lbls] = shuffleSet(train_data, train_lbls);
  [validation_data, validation_lbls] = shuffleSet(validation_data, validation_lbls);
  [test_data, test_lbls] = shuffleSet(test_data, test_lbls);

  % Every MAT-file stores its set under the same two variable names,
  % 'data' and 'labels', so the variables are reassigned before each save.
  data = train_data;
  labels = train_lbls;
  save(strcat('TrainingData/', file, '_TRAIN.mat'), 'data', 'labels')

  data = validation_data;
  labels = validation_lbls;
  save(strcat('TrainingData/', file, '_VAL.mat'), 'data', 'labels')

  data = test_data;
  labels = test_lbls;
  save(strcat('TrainingData/', file, '_TEST.mat'), 'data', 'labels')

  % All frames of the three sets stacked along the third dimension.
  dataset = cat(3, train_data, validation_data, test_data);
  % Gathering some statistics:
  % 1. pointerDownTime equals the time a finger is residing on the screen
  % 2. timeBetweenTouchEvents indicating the frequency of the taps
  % 3. max/min values of frames indicating the strength of the taps

  % Column indices into csvData.
  POINTERDOWNTIME1 = 14;
  POINTERDOWNTIME2 = 15;
  TIMEBETWEENTOUCHEVENTS = 16;

  % Rows in which the respective column holds a non-zero value.
  indicesPDT1 = find(csvData(:, POINTERDOWNTIME1) ~= 0);
  indicesPDT2 = find(csvData(:, POINTERDOWNTIME2) ~= 0);
  indicesTBT = find(csvData(:, TIMEBETWEENTOUCHEVENTS) ~= 0);

  % Collect the non-zero down times of BOTH pointer columns in one vector.
  pointerDownTimes = cat(1, ...
      csvData(indicesPDT1, POINTERDOWNTIME1), ...
      csvData(indicesPDT2, POINTERDOWNTIME2));

  % The first non-zero entry of the column is left out.
  TimeBetweenTouchEvents = csvData(indicesTBT(2:end), TIMEBETWEENTOUCHEVENTS);
  % Remove the 8 largest times. These are the break times.
  TimeBetweenTouchEvents = sort(TimeBetweenTouchEvents);
  TimeBetweenTouchEvents = TimeBetweenTouchEvents(1:end-8);

  % Largest absolute value per frame and channel. The maximum is taken
  % along the first dimension of dataset, and the permute call moves the
  % frame index (third dimension) to the rows, giving one row per frame and
  % one column per channel. Channels 1-3 are plotted as gyroscope X/Y/Z and
  % channels 4-6 as accelerometer X/Y/Z.
  maxGYRO = permute(max(abs(dataset(:, 1:3, :)), [], 1), [3 2 1]);
  maxACC = permute(max(abs(dataset(:, 4:6, :)), [], 1), [3 2 1]);

  fig1 = figure;
  histogram(pointerDownTimes ./ 1000000) % Divide from ns to ms
  title('Histogram of the Duriation of touch events in ms')

  fig2 = figure;
  histogram(TimeBetweenTouchEvents ./ 1000000) % Divide from ns to ms
  title('Histogram of the time between sequential touch events in ms')

  % 2-by-3 grid: gyroscope channels in the top row, accelerometer channels
  % in the bottom row.
  fig3 = figure;
  nbins = 40;
  axisNames = {'X', 'Y', 'Z'};
  for k = 1:3
      subplot(2, 3, k)
      histogram(maxGYRO(:, k), nbins)
      title(['Hist Max Gyro ', axisNames{k}])

      subplot(2, 3, k + 3)
      histogram(maxACC(:, k), nbins)
      title(['Hist Max ACC ', axisNames{k}])
  end
  function [dataset, labels] = shuffleSet(data, lbls)
  %SHUFFLESET Reorder samples and their labels with one random permutation.
  %   [dataset, labels] = SHUFFLESET(data, lbls) draws a single random
  %   permutation of 1:size(data, 3) and applies it to the third dimension
  %   of data and to the columns of lbls, so that sample k of the result
  %   still belongs to label column k. The inputs are not modified; the
  %   shuffled copies are returned.
      sampleCount = size(data, 3);
      order = randperm(sampleCount);
      labels = lbls(:, order);
      dataset = data(:, :, order);
  end
  function dataout = removeoutliers(datain)
  %REMOVEOUTLIERS Remove outliers from data using the Thompson Tau method.
  %   dataout = REMOVEOUTLIERS(datain) sorts the vector datain in ascending
  %   order and then discards extreme values: first the largest value is
  %   removed repeatedly while its distance to the sample mean exceeds
  %   tau(n) * S, then the same is done for the smallest value. S is the
  %   sample standard deviation and n the number of remaining samples; mean,
  %   S and tau(n) are recomputed after every removal.
  %
  %   The tau table covers 3 <= n <= 40; for n > 40 the factor 1.960 is
  %   used. No value is removed once fewer than 3 samples remain.
  %
  %   If datain has fewer than 3 elements a message is displayed and the
  %   sorted input is returned unchanged otherwise.
  %
  %   Example: If datain = [1 34 35 35 33 34 37 38 35 35 36 150]
  %
  %   then removeoutliers(datain) will return the vector:
  %   dataout = 33 34 34 35 35 35 35 36 37 38
  %
  %   See also MEDIAN, STD, MIN, MAX, VAR, COV, MODE.

  %   This function was written by Vince Petaccio on July 30, 2009.

      % Sorting first turns "remove the extreme value" into dropping the
      % last or the first element.
      dataout = sort(datain);

      if numel(dataout) < 3
          display(['ERROR: There must be at least 3 samples in the' ...
              ' data set in order to use the removeoutliers function.']);
          return
      end

      % Compare the extreme high data points to TS.
      [xbar, TS] = thompsonTauLimit(dataout);
      while abs(dataout(end) - xbar) > TS
          dataout = dataout(1:end-1);
          [xbar, TS] = thompsonTauLimit(dataout);
      end

      % Compare the extreme low data points to TS, starting from the
      % statistics of whatever is left after the high-side pass.
      [xbar, TS] = thompsonTauLimit(dataout);
      while abs(dataout(1) - xbar) > TS
          dataout = dataout(2:end);
          [xbar, TS] = thompsonTauLimit(dataout);
      end
  end

  function [xbar, TS] = thompsonTauLimit(samples)
  %THOMPSONTAULIMIT Sample mean and rejection distance tau(n) * S.
  %   Private helper of REMOVEOUTLIERS. TS is Inf when there are fewer than
  %   3 samples, so that callers never reject a value in that case.

      % Values of Thompson's Tau; the first entry belongs to n = 3 and the
      % last one to n = 40.
      tau = [1.150 1.393 1.572 1.656 1.711 1.749 1.777 1.798 1.815 1.829 ...
          1.840 1.849 1.858 1.865 1.871 1.876 1.881 1.885 1.889 1.893 ...
          1.896 1.899 1.902 1.904 1.906 1.908 1.910 1.911 1.913 1.914 ...
          1.916 1.917 1.919 1.920 1.921 1.922 1.923 1.924];
      firstTabulatedN = 3;

      n = numel(samples);
      xbar = mean(samples);   % sample mean
      S = std(samples);       % sample standard deviation

      % Shift n so that n = 3 selects the first table entry.
      idx = n - firstTabulatedN + 1;
      if idx > numel(tau)
          TS = 1.960 * S;     % for n > 40
      elseif idx >= 1
          TS = tau(idx) * S;  % for 3 <= n <= 40
      else
          TS = Inf;           % tau is not defined for n < 3
      end
  end
  147. %vjp