import {
Array1D,
Graph,
Session,
NDArrayMathGPU,
} from 'deeplearn';
// GPU-backed math context shared by all tensor operations in this model.
const math = new NDArrayMathGPU();
// Neural-network model skeleton; tensors are fields filled in during the setup phase.
class ColorAccessibilityModel {
session;
inputTensor;
targetTensor;
predictionTensor;
costTensor;
...
// Converts the raw training set into deeplearn.js NDArrays.
// NOTE(review): math.scope(...) presumably releases intermediate GPU memory
// when the callback finishes — confirm against the deeplearn.js docs.
prepareTrainingSet(trainingSet) {
math.scope(() => {
const { rawInputs, rawTargets } = trainingSet;
// Each input color is normalized first (normalizeColor defined elsewhere in the article).
const inputArray = rawInputs.map(v => Array1D.new(this.normalizeColor(v)));
const targetArray = rawTargets.map(v => Array1D.new(v));
// inputArray/targetArray are intentionally unused here; later snippets of the
// article extend this method to consume them.
});
}
...
}
export default ColorAccessibilityModel;
第三,shuffle 输入和目标阵列。shuffle 的时候,deeplearn.js 提供的 shuffler 会使二者保持同步。每次训练迭代都会进行 shuffle,以馈送不同的输入作为神经网络的 batch。整个 shuffle 流程可以改善训练算法,因为它更可能通过避免过拟合来实现泛化。
import {
Array1D,
InCPUMemoryShuffledInputProviderBuilder,
Graph,
Session,
NDArrayMathGPU,
} from 'deeplearn';
// GPU-backed math context shared by all tensor operations in this model.
const math = new NDArrayMathGPU();
// Same skeleton as before; this snippet adds shuffled input providers.
class ColorAccessibilityModel {
session;
inputTensor;
targetTensor;
predictionTensor;
costTensor;
...
// Converts the raw training set into NDArrays and wraps them in a shuffler
// that keeps inputs and targets aligned while reordering them each iteration.
prepareTrainingSet(trainingSet) {
math.scope(() => {
const { rawInputs, rawTargets } = trainingSet;
const inputArray = rawInputs.map(v => Array1D.new(this.normalizeColor(v)));
const targetArray = rawTargets.map(v => Array1D.new(v));
// The builder shuffles both arrays with the same permutation, so every
// input stays paired with its target.
const shuffledInputProviderBuilder = new InCPUMemoryShuffledInputProviderBuilder([
inputArray,
targetArray
]);
// Providers are returned in the same order the arrays were passed in.
const [
inputProvider,
targetProvider,
] = shuffledInputProviderBuilder.getInputProviders();
// The providers are wired into feed entries in the next snippet.
});
}
...
}
export default ColorAccessibilityModel;
最后,馈送条目(feed entries)是训练阶段中神经网络前馈算法的最终输入。它匹配数据和张量(根据设置阶段的形态而定义)。
import {
Array1D,
InCPUMemoryShuffledInputProviderBuilder,
Graph,
Session,
NDArrayMathGPU,
} from 'deeplearn';
// GPU-backed math context shared by all tensor operations in this model.
const math = new NDArrayMathGPU();
// This snippet completes the data preparation: the shuffled providers are
// bound to the input/target tensors as feed entries for training.
class ColorAccessibilityModel {
session;
inputTensor;
targetTensor;
predictionTensor;
costTensor;
// Feed entries map each graph tensor to its data provider; consumed during training.
feedEntries;
...
// Builds this.feedEntries from the raw training set.
prepareTrainingSet(trainingSet) {
math.scope(() => {
const { rawInputs, rawTargets } = trainingSet;
const inputArray = rawInputs.map(v => Array1D.new(this.normalizeColor(v)));
const targetArray = rawTargets.map(v => Array1D.new(v));
// Shuffles inputs and targets with the same permutation so pairs stay aligned.
const shuffledInputProviderBuilder = new InCPUMemoryShuffledInputProviderBuilder([
inputArray,
targetArray
]);
const [
inputProvider,
targetProvider,
] = shuffledInputProviderBuilder.getInputProviders();
// Each entry pairs a graph tensor (defined in the setup phase) with the
// provider that supplies its data every training iteration.
this.feedEntries = [
{ tensor: this.inputTensor, data: inputProvider },
{ tensor: this.targetTensor, data: targetProvider },
];
});
}
...
}
export default ColorAccessibilityModel;
这样,神经网络的设置就结束了。神经网络的所有层和单元都实现了,训练集也准备好进行训练了。现在只需要添加两个配置神经网络行为的超参数,它们适用于下个阶段:训练阶段。
import {
Array1D,
InCPUMemoryShuffledInputProviderBuilder,
Graph,
Session,
SGDOptimizer,
NDArrayMathGPU,
} from 'deeplearn';
// GPU-backed math context shared by all tensor operations in this model.
const math = new NDArrayMathGPU();
// This snippet adds the two training hyperparameters: batch size and
// learning rate (the latter via the SGD optimizer).
class ColorAccessibilityModel {
session;
optimizer;
// Number of training data points fed through the network per iteration.
batchSize = 300;
// Starting step size for stochastic gradient descent; too high and
// gradient descent will not converge (see the article text below).
initialLearningRate = 0.06;
inputTensor;
targetTensor;
predictionTensor;
costTensor;
feedEntries;
constructor() {
// SGD optimizer drives weight updates during the training phase.
this.optimizer = new SGDOptimizer(this.initialLearningRate);
}
...
}
export default ColorAccessibilityModel;
第一个参数是学习速率(learning rate)。学习速率决定算法的收敛速度,以最小化成本。它的数值应该足够大以加快收敛,但又不能太高,否则梯度下降不会收敛,因为会错过局部最优值。
第二个参数是批尺寸(batch size)。它定义每个 epoch(迭代)里有多少个训练集的数据点通过神经网络。一个 epoch 等于一批数据点的一次正向传播和一次反向传播。以批次的方式训练神经网络有两个好处:第一,这样可以防止密集计算,因为算法训练时使用了内存中的少量数据点;第二,这样可以让神经网络更快地进行批处理,因为每个 epoch 中权重会随着每个批次的数据点进行调整——而不是等到整个数据集训练完之后再进行改动。
训练阶段
评论