{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"# Mini-Batch Stochastic Gradient Descent\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import d2l\n",
"from mxnet import autograd, gluon, init, nd\n",
"from mxnet.gluon import nn, data as gdata, loss as gloss\n",
"import numpy as np\n",
"import time"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## Reading Data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"attributes": {
"classes": [],
"id": "",
"n": "1"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(1500, 5)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def get_data_ch7(): \n",
"    # Load the NASA airfoil self-noise dataset from a tab-delimited file.\n",
"    data = np.genfromtxt('./airfoil_self_noise.dat', delimiter='\\t')\n",
"    # Standardize every column to zero mean and unit variance.\n",
"    data = (data - data.mean(axis=0)) / data.std(axis=0)\n",
"    # Keep the first 1500 rows; the last column is the label, the\n",
"    # remaining columns are the features. Returned as MXNet NDArrays.\n",
"    return nd.array(data[:1500, :-1]), nd.array(data[:1500, -1])\n",
"\n",
"features, labels = get_data_ch7()\n",
"features.shape"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## Implementation from Scratch\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"attributes": {
"classes": [],
"id": "",
"n": "3"
}
},
"outputs": [],
"source": [
"def sgd(params, states, hyperparams):\n",
"    # One mini-batch SGD step: in-place update p <- p - lr * p.grad.\n",
"    # `states` is unused here; the parameter exists so every optimizer in\n",
"    # this chapter shares the same (params, states, hyperparams) signature.\n",
"    for p in params:\n",
"        # p[:] assigns in place, so the attached gradient buffer is kept.\n",
"        p[:] -= hyperparams['lr'] * p.grad"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"The training function"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"attributes": {
"classes": [],
"id": "",
"n": "4"
}
},
"outputs": [],
"source": [
"def train_ch7(trainer_fn, states, hyperparams, features, labels,\n",
"              batch_size=10, num_epochs=2):\n",
"    # Train a linear-regression model from scratch with a pluggable\n",
"    # optimizer. trainer_fn(params, states, hyperparams) performs one\n",
"    # update step; `states` carries optimizer state (e.g. momentum\n",
"    # buffers), `hyperparams` things like the learning rate.\n",
"    # Returns (ts, ls): cumulative wall-clock times and recorded losses.\n",
"    net, loss = d2l.linreg, d2l.squared_loss\n",
"    # Initialize parameters and attach gradient buffers for autograd.\n",
"    w = nd.random.normal(scale=0.01, shape=(features.shape[1], 1))\n",
"    b = nd.zeros(1)\n",
"    w.attach_grad()\n",
"    b.attach_grad()\n",
"    def eval_loss():\n",
"        # Mean loss over the full dataset, as a Python float.\n",
"        return loss(net(features, w, b), labels).mean().asscalar()\n",
"    ls, ts = [eval_loss()], [0,]\n",
"    data_iter = gdata.DataLoader(\n",
"        gdata.ArrayDataset(features, labels), batch_size, shuffle=True)\n",
"    start = time.time()\n",
"    for _ in range(num_epochs):\n",
"        for batch_i, (X, y) in enumerate(data_iter):\n",
"            with autograd.record():\n",
"                l = loss(net(X, w, b), y).mean()  # average loss in batch\n",
"            l.backward()\n",
"            trainer_fn([w, b], states, hyperparams)  # update parameters\n",
"            # Log every 10 training examples. NOTE(review): with the\n",
"            # default batch_size=10 this fires on every batch; the d2l\n",
"            # book uses % 100 here -- confirm the denser logging is\n",
"            # intended before changing.\n",
"            if (batch_i + 1) * batch_size % 10 == 0:\n",
"                # Record cumulative wall-clock time; eval_loss() below is\n",
"                # excluded because `start` is reset only after it runs.\n",
"                ts.append(time.time() - start + ts[-1])\n",
"                ls.append(eval_loss())\n",
"                start = time.time()\n",
"    print('loss: %f, %f sec per epoch' % (ls[-1], ts[-1]/num_epochs))\n",
"    d2l.set_figsize()\n",
"    # x-axis in units of epochs, evenly spaced over the recorded points.\n",
"    d2l.plt.plot(np.linspace(0, num_epochs, len(ls)), ls)\n",
"    d2l.plt.xlabel('epoch')\n",
"    d2l.plt.ylabel('loss')\n",
"    return ts, ls"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Gradient descent"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"attributes": {
"classes": [],
"id": "",
"n": "5"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"loss: 0.243936, 0.011909 sec per epoch\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
"