Li Mu, "Dive into Deep Learning" (《动手学深度学习》), p. 30: linear regression implemented from scratch with NumPy
import numpy as np
import matplotlib.pyplot as plt
import random

# y = x * w + b + noise
# x dimension: (num_samples, 2)
# w dimension: (2, 1)
# b dimension: (1, 1)
# noise dimension: (num_samples, 1)
def generate_data(num_samples, w, b):
    x = np.random.rand(num_samples, 2)
    w = np.array(w).reshape(2, 1)
    b = np.array(b).reshape(1, 1)
    noise = np.random.normal(0, 0.01, (num_samples, 1))
    y = np.dot(x, w) + b + noise
    return x, y


def data_iter(x, y, batch_size):
    num_samples = len(x)
    indices = list(range(num_samples))
    random.shuffle(indices)
    for i in range(0, num_samples, batch_size):
        j = indices[i: min(i + batch_size, num_samples)]
        yield x[j], y[j]


def model(x, w, b):
    return np.dot(x, w) + b
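
# Example usage (a sketch, not part of the book's code; w0 and b0 are just
# placeholder parameters): draw one mini-batch and push it through model()
# to check the shapes.
#   x, y = generate_data(100, [2, -3], 5)
#   w0, b0 = np.zeros((2, 1)), np.zeros((1, 1))
#   for data, label in data_iter(x, y, batch_size=10):
#       print(model(data, w0, b0).shape, label.shape)  # (10, 1) (10, 1)
#       break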

# squared loss
def loss(y_hat, y):
    return np.mean((y_hat - y) ** 2) / 2
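
# For reference, a sketch of the math behind gradient() below (notation not
# from the book): with y_hat = X @ w + b and the batch loss
#   L(w, b) = (1 / (2 * n)) * sum_i (y_hat_i - y_i) ** 2,
# the gradients are
#   dL/dw = X.T @ (y_hat - y) / n
#   dL/db = mean(y_hat - y)
# so the averaging over the mini-batch already happens inside gradient().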

# calculate the gradient of the loss function
def gradient(x, y, y_hat):
    num_samples = len(x)
    dw = np.dot(x.T, (y_hat - y)) / num_samples
    db = np.sum(y_hat - y) / num_samples
    return dw, db


def sgd(params, grads, lr):
    for param, grad in zip(params, grads):
        # note: no need to divide by batch_size again here, because
        # gradient() above has already averaged the gradient over the batch
        param -= lr * grad


def train(x, y, w, b, lr, batch_size, num_epochs):
    for epoch in range(num_epochs):
        for data, label in data_iter(x, y, batch_size):
            y_hat = model(data, w, b)
            l = loss(y_hat, label)
            dw, db = gradient(data, label, y_hat)
            sgd([w, b], [dw, db], lr)
        print('epoch %d, loss %f' % (epoch + 1, l))


def main():
    num_samples = 1000
    x, y = generate_data(num_samples, [2, -3], 5)
    w = np.random.normal(0, 0.01, (2, 1))
    b = np.zeros((1, 1))
    lr = 0.1
    batch_size = 10
    num_epochs = 5
    train(x, y, w, b, lr, batch_size, num_epochs)
    print(w, b)


if __name__ == '__main__':
    main()
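
As a quick sanity check one could append the helper below (a sketch; verify() and its default arguments are illustrative additions, not part of the book's code). It regenerates a dataset, reruns the same training loop, and prints the learned parameters next to the closed-form least-squares solution from np.linalg.lstsq; with these settings both should roughly recover the true values [2, -3] and 5.

def verify(num_samples=1000, lr=0.1, batch_size=10, num_epochs=5):
    true_w, true_b = [2, -3], 5
    x, y = generate_data(num_samples, true_w, true_b)
    # fresh initialization, same as in main()
    w = np.random.normal(0, 0.01, (2, 1))
    b = np.zeros((1, 1))
    train(x, y, w, b, lr, batch_size, num_epochs)
    # closed-form least squares on the augmented design matrix [x, 1]
    x_aug = np.hstack([x, np.ones((num_samples, 1))])
    theta, *_ = np.linalg.lstsq(x_aug, y, rcond=None)
    print('sgd   w, b:', w.ravel(), b.ravel())
    print('lstsq w, b:', theta[:2].ravel(), theta[2:].ravel())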