Monday, March 10, 2025

Verifying the Gradient Descent algorithm with python autograd

# test_lce.py
from autograd import elementwise_grad as grad
import autograd.numpy as auto_np
import matplotlib.pyplot as plt
import numpy as np
import tqdm
ln_  = lambda v: auto_np.log(v) # ln(v): natural log function
sum_ = lambda v: auto_np.sum(v) # Σ(v): summation function
sigmoid_ = lambda x: 1/(1 + auto_np.exp(-x))# sigmoid function = 1/(1 + exp(-x))
predict_ = lambda x, w: auto_np.dot(x, w) # forward x into the neural network w to get output
probability = lambda x, w: sigmoid_(predict_(x, w))
Y1 = np.array([1, 0, 0, 0]) # expected outputs (probabilities), one-hot encoded
X1 = np.array([[0.52,  1.12,  0.77],
               [0.88, -1.08,  0.15],
               [0.52,  0.06, -1.30],
               [0.74, -2.49,  1.39]]) # X1 has 4 training samples (= 4 rows); each sample is a row vector of 3 features
W1 = np.array([0.0, 0.0, 0.0]) # trainable weights, expected to end up producing the output Y1 = [1 0 0 0]
def logistic_crosss_entropy(W, X, P): # W, X, P are all arrays; LCE = -Σ P*ln(Q), P: one-hot encoded, element ∈ {0, 1}
    length = X.shape[0]
    if length == P.shape[0] and W.shape[0] == X.shape[1] :
        Z = predict_(X, W) # predict output
        lce = P*ln_(1 + auto_np.exp(-Z)) + (1 - P) * ln_(1 + auto_np.exp(Z)) # equivalent to -P*ln(Q) - (1-P)*ln(1-Q), see the notes below
        return sum_(lce) / length
gradient_Loss = grad(logistic_crosss_entropy) # ∇L(w, x, y) = ∂L(w, x, y)/∂w => gradient of the loss L taken with respect to w
logger = []
print(f"訓練前 lce loss:{logistic_crosss_entropy(W1, X1, Y1)},  輸出機率: {probability(X1, W1)}")
for iteration in tqdm.tqdm(range(1000)):
    W1 -= 0.01 * gradient_Loss(W1, X1, Y1) # learning rate = 0.01, 1 batch (4 samples), use GD optimizer
    logger.append([iteration, logistic_crosss_entropy(W1, X1, Y1)]) # evaluate LCE with the updated W1
print(f"lce loss after training: {logistic_crosss_entropy(W1, X1, Y1)},  output probabilities: {probability(X1, W1)}")
if len(logger) > 0: # plot figure for the data in logger
    logger = np.array(logger).T
    plt.plot(logger[0], logger[1], color="r", label="Logistic Cross Entropy")
    plt.xlabel("epochs")
    plt.ylabel("LCE")
    plt.title("Training")
    plt.legend() # to show the multi label
    plt.show()
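
As an extra sanity check (not part of the original script), the gradient that autograd reports can be compared with the closed-form gradient of the averaged loss, ∇L(w) = Xᵀ·(sigmoid(X·w) − P) / N. A minimal sketch, assuming it runs after the definitions in test_lce.py above (the file name gradient_check.py is hypothetical):

# gradient_check.py (hypothetical add-on; reuses X1, Y1, W1, sigmoid_, gradient_Loss from test_lce.py)
analytic_grad = np.dot(X1.T, sigmoid_(np.dot(X1, W1)) - Y1) / X1.shape[0] # closed form: Xᵀ(Q - P)/N
autograd_grad = gradient_Loss(W1, X1, Y1)                                 # gradient computed by autograd
print("max |analytic - autograd|:", np.max(np.abs(analytic_grad - autograd_grad))) # expected to be near machine epsilon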

Notes:
 sigmoid = 1/(1 + exp(-z))
 logistic_crosss_entropy LCE = -Σ P*ln(Q)
 z = x.dot(w)
 Q = sigmoid(z) = 1/(1 + exp(-z))
 element of P ∈ {0, 1}
 loss mean = sum_(- P*ln_(Q) - (1 - P)*ln_(1.0 - Q)) / length
 LCE = - P*ln_(Q) - (1 - P)*ln_(1.0 - Q)
     = - P*ln_(1/(1 + exp(-z))) - (1 - P)*ln_(1.0 - 1/(1 + exp(-z)))
     = P*ln_(1 + exp(-z)) + (1 - P)*ln_((exp(-z) + 1)/exp(-z))
     = P*ln_(1 + exp(-z)) + (1 - P)*ln_(1 + 1/exp(-z))
     = P*ln_(1 + exp(-z)) + (1 - P)*ln_(1 + exp(z))
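
For completeness (this step is not in the original notes, but it is the standard chain-rule result the gradient descent update relies on), differentiating the simplified form above gives the gradient that autograd reproduces:
 dLCE/dz = -P*(1 - Q) + (1 - P)*Q = Q - P
 dLCE/dw = (dLCE/dz)*(dz/dw) = (Q - P)*x
 averaged over the batch: ∇L(w) = Xᵀ(Q - P)/length, matching what gradient_Loss returns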
