1 | # improt package |
1 | #HOG特征提取 |
1 | # flatten |
1 | # shuffle data load |
1 | # no shuffle |
1 | # shuffle load_data |
1 | # read data |
1 | from glob import glob |
1 | # output |
1 | def get_data(train_images,train_label,test_images,test_label): |
((121000, 50), (50, 1), (121000, 30), (30, 1))
In [130]:
model parameter
type_num = 0
dim = 10
C = 0.1 #0.6
toler = 0.0001
maxIter = 40
best_acc = 0
best_a = 0
best_r = 0
best_label = []
In [131]:
SVDD算法
import numpy as np
import random
def meet_limit_condition(alpha_i, data_i, a, R, C, toler):
“””
测试alphas[i]是否满足优化条件
:param alpha_i:alphas[i]
:param data_i:data_array[i]
:param a:中心点
:param R:半径
:param C:惩罚因子
:param toler:容忍度
:return:满足优化条件则返回True,否则返回False
“””
# if abs(R ** 2 - np.dot((data_i - a), (data_i - a))) > toler and 0 < alpha_i < C:
Ei = R ** 2 - np.dot((data_i - a), (data_i - a))
if (Ei < -toler and alpha_i < C) or (Ei > toler and alpha_i > 0):
return True
else:
return False
def selectJrand(i, m):
“””
随机选择一个整数
Args:
i 第一个alpha的下标
m 所有alpha的数目
Returns:
j 返回一个不为i的随机数,在0~m之间的整数值
“””
j = i
while j == i:
j = int(random.uniform(0, m))
return j
In [132]:
def calculate_alpha_j(data_array, alphas, i, j, a):
“””
data_array: 测试集
alphas: 旧的alphas值
i, j: 当前选出的将要进行优化的alpha的下标
返回值: 新的alphas[j]值
“””
a1 = np.array(a)
x1 = np.array(data_array[i])
x2 = np.array(data_array[j])
x12 = np.dot(x1, x2)
x1_2 = np.dot(x1, x1)
x2_2 = np.dot(x2, x2)
nu = np.dot(a1, x2) - x2_2 - np.dot(a1, x1) + x1_2 + \
alphas[i] * (x12 + x1_2) + alphas[j] * (x1_2 - x2_2 + 3 * x12)
de = 2 * (x1_2 + x2_2 - 2 * x12)
if de == 0:
return 0, False
return -nu / de, True
def calculate_alpha_i(alphas, i):
“””
alphas: 新的alpha数组
i: 要更新的alpha值的下标
返回值: 新的alphas[i]
“””
alpha_sum = alphas.sum() - alphas[i]
return 1 - alpha_sum
In [133]:
def smo(train_data, C=0.6, toler=0.001, maxIter=40):
data_array = np.array(train_data)
m, n = np.shape(data_array)
print(“m:”,m,”n:”,n)
alphas = np.array([1 / m] * m)
R = 0
a = np.array([0.0] * n)
for i in range(m):
a += alphas[i] * data_array[i]
iter = 0
while iter < maxIter:
changed_flag = 0
for i in range(m):
if meet_limit_condition(alphas[i], data_array[i], a, R, C, toler):
j = selectJrand(i, m)
L = max(0, alphas[i] + alphas[j] - C)
H = min(C, alphas[i] + alphas[j])
if L == H:
continue
new_alpha_j, valid = calculate_alpha_j(
data_array, alphas, i, j, a)
if not valid:
continue
if new_alpha_j < L:
new_alpha_j = L
elif new_alpha_j > H:
new_alpha_j = H
if abs(new_alpha_j - alphas[j]) < 0.001:
continue
else:
alphas[j] = new_alpha_j
alphas[i] = calculate_alpha_i(alphas, i)
changed_flag += 1
# check_alphas(alphas, C)
a, R = calculate_a_and_R(data_array, alphas, i, j, C)
if changed_flag == 0:
iter += 1
else:
iter = 0
return a, R
def check_alphas(alphas, C):
“””
检测alphas是否符合要求
:param alphas:alphas
:param C:惩罚因子
:return:符合返回True,否则返回False
“””
a_sum = 0
for i in range(alphas.shape[0]):
if alphas[i] < -0.0001:
print(“alphas” + str(i) + “:” + str(alphas[i]) + “ < 0”)
if alphas[i] > C + 0.0001:
print(“alphas” + str(i) + “:” + str(alphas[i]) + “ > C”)
a_sum += alphas[i]
if abs(a_sum - 1) > 0.0001:
print("alphas sum != 1")
return False
else:
return True
def calculate_a_and_R(data_array, alphas, i, j, C):
“””
计算a, R
:param data_array:
:param alphas:
:param i:
:param j:
:param C:
:return:
“””
m, n = np.shape(data_array)
a = [0] * n
for l in range(m):
a += data_array[l] * alphas[l]
R1 = np.sqrt(np.dot(data_array[i] - a, data_array[i] - a))
R2 = np.sqrt(np.dot(data_array[j] - a, data_array[j] - a))
if 0 < alphas[i] < C:
R = R1
elif 0 < alphas[j] < C:
R = R2
else:
R = (R1 + R2) / 2.0
return a, R
In [134]:
判别函数
import numpy as np
from matplotlib import pyplot as plt
def judge(test_data, a, R):
m, n = np.shape(test_data)
label = []
for i in range(m):
if np.dot(test_data[i] - a, test_data[i] - a) <= R ** 2:
label.append(0)
else:
label.append(1)
return label
def calculate_acc(result_label, correct_label):
“””
返回result_label与correct_label相同的比例
:return: acc = (true positive + false positive)/all
“””
if len(result_label) != len(correct_label):
return -1
acc = 0
for i in range(len(result_label)):
if result_label[i] == correct_label[i]:
acc += 1
return acc / len(result_label)
def draw_picture(train_data, test_data, correct_label, a, R, C, toler, acc):
plt.figure()
plt.scatter(test_data[:, 0], test_data[:, 1], c=correct_label)
plt.scatter(train_data[:, 0], train_data[:, 1], c=’r’)
plt.title(“C = “ + str(C) + “ toler = “ + str(toler) +
“ R = “ + str(R)[0:4] + “ acc = “ + str(acc)[:4])
theta = np.arange(0, 2 * np.pi, 0.01)
x = a[0] + R * np.cos(theta)
y = a[1] + R * np.sin(theta)
plt.plot(x, y)
plt.show()
In [135]:
Dtrain, ytrain, Dtest, ytest = get_data(train_images,train_labels,test_images,test_labels)
ytrain=ytrain.reshape(len(ytrain),1)
ytest=ytest.reshape(len(ytest),1)
ytest.tolist()
Dtrain = Dtrain.T
Dtest = Dtest.T
print(Dtrain.shape, ytrain.shape, Dtest.shape, ytest.shape)
train_data = Dtrain[40:, :]
print(train_data.shape)
test_data, correct_label = Dtest, ytest
a, R = smo(train_data, C, toler, maxIter)
result_label = judge(test_data, a, R)
print(result_label)
(50, 121000)
121000
(50, 121000) (50, 1) (30, 121000) (30, 1)
(10, 121000)
m: 10 n: 121000
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
In [136]:
model train
if name == ‘main‘:
Dtrain, ytrain, Dtest, ytest = get_data(train_images,train_labels,test_images,test_labels)
ytrain=ytrain.reshape(len(ytrain),1)
ytest=ytest.reshape(len(ytest),1)
Dtrain = Dtrain.T
Dtest = Dtest.T
print(Dtrain.shape, ytrain.shape, Dtest.shape, ytest.shape)
train_data = Dtrain[20:, :]
print(train_data.shape)
test_data, correct_label = Dtest, ytest
min_acc = 2
avrg_acc = 0
max_acc = -1
for i in range(50):
print(“epoch “,i)
a, R = smo(train_data, C, toler, maxIter)
result_label = judge(test_data, a, R)
acc = calculate_acc(result_label, correct_label)
if acc > best_acc:
best_acc = acc
best_a = a
best_r = R
best_label = result_label
avrg_acc += acc
if acc < min_acc:
min_acc = acc
if acc > max_acc:
max_acc = acc
#print("accuracy: " + str(acc))
avrg_acc /= 100
print("train type:" + str(type_num) + ", dim=" +
str(dim) + " => best acc = " + str(max_acc))
print("model: a="+str(best_a)+", R="+str(best_r) + ",C="+str(C))
print("label(0-20:positive sample):")
print(best_label)
(50, 121000)
121000
(50, 121000) (50, 1) (30, 121000) (30, 1)
(30, 121000)
epoch 0
m: 30 n: 121000
epoch 1
m: 30 n: 121000
epoch 2
m: 30 n: 121000
epoch 3
m: 30 n: 121000
epoch 4
m: 30 n: 121000
epoch 5
m: 30 n: 121000
epoch 6
m: 30 n: 121000
epoch 7
m: 30 n: 121000
epoch 8
m: 30 n: 121000
epoch 9
m: 30 n: 121000
epoch 10
m: 30 n: 121000
epoch 11
m: 30 n: 121000
epoch 12
m: 30 n: 121000
epoch 13
m: 30 n: 121000
epoch 14
m: 30 n: 121000
epoch 15
m: 30 n: 121000
epoch 16
m: 30 n: 121000
epoch 17
m: 30 n: 121000
epoch 18
m: 30 n: 121000
epoch 19
m: 30 n: 121000
epoch 20
m: 30 n: 121000
epoch 21
m: 30 n: 121000
epoch 22
m: 30 n: 121000
epoch 23
m: 30 n: 121000
epoch 24
m: 30 n: 121000
epoch 25
m: 30 n: 121000
epoch 26
m: 30 n: 121000
epoch 27
m: 30 n: 121000
epoch 28
m: 30 n: 121000
epoch 29
m: 30 n: 121000
epoch 30
m: 30 n: 121000
epoch 31
m: 30 n: 121000
epoch 32
m: 30 n: 121000
epoch 33
m: 30 n: 121000
epoch 34
m: 30 n: 121000
epoch 35
m: 30 n: 121000
epoch 36
m: 30 n: 121000
epoch 37
m: 30 n: 121000
epoch 38
m: 30 n: 121000
epoch 39
m: 30 n: 121000
epoch 40
m: 30 n: 121000
epoch 41
m: 30 n: 121000
epoch 42
m: 30 n: 121000
epoch 43
m: 30 n: 121000
epoch 44
m: 30 n: 121000
epoch 45
m: 30 n: 121000
epoch 46
m: 30 n: 121000
epoch 47
m: 30 n: 121000
epoch 48
m: 30 n: 121000
epoch 49
m: 30 n: 121000
train type:0, dim=10 => best acc = 0.8666666666666667
model: a=[0.18466436 0.06578467 0.03577375 … 0.0685853 0.02567655 0.05139069], R=27.083401210078584,C=0.1
label(0-20:positive sample):
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1]
In [187]:
a,R,best_acc,best_label,best_a,best_r
R=np.array(R)
best_acc=np.array(best_acc)
best_r=np.array(best_r)
In [181]:
存储多个变量
np.savez(“result.npz”,a=a, R=R, best_acc=best_acc, best_label=best_label, best_a=best_a,best_r=best_r) #使用前面的名字命名变量
In [192]:
r = np.load(“result.npz”) #加载一次即可
a=r[‘best_a’]
R=r[‘best_r’]
acc=r[‘best_acc’]
In [193]:
visualize
plt.figure()
plt.scatter(test_data[:125, 100], test_data[:125, 101], c=’orange’)
plt.scatter(test_data[125:, 100], test_data[125:, 101], c=’black’, alpha=0.3)
plt.scatter(train_data[:, 0], train_data[:, 1], c=’r’, alpha=0.1)
plt.title(“C = “ + str(C) + “ toler = “ + str(toler) +
“ R = “ + str(R)[0:4] + “ acc = “ + str(acc)[:4])
theta = np.arange(0, 2 * np.pi, 0.01)
x = a[0] + R * np.cos(theta)
y = a[1] + R * np.sin(theta)
plt.plot(x, y)
plt.show()
In [194]:
visualize for various dimenshion
plt.figure(figsize=(10, 20))
for i in range(9):
plt.subplot(5,2,i+1)
plt.scatter(test_data[:125, i], test_data[:125,i+1 ], c=’orange’)
plt.scatter(test_data[125:, i], test_data[125:,i+1 ], c=’black’, alpha=0.3)
plt.scatter(train_data[:, i], train_data[:, i+1], c=’r’, alpha=0.1)
plt.title(“C = “ + str(C) + “ toler = “ + str(toler) +
“ R = “ + str(R)[0:4] + “ acc = “ + str(acc)[:4])
theta = np.arange(0, 2 * np.pi, 0.01)
x = a[i] + R * np.cos(theta)
y = a[i+1] + R * np.sin(theta)
plt.plot(x, y)
plt.show()