将 CNN 拟合到数据上
接下来是将 CNN 模型拟合到我们的数据集上,这样模型就能从训练数据集中学习并更新权重。训练好的 CNN 模型随后可用于获得对测试数据集的最终预测。在此之前,我们需要满足一些先决条件,例如降低学习率、找到模型的最佳权重并保存这些计算出的权重,以便之后用它们进行测试和获得预测。
根据我们的常识,我们需要以下几项内容:
模型的最佳权重
降低学习率
保存模型的最后权重
# Multiply the learning rate by 0.4 (never going below 1e-5) once the
# monitored validation accuracy has gone 3 epochs without improving.
lrr = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.4,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

# Checkpoint that is written only when validation accuracy improves; the
# epoch number and the score are formatted into the file name.
filepath = "drive/DataScience/PlantReco/weights.best_{epoch:02d}-{val_acc:.2f}.hdf5"
checkpoints = ModelCheckpoint(
    filepath,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Checkpoint that is written regardless of the score. The path contains no
# placeholders, so each save goes to the same file.
filepath = "drive/DataScience/PlantReco/weights.last_auto4.hdf5"
checkpoints_full = ModelCheckpoint(
    filepath,
    monitor='val_acc',
    mode='max',
    save_best_only=False,
    verbose=1,
)

# Callbacks to hand to the training call.
callbacks_list = [checkpoints, lrr, checkpoints_full]
# MODEL
# The training call is kept commented out; the saved weights loaded below
# are used instead.
# hist = model.fit_generator(datagen.flow(trainX, trainY, batch_size=75),
#                            epochs=35, validation_data=(testX, testY),
#                            steps_per_epoch=trainX.shape[0], callbacks=callbacks_list)

# LOADING MODEL
model.load_weights("../input/plantrecomodels/weights.best_17-0.96.hdf5")

# The arrays in the archive are matched to the split names purely by
# position. NOTE(review): this assumes the archive stores them in the order
# x_train, x_test, y_train, y_test - confirm against the code that wrote it.
dataset = np.load("../input/plantrecomodels/Data.npz")
split_names = ("x_train", "x_test", "y_train", "y_test")
data = dict(zip(split_names, (dataset[key] for key in dataset)))
x_train, x_test = data['x_train'], data['x_test']
y_train, y_test = data['y_train'], data['y_test']

print(model.evaluate(x_train, y_train))  # Evaluate on train set
print(model.evaluate(x_test, y_test))  # Evaluate on test set
混淆矩阵
混淆矩阵是一种检查模型在数据上表现如何的方法,也是分析模型错误的好方法。请参考以下代码来获取混淆矩阵:
# PREDICTIONS
# Reduce both the model output and the reference labels to one index per
# sample (position of the largest value along axis 1), then tabulate them
# against each other.
# NOTE(review): assumes y_test is one-hot encoded - confirm.
y_pred = model.predict(x_test)
y_class = y_pred.argmax(axis=1)
y_check = y_test.argmax(axis=1)

cmatrix = confusion_matrix(y_check, y_class)
print(cmatrix)
获得预测
在最后一部分,我们将获得对测试数据集的预测。
请参考以下代码,使用经过训练的模型获取预测:
# Pattern for the test images. The loop below is written for many files, so
# the pattern must be a wildcard over the folder rather than a single
# "test.png" file.
# NOTE(review): assumes the PNGs sit directly under test/ - confirm.
path_to_test = '../input/plant-seedlings-classification/test/*.png'
pics = glob(path_to_test)

testimages = []  # resized images, in the same order as `tests`
tests = []       # file name (last path component) of every image
num = len(pics)
for count, pic in enumerate(pics, start=1):
    # '\r' moves the cursor back to the start of the line so the progress
    # counter is overwritten in place instead of being appended.
    print(str(count) + '/' + str(num), end='\r')
    tests.append(pic.split('/')[-1])
    # Resize every image to scale x scale.
    testimages.append(cv2.resize(cv2.imread(pic), (scale, scale)))

testimages = np.asarray(testimages)
# Mask every test image: pixels whose HSV value falls outside the range
# below are set to zero, the rest keep their original colour.
newtestimages = []
sets = []
getEx = True  # plot the intermediate stages for the first image only

# Loop-invariant settings, built once.
# HSV range kept by the mask - presumably tuned to the green of the
# seedlings; verify against the training preprocessing.
lower = (25, 40, 50)
upper = (75, 255, 255)
# 11x11 elliptical kernel used for the morphological closing of the mask.
struc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))

for img in testimages:
    blurr = cv2.GaussianBlur(img, (5, 5), 0)
    hsv = cv2.cvtColor(blurr, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, struc)

    # Copy only the pixels selected by the mask onto a black canvas.
    boolean = mask > 0
    masking = np.zeros_like(img, np.uint8)
    masking[boolean] = img[boolean]
    newtestimages.append(masking)

    if getEx:
        # Show each processing stage in a 2x3 grid.
        plt.subplot(2, 3, 1); plt.imshow(img)
        plt.subplot(2, 3, 2); plt.imshow(blurr)
        plt.subplot(2, 3, 3); plt.imshow(hsv)
        plt.subplot(2, 3, 4); plt.imshow(mask)
        plt.subplot(2, 3, 5); plt.imshow(boolean)
        plt.subplot(2, 3, 6); plt.imshow(masking)
        plt.show()
        getEx = False

newtestimages = np.asarray(newtestimages)
# OTHER MASKED IMAGES
# Preview the first masked images in a 2x3 grid. The count is capped at the
# number of available images so a short test set does not raise IndexError.
for i in range(min(6, len(newtestimages))):
    plt.subplot(2, 3, i + 1)
    plt.imshow(newtestimages[i])
# Scale pixel values by 1/255 and feed the *scaled* array to the model.
# The result must be assigned back to `newtestimages`; binding it to a
# differently-cased name would leave the prediction running on raw values.
# NOTE(review): presumably mirrors the scaling applied to the training data
# - confirm.
newtestimages = newtestimages / 255
prediction = model.predict(newtestimages)
# PREDICTION TO A CSV FILE
# Take the index of the highest-scoring class for each image and look up
# the matching entry in labels.classes_.
pred = np.argmax(prediction, axis=1)
predStr = labels.classes_[pred]

# One row per test image: its file name and the predicted species.
result = pd.DataFrame({'file': tests, 'species': predStr})
result.to_csv("Prediction.csv", index=False)
尾注
在本文中,我们详细讨论了如何使用 CNN 进行植物幼苗分类。希望你能从文中学到一些东西,并在未来对你有所帮助。