A Beginner Deep Learning Project: One Step Closer to Your Data Science Dream
Emojis and avatars are ways of expressing nonverbal cues. These cues have become an essential part of online chat, product reviews, brand sentiment, and more, and they have driven a growing body of data science research into emoji-driven storytelling.
With advances in computer vision and deep learning, it is now possible to detect human emotions from images. In this deep learning project, we will classify human facial expressions and map them to the corresponding emojis or avatars.
About the Dataset
The Facial Expression Recognition dataset (FER2013) consists of 48x48-pixel grayscale face images. The faces are centered and occupy about the same amount of space in every image. The dataset covers the following facial emotion categories:
- 0: Angry
- 1: Disgust
- 2: Fear
- 3: Happy
- 4: Sad
- 5: Surprise
- 6: Neutral
Download the dataset: Facial Expression Recognition Dataset
Download the project code: Emoji Creator Project Source Code
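The generators in step 2 below expect the images to be arranged in one subfolder per class under `data/train` and `data/test`. As a sanity check after extracting the dataset, a short sketch like the following (assuming exactly those folder names) prints the number of images found per class:

```python
import os

# Assumed layout: data/train/<class_name>/*.png and data/test/<class_name>/*.png
for split in ('data/train', 'data/test'):
    print(split)
    for class_name in sorted(os.listdir(split)):
        class_dir = os.path.join(split, class_name)
        if os.path.isdir(class_dir):
            print(f"  {class_name}: {len(os.listdir(class_dir))} images")
```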
Create Your Own Emoji with Deep Learning
We will build a deep learning model that classifies the facial expression in an image and then maps the predicted emotion to an emoji or avatar.
Facial Emotion Recognition Using CNN
In the steps below, we will build a convolutional neural network architecture and train the model on the FER2013 dataset to recognize emotions from images.
Step 1: Import the libraries:

```python
import numpy as np
import cv2
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
```
Step 2: Initialize the training and validation generators:

```python
train_dir = 'data/train'
val_dir = 'data/test'

train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(48, 48),
    batch_size=64,
    color_mode="grayscale",
    class_mode='categorical')

validation_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(48, 48),
    batch_size=64,
    color_mode="grayscale",
    class_mode='categorical')
```
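Note that `flow_from_directory` assigns class indices in alphanumeric order of the subfolder names, which is why the `emotion_dict` used later (Angry, Disgusted, Fearful, Happy, Neutral, Sad, Surprised) differs from the numeric order in the dataset description above. You can confirm the mapping your folder names produce:

```python
# Mapping from class (folder) name to the index used in the one-hot labels.
print(train_generator.class_indices)
# e.g. {'angry': 0, 'disgusted': 1, 'fearful': 2, 'happy': 3,
#       'neutral': 4, 'sad': 5, 'surprised': 6}  (depends on your folder names)
```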
Step 3: Build the convolutional network architecture:

```python
emotion_model = Sequential()
emotion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48, 48, 1)))
emotion_model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Dropout(0.25))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Dropout(0.25))
emotion_model.add(Flatten())
emotion_model.add(Dense(1024, activation='relu'))
emotion_model.add(Dropout(0.5))
emotion_model.add(Dense(7, activation='softmax'))
```
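Before training, it can help to verify the layer output shapes and parameter counts:

```python
# Print a layer-by-layer overview of output shapes and parameter counts.
emotion_model.summary()
```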
Step 4: Compile and train the model:

```python
emotion_model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(lr=0.0001, decay=1e-6),
    metrics=['accuracy'])

emotion_model_info = emotion_model.fit_generator(
    train_generator,
    steps_per_epoch=28709 // 64,
    epochs=50,
    validation_data=validation_generator,
    validation_steps=7178 // 64)
```
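The `History` object returned by `fit_generator` records the loss and accuracy per epoch, so you can optionally plot the training curves, for example with matplotlib (an extra dependency not used elsewhere in this project; note that older Keras versions use the keys 'acc'/'val_acc' instead of 'accuracy'/'val_accuracy'):

```python
import matplotlib.pyplot as plt

# Plot training vs. validation accuracy per epoch.
# Key names vary by Keras version: 'accuracy' in newer, 'acc' in older releases.
hist = emotion_model_info.history
plt.plot(hist['accuracy'], label='train accuracy')
plt.plot(hist['val_accuracy'], label='validation accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()
```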
Step 5: Save the model weights:

```python
emotion_model.save_weights('model.h5')
```
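`save_weights` stores only the weights, so the same architecture has to be rebuilt before calling `load_weights`, as the GUI script below does. If you prefer, Keras can also save the architecture together with the weights; a sketch using a hypothetical file name:

```python
from keras.models import load_model

# Alternative: save architecture + weights in a single file...
emotion_model.save('emotion_model_full.h5')
# ...and restore both later without redefining the layers.
restored_model = load_model('emotion_model_full.h5')
```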
Step 6: Detect faces in the webcam feed with OpenCV's Haar cascade XML, draw bounding boxes, and predict the emotion:

```python
cv2.ocl.setUseOpenCL(False)

emotion_dict = {0: "Angry", 1: "Disgusted", 2: "Fearful", 3: "Happy",
                4: "Neutral", 5: "Sad", 6: "Surprised"}

# Load the Haar cascade once, from the directory bundled with opencv-python,
# instead of a machine-specific absolute path.
bounding_box = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    num_faces = bounding_box.detectMultiScale(gray_frame, scaleFactor=1.3, minNeighbors=5)

    for (x, y, w, h) in num_faces:
        cv2.rectangle(frame, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
        # Crop the face region, resize to 48x48, and add batch/channel axes.
        roi_gray_frame = gray_frame[y:y + h, x:x + w]
        cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray_frame, (48, 48)), -1), 0)
        emotion_prediction = emotion_model.predict(cropped_img)
        maxindex = int(np.argmax(emotion_prediction))
        cv2.putText(frame, emotion_dict[maxindex], (x+20, y-60),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

    cv2.imshow('Video', cv2.resize(frame, (1200, 860), interpolation=cv2.INTER_CUBIC))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
```
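If the cascade file cannot be found, `CascadeClassifier` does not raise an error; `detectMultiScale` simply returns no faces. A quick check after loading, sketched below, makes that failure visible:

```python
# CascadeClassifier fails silently on a missing file; check explicitly.
if bounding_box.empty():
    raise IOError('Failed to load haarcascade_frontalface_default.xml')
```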
Code for the GUI and Emoji Mapping
Create a folder named emojis and save an emoji image in it for each emotion in the dataset.
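The gui.py script below looks the emoji images up by fixed file names (angry.png, disgusted.png, and so on). A small check like this, assuming those same file names, catches missing images before the GUI starts:

```python
import os

# File names must match the emoji_dist dictionary used in gui.py.
expected = ['angry', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised']
missing = [name for name in expected
           if not os.path.exists(f'./emojis/{name}.png')]
if missing:
    print('Missing emoji images:', missing)
```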
Paste the following code into gui.py and run the file.
```python
import tkinter as tk
from tkinter import *
import cv2
from PIL import Image, ImageTk
import os
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D
from keras.optimizers import Adam
from keras.layers import MaxPooling2D

# Rebuild the same architecture used for training so the saved weights fit.
emotion_model = Sequential()
emotion_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48, 48, 1)))
emotion_model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Dropout(0.25))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
emotion_model.add(MaxPooling2D(pool_size=(2, 2)))
emotion_model.add(Dropout(0.25))
emotion_model.add(Flatten())
emotion_model.add(Dense(1024, activation='relu'))
emotion_model.add(Dropout(0.5))
emotion_model.add(Dense(7, activation='softmax'))
emotion_model.load_weights('model.h5')

cv2.ocl.setUseOpenCL(False)

emotion_dict = {0: "Angry", 1: "Disgusted", 2: "Fearful", 3: "Happy",
                4: "Neutral", 5: "Sad", 6: "Surprised"}
emoji_dist = {0: "./emojis/angry.png", 1: "./emojis/disgusted.png",
              2: "./emojis/fearful.png", 3: "./emojis/happy.png",
              4: "./emojis/neutral.png", 5: "./emojis/sad.png",
              6: "./emojis/surprised.png"}

last_frame1 = np.zeros((480, 640, 3), dtype=np.uint8)
show_text = [0]

# Open the webcam once at startup, and load the Haar cascade from the
# directory bundled with opencv-python instead of an absolute path.
cap1 = cv2.VideoCapture(0)
bounding_box = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

def show_vid():
    """Grab a frame, detect faces, predict the emotion, and display the feed."""
    global last_frame1
    if not cap1.isOpened():
        print("cant open the camera1")
        return
    flag1, frame1 = cap1.read()
    if not flag1:
        print("Major error!")
        return
    frame1 = cv2.resize(frame1, (600, 500))
    gray_frame = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    num_faces = bounding_box.detectMultiScale(gray_frame, scaleFactor=1.3, minNeighbors=5)
    for (x, y, w, h) in num_faces:
        cv2.rectangle(frame1, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
        roi_gray_frame = gray_frame[y:y + h, x:x + w]
        cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray_frame, (48, 48)), -1), 0)
        prediction = emotion_model.predict(cropped_img)
        maxindex = int(np.argmax(prediction))
        cv2.putText(frame1, emotion_dict[maxindex], (x+20, y-60),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        show_text[0] = maxindex
    last_frame1 = frame1.copy()
    pic = cv2.cvtColor(last_frame1, cv2.COLOR_BGR2RGB)
    img = Image.fromarray(pic)
    imgtk = ImageTk.PhotoImage(image=img)
    lmain.imgtk = imgtk
    lmain.configure(image=imgtk)
    lmain.after(10, show_vid)

def show_vid2():
    """Show the emoji matching the most recently predicted emotion."""
    frame2 = cv2.imread(emoji_dist[show_text[0]])
    pic2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2RGB)
    img2 = Image.fromarray(pic2)
    imgtk2 = ImageTk.PhotoImage(image=img2)
    lmain2.imgtk2 = imgtk2
    lmain3.configure(text=emotion_dict[show_text[0]], font=('arial', 45, 'bold'))
    lmain2.configure(image=imgtk2)
    lmain2.after(10, show_vid2)

if __name__ == '__main__':
    root = tk.Tk()
    img = ImageTk.PhotoImage(Image.open("logo.png"))
    heading = Label(root, image=img, bg='black')
    heading.pack()
    heading2 = Label(root, text="Photo to Emoji", pady=20,
                     font=('arial', 45, 'bold'), bg='black', fg='#CDCDCD')
    heading2.pack()
    lmain = tk.Label(master=root, padx=50, bd=10)
    lmain2 = tk.Label(master=root, bd=10)
    lmain3 = tk.Label(master=root, bd=10, fg="#CDCDCD", bg='black')
    lmain.pack(side=LEFT)
    lmain.place(x=50, y=250)
    lmain3.pack()
    lmain3.place(x=960, y=250)
    lmain2.pack(side=RIGHT)
    lmain2.place(x=900, y=350)
    root.title("Photo To Emoji")
    root.geometry("1400x900+100+10")
    root['bg'] = 'black'
    exitbutton = Button(root, text='Quit', fg="red", command=root.destroy,
                        font=('arial', 25, 'bold')).pack(side=BOTTOM)
    show_vid()
    show_vid2()
    root.mainloop()
```
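To run the application, keep model.h5, the emojis folder, and a logo.png banner image in the same working directory as gui.py, then launch it with `python gui.py`. The two `after(10, ...)` callbacks keep the webcam feed and the emoji panel refreshing roughly every 10 milliseconds.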
Summary
In this beginner-friendly deep learning project, we built a convolutional neural network to recognize facial emotions. We trained the model on the FER2013 dataset and then mapped the predicted emotions to the corresponding emojis or avatars.
Using OpenCV's Haar cascade XML, we obtained bounding boxes for the faces in the webcam feed and fed the cropped face regions into the trained model for classification.
DataFlair is committed to providing all the resources needed to become a data scientist, including detailed tutorials, practice exercises, use cases, and project source code.