天气情况图像分类练习赛第三阶段（赛中感）

第三阶段也是实战阶段，不同于前两个阶段的填空而是实打实的预测分析
题目会给出8000张照片数据，其中6000张作为训练集，另外2000张作为测试集；通过对6000张训练图片的训练来预测2000张测试图片的结果，并将结果输出到csv文件中，提交检验
我们之前学了一阵子的TensorFlow，对神经网络的搭建有了大体的认识，而且在网上也轻松找到了对应的模板，我们打算根据本题修改模板，使其为我们所用
在与同学的一起努力之下，初步代码已经完成，我们又进行修改和完善，最后成型（见如下代码）

import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
import cv2
import keras
from keras import datasets, layers, models

# NOTE(review): Keras picks its backend when it is first imported, and
# `import keras` has already run above, so this assignment probably has no
# effect here -- confirm, and move it before the keras imports if needed.
os.environ['KERAS_BACKEND'] = 'tensorflow'

js_path = '/home/kesci/input/weather_image1552/train.json'
test_path = '/home/kesci/input/weather_image1552/测试集/'
train_path = '/home/kesci/input/weather_image1552/训练集/'
import json

# Number of labelled images taken from train.json for training.
# (The original comment called this the test-set size, but the value is only
# used to limit how many training samples are collected.)
testdata = 400
path = '/home/kesci/input/weather_image1552/train.json'
with open(path, 'r', encoding='utf-8') as f:
    # Mapping of image file name -> weather name.
    label = json.load(f)


def read_image(paths):
    """Return the paths of all ``.jpg`` files found below ``paths``.

    The directory tree is walked recursively; the extension comparison is
    case-sensitive, so ``.JPG`` files are skipped.

    Raises:
        OSError: if ``paths`` cannot be listed (for example, it is missing).
    """
    # os.walk() silently yields nothing for a missing directory, so list the
    # directory first to fail loudly instead of returning an empty result.
    os.listdir(paths)
    filelist = []
    for root, _dirs, files in os.walk(paths):
        for file in files:
            if os.path.splitext(file)[1] == ".jpg":
                filelist.append(os.path.join(root, file))
    return filelist


def im_resize(paths):
    """Resize every image in ``paths`` to 128x128 and overwrite it on disk.

    WARNING: this is destructive -- each original file is replaced by its
    resized version.
    """
    for filename in paths:
        with Image.open(filename) as im:
            newim = im.resize((128, 128))
        # Save only after the source handle is closed, so the file is not
        # written while it is still open for reading.
        newim.save(filename)


def im_array(paths):
    """Load images as 128x128 grayscale and return all pixels as one flat list.

    Each image contributes 128 * 128 floats (pixel value / 255.0), appended in
    the order of ``paths``. Callers reshape the result themselves.
    """
    M = []
    for filename in paths:
        # The context manager closes the file handle; the original left one
        # handle open per image.
        with Image.open(filename) as im:
            im_L = im.convert("L").resize((128, 128))
        arr1 = np.array(im_L.getdata(), dtype='float32') / 255.0
        M.extend(arr1.tolist())
    return M


# Alternative (disabled) label mapping: mp={'cloudy':0,'sunny':1}
# Class index -> label string; note the strings are swapped relative to the
# index (0 -> '1', 1 -> '0').
dict_label = {0: '1', 1: '0'}
# Weather name as stored in train.json -> class index used for training.
mp = {'sunny': 0, 'cloudy': 1}
# label=[0]*len(filelist_1)+[1]*len(filelist_2)

# File names in train.json order (dicts preserve insertion order).
all_keys = list(label)

# The first `testdata` entries form the training sample: names and class ids.
js_pic = all_keys[:testdata]
js_lab = [mp[label[key]] for key in js_pic]
train_lables = np.array(js_lab)

# The first 2000 file names from train.json.
tot = all_keys[:2000]
cnt = len(tot)
features = []
# Full paths of the selected training images, in the same order as js_lab.
# The original loop also read every file with cv2.imread(..., 0) and discarded
# the result; that wasted read has been removed.
filelist = [train_path + name for name in js_pic]
trainfilelist = filelist

# im_array returns one flat list of pixels; reshape it to one 128x128 image
# per file and append a trailing channel axis -> (N, 128, 128, 1).
M = im_array(trainfilelist)
train_images = np.array(M).reshape(len(trainfilelist), 128, 128)
train_images = train_images[..., np.newaxis]

# --- Neural network ---
# Small CNN for 128x128 single-channel input: three 3x3 convolutions (the
# first two each followed by 2x2 max-pooling), a 64-unit dense layer and a
# 2-way softmax output.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(2, activation='softmax'),
])
model.summary()

# Integer class ids are used as targets, hence the sparse cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(train_images, train_lables, epochs=10)
# ,batch_size=400
# print(model.evaluate(train_images,train_lables))

# Predicted class index for every test image, in the order of `tot`.
a = []
filelist = read_image(test_path)

# NOTE(review): `tot` holds file names taken from train.json, yet they are
# opened under test_path here. This only works if the test images use the
# same file names -- confirm, otherwise iterate over `filelist` instead.
test_files = [test_path + name for name in tot]

if test_files:
    # Preprocess test images exactly like the training images (in-memory
    # grayscale 128x128 via im_array) instead of overwriting the files on
    # disk with im_resize, then predict the whole batch in one call.
    images = np.array(im_array(test_files)).reshape(-1, 128, 128, 1)
    predictions = model.predict(images)
    a = np.argmax(predictions, axis=1).tolist()

# Write once, after all predictions are collected. fmt='%d' stores the class
# ids as plain integers; the default format would write values such as
# 1.000000000000000000e+00.
np.savetxt('/home/kesci/input/new.csv', a, delimiter=',', fmt='%d')
print(a)

# Disabled alternative that predicts file by file over `filelist`.
"""
for filename in filelist:
    im = Image.open(filename)
    #print(filename)
    im_L = im.convert("L")
    Core = im_L.getdata()
    arr1 = np.array(Core, dtype='float32') / 255.0
    list_img = arr1.tolist()
    images = np.array(list_img).reshape(-1, 128, 128, 1)
    predictions_single = model.predict(images)
    print("预测结果为:", np.argmax(predictions_single))
    print(predictions_single)
"""

在这里插入图片描述

搭建神经网络
所预测结果
在这里插入图片描述

但是对我们来说还有个巨大的麻烦，就是如何输出数据结果到csv文件，这可属实困扰到我，我查阅大量资料，但是最终效果总是不能让我满意，最后我想到一个方法：将答案结果输入到一个数组里，然后放在txt文件中，然后我再写另一个程序读取txt文件，然后输出到csv文件中，通过折中的方法达到我的目的。
输出程序如图

import os
import numpy as np
import  pandas as pd
# !/usr/bin/python
# coding = UFT-8
# Convert the text file of predictions (one value per line) into a CSV file.
# Raw strings keep the Windows backslashes literal; the original literals
# relied on invalid escape sequences such as "\D" and "\c".
src_txt = r'C:\Users\DELL\Desktop\活动\人工智能\图像\ceshi.txt'
dst_csv = r'C:\Users\DELL\Desktop\活动\人工智能\图像\data1.csv'

# header=None: the file has no column-name row, so the first value is data
#   (previously it was consumed as the header).
# The default tab separator is kept: each line holds a single value, and
#   current pandas rejects "\n" as a separator.
# encoding='gb2312' may be needed if Chinese text is displayed incorrectly.
data = pd.read_table(src_txt, header=None)
data1 = pd.DataFrame(data)

# Write the values only: no index column and no header row.
data1.to_csv(dst_csv, index=False, header=False)

但是提交最终结果后发现得分只有0.5，实属懵逼了。我辛辛苦苦做了一阵子，正确率只有一半，和刚开始蒙的一样（我一开始把结果全部预测为1，就是纯瞎蒙的答案提交上去，看看能得到多少分，没想到是0.5）
还有个问题就是，按理说训练集越大正确率越高，但是实际是我6000个数据的训练正确率只有百分之50多，但是400个训练集却有百分之八十多，有时甚至到百分之九十几，搞不清为什么

最终提交的csv文件如图
在这里插入图片描述

继续搞吧，唉，路还长着呢~