Tianchi Beginner Competition -- Steam Prediction

First, inspect the data.

#coding:utf-8
"""
Created on Wed Jan 9 2019
@author: fzh
"""
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.max_open_warning': 0})
import seaborn as sns
import pandas as pd
import numpy as np
import os
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RepeatedKFold, cross_val_score, cross_val_predict, KFold
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.svm import LinearSVR, SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from sklearn.preprocessing import PolynomialFeatures, MinMaxScaler, StandardScaler

# load dataset
with open("data/zhengqi_train.txt") as fr:
    data_train = pd.read_table(fr, sep="\t")
print('data_train.shape=', data_train.shape)
with open("data/zhengqi_test.txt") as fr_test:
    data_test = pd.read_table(fr_test, sep="\t")
print('data_test.shape=', data_test.shape)
# merge train_set and test_set, tagging each row's origin
data_train["oringin"] = "train"
data_test["oringin"] = "test"
data_all = pd.concat([data_train, data_test], axis=0, ignore_index=True)
# View data
print('data_all.shape=', data_all.shape)

# Explore feature distribution
fig = plt.figure(figsize=(6, 6))
for column in data_all.columns[0:-2]:
    g = sns.kdeplot(data_all[column][(data_all["oringin"] == "train")], color="Red", shade=True)
    g = sns.kdeplot(data_all[column][(data_all["oringin"] == "test")], color="Blue", shade=True)
    g.set(xlabel=column, ylabel='Frequency')
    g = g.legend(["train", "test"])
    plt.show()

As shown, train has 2888 rows and test has 1925 rows. Next, compare the per-column distributions of train and test; the x-axis is the feature name, from V0 to V37. Taking V4 and V5 as examples: the train and test distributions of V4 are close, while V5 differs markedly. Features with similarly large gaps include "V9", "V11", "V17", "V22", and "V28", so these columns can be dropped.
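Since eyeballing 38 KDE plots is error-prone, this screening can also be automated. Below is a minimal sketch using scipy's two-sample Kolmogorov-Smirnov test; the test choice and the 0.05 cutoff are assumptions, not part of the original post, which screened visually.

from scipy.stats import ks_2samp

# flag features whose train and test distributions differ significantly
suspect_cols = []
for column in data_all.columns[0:-2]:
    train_vals = data_all[column][data_all["oringin"] == "train"]
    test_vals = data_all[column][data_all["oringin"] == "test"]
    stat, p_value = ks_2samp(train_vals, test_vals)
    if p_value < 0.05:
        suspect_cols.append(column)
print('suspect_cols=', suspect_cols)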

fig = plt.figure(figsize=(10, 10))
for i in range(len(data_all.columns)-2):
    # initialize the grid, one panel per origin
    g = sns.FacetGrid(data_all, col='oringin')
    # visualize histograms via the map method
    g = g.map(sns.distplot, data_all.columns[i])
    plt.savefig('distplot'+str(i)+'.jpg')

V5 is shown here as an example.

Drop the feature columns "V5", "V9", "V11", "V17", "V22", and "V28".

"""删除特征"V5","V9","V11","V17","V22","V28",训练集和测试集分布不一致"""
data_all.drop(["V5","V9","V11","V17","V22","V28"],axis=1,inplace=True)
print('drop after data_all.shape=',data_all.shape)

"""删除特征"V5","V9","V11","V17","V22","V28",训练集和测试集分布不一致"""
data_all.drop(["V5","V9","V11","V17","V22","V28"],axis=1,inplace=True)
print('drop after data_all.shape=',data_all.shape)# figure parameters
data_train= data_all[data_all["oringin"] == "train"].drop("oringin", axis=1)
print('drop after data_train.shape=',data_train.shape)"""找出相关程度"""
plt.figure(figsize=(20, 16))  # 指定绘图对象宽度和高度
colnm = data_train.columns.tolist()  # 列表头
mcorr = data_train.corr(method="spearman")  # 相关系数矩阵,即给出了任意两个变量之间的相关系数
print('mcorr.shape=',mcorr.shape)
mask = np.zeros_like(mcorr, dtype=np.bool)  # 构造与mcorr同维数矩阵 为bool型
#画上三角相关系数矩阵
mask[np.triu_indices_from(mask)] = True  # 上三角为1
g = sns.heatmap(mcorr, mask=mask, cmap=plt.cm.jet, square=True, annot=True, fmt='0.2f')  # 热力图(看两两相似度)
plt.savefig('mcorr.jpg')

Drop features whose correlation with target is below 0.1 in absolute value.

# Threshold for removing weakly correlated variables
threshold = 0.1
# Absolute value correlation matrix
corr_matrix = data_train.corr().abs()
drop_col = corr_matrix[corr_matrix["target"] < threshold].index
print('drop_col=', drop_col)
data_all.drop(drop_col, axis=1, inplace=True)
print('data_all.shape=', data_all.shape)

Apply min-max normalization to each feature column.

"""归一化"""
cols_numeric=list(data_all.columns)
cols_numeric.remove("oringin")
def scale_minmax(col):return (col-col.min())/(col.max()-col.min())
scale_cols = [col for col in cols_numeric if col!='target']
print('scale_cols=',scale_cols)
data_all[scale_cols] = data_all[scale_cols].apply(scale_minmax,axis=0)
print('data_all[scale_cols].shape=',data_all[scale_cols].shape)
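Note that the code above scales train and test together, so test-set statistics influence the transform. A stricter alternative, sketched below (this deviates from the post), fits sklearn's MinMaxScaler on the training rows only and applies the same transform everywhere:

from sklearn.preprocessing import MinMaxScaler

train_mask = data_all["oringin"] == "train"
scaler = MinMaxScaler()
# fit on training rows only; test values may then fall slightly outside [0, 1]
scaler.fit(data_all.loc[train_mask, scale_cols])
data_all[scale_cols] = scaler.transform(data_all[scale_cols])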

Split into train, valid, and test.

"""function to get training samples"""
def get_training_data():df_train = data_all[data_all["oringin"]=="train"]print('df_train.shape=',df_train.shape)y = df_train.targetX = df_train.drop(["oringin","target"],axis=1)X_train,X_valid,y_train,y_valid=train_test_split(X,y,test_size=0.3,random_state=100)return X_train,X_valid,y_train,y_valid"""extract test data (without SalePrice)"""
def get_test_data():df_test = data_all[data_all["oringin"]=="test"].reset_index(drop=True)return df_test.drop(["oringin","target"],axis=1)"""metric for evaluation"""
def rmse(y_true, y_pred):diff = y_pred - y_truesum_sq = sum(diff ** 2)n = len(y_pred)return np.sqrt(sum_sq / n)def mse(y_ture, y_pred):return mean_squared_error(y_ture, y_pred)
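As a quick sanity check of these helpers (toy numbers, not from the post):

y_true_demo = np.array([0.0, 1.0, 2.0])
y_pred_demo = np.array([0.0, 1.0, 4.0])
print(rmse(y_true_demo, y_pred_demo))  # sqrt(4/3) ≈ 1.155
print(mse(y_true_demo, y_pred_demo))   # 4/3 ≈ 1.333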

Use Ridge regression to remove outliers: standardize each residual as z = (resid - mean_resid) / std_resid and flag points with |z| > 3.

"""function to detect outliers based on the predictions of a model"""
def find_outliers(model, X, y, sigma=3):# predict y values using modeltry:y_pred = pd.Series(model.predict(X), index=y.index)# if predicting fails, try fitting the model firstexcept:model.fit(X, y)y_pred = pd.Series(model.predict(X), index=y.index)# calculate residuals between the model prediction and true y valuesresid = y - y_predmean_resid = resid.mean()std_resid = resid.std()# calculate z statistic, define outliers to be where |z|>sigmaz = (resid - mean_resid) / std_resid#找出方差大于3的数据的索引,然后丢掉outliers = z[abs(z) > sigma].index# print and plot the resultsprint('score=', model.score(X, y))print('rmse=', rmse(y, y_pred))print("mse=", mean_squared_error(y, y_pred))print('---------------------------------------')print('mean of residuals:', mean_resid)print('std of residuals:', std_resid)print('---------------------------------------')print(len(outliers), 'outliers:')print(outliers.tolist())plt.figure(figsize=(15, 5))plt.subplot(1, 3, 1)plt.plot(y, y_pred, '.')plt.plot(y.loc[outliers], y_pred.loc[outliers], 'ro')plt.legend(['Accepted', 'Outlier'])plt.xlabel('y')plt.ylabel('y_pred')plt.subplot(1, 3, 2)plt.plot(y, y - y_pred, '.')plt.plot(y.loc[outliers], y.loc[outliers] - y_pred.loc[outliers], 'ro')plt.legend(['Accepted', 'Outlier'])plt.xlabel('y')plt.ylabel('y - y_pred')plt.subplot(1, 3, 3)plt.hist(z,bins=50)plt.hist(z.loc[outliers],color='r', bins=50)plt.legend(['Accepted', 'Outlier'])plt.xlabel('normal res error')plt.ylabel('frequency')plt.savefig('outliers.png')return outliers# get training data
from sklearn.linear_model import Ridge
X_train, X_valid,y_train,y_valid = get_training_data()
# find and remove outliers using a Ridge model
outliers = find_outliers(Ridge(), X_train, y_train)

Now train with those outliers permanently removed.

""" permanently remove these outliers from the data"""
X_t=X_train.drop(outliers)
y_t=y_train.drop(outliers)
#
def get_trainning_data_omitoutliers():y1=y_t.copy()X1=X_t.copy()return X1,y1from sklearn.preprocessing import StandardScalerdef train_model(model, param_grid,splits=5, repeats=5):X, y = get_trainning_data_omitoutliers()poly_trans=PolynomialFeatures(degree=2)X=poly_trans.fit_transform(X)X=MinMaxScaler().fit_transform(X)# create cross-validation methodrkfold = RepeatedKFold(n_splits=splits, n_repeats=repeats)# perform a grid search if param_grid givenif len(param_grid) > 0:# setup grid search parametersgsearch = GridSearchCV(model, param_grid, cv=rkfold,scoring="neg_mean_squared_error",verbose=1, return_train_score=True)# search the gridgsearch.fit(X, y)# extract best model from the gridmodel = gsearch.best_estimator_best_idx = gsearch.best_index_# get cv-scores for best modelgrid_results = pd.DataFrame(gsearch.cv_results_)cv_mean = abs(grid_results.loc[best_idx, 'mean_test_score'])cv_std = grid_results.loc[best_idx, 'std_test_score']# no grid search, just cross-val score for given modelelse:grid_results = []cv_results = cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv=rkfold)cv_mean = abs(np.mean(cv_results))cv_std = np.std(cv_results)# combine mean and std cv-score in to a pandas seriescv_score = pd.Series({'mean': cv_mean, 'std': cv_std})# predict y using the fitted modely_pred = model.predict(X)# print stats on model performanceprint('----------------------')print(model)print('----------------------')print('score=', model.score(X, y))print('rmse=', rmse(y, y_pred))print('mse=', mse(y, y_pred))print('cross_val: mean=', cv_mean, ', std=', cv_std)return model, cv_score, grid_results
#
# places to store optimal models and scores
opt_models = dict()
score_models = pd.DataFrame(columns=['mean','std'])# no. k-fold splits
splits=5
# no. k-fold iterations
repeats=5print('=========Ridge model========================')
model = 'Ridge'
opt_models[model] = Ridge()
alph_range = np.arange(0.25,6,0.25)
param_grid = {'alpha': alph_range}
opt_models[model],cv_score,grid_results = train_model(opt_models[model], param_grid=param_grid,splits=splits, repeats=repeats)cv_score.name = model
score_models = score_models.append(cv_score)plt.figure()
plt.errorbar(alph_range, abs(grid_results['mean_test_score']),abs(grid_results['std_test_score'])/np.sqrt(splits*repeats))
plt.xlabel('alpha')
plt.ylabel('score')
plt.show()
#print('===========RandomForest model================')
#model = 'RandomForest'
#opt_models[model] = RandomForestRegressor()
#param_grid = {'n_estimators':[100,150,200],'max_features':[8,12,16,20,24],'min_samples_split':[2,4,6]}#opt_models[model], cv_score, grid_results = train_model(opt_models[model], param_grid=param_grid,splits=5, repeats=1)
#cv_score.name = model
#score_models = score_models.append(cv_score)
#print('score_models=',score_models)
#import pickle
#with open("prediction.pkl", "wb") as f:
#    pickle.dump(opt_models[model], f)

The score improves noticeably.

A random forest model was then trained and saved.

The complete train.py:

#coding:utf-8
"""
Created on Wed Jan 9 2019
@author: fzh
"""
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.max_open_warning': 0})
import seaborn as sns
import pandas as pd
import numpy as np
import os
import pickle
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RepeatedKFold, cross_val_score, cross_val_predict, KFold
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.svm import LinearSVR, SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor
# from xgboost import XGBRegressor
from sklearn.preprocessing import PolynomialFeatures, MinMaxScaler, StandardScaler
from beautifultable import BeautifulTable

def del_feature(data_train, data_test):
    data_train["oringin"] = "train"
    data_test["oringin"] = "test"
    data_all = pd.concat([data_train, data_test], axis=0, ignore_index=True)
    """Drop features "V5","V9","V11","V17","V22","V28": train and test distributions differ"""
    data_all.drop(["V5","V9","V11","V17","V22","V28"], axis=1, inplace=True)
    data_train = data_all[data_all["oringin"] == "train"].drop("oringin", axis=1)
    """This also drops 'V14','V21','V25','V26','V32','V33','V34' (correlation with target below threshold)"""
    # Threshold for removing correlated variables
    threshold = 0.1
    # Absolute value correlation matrix
    corr_matrix = data_train.corr().abs()
    drop_col = corr_matrix[corr_matrix["target"] < threshold].index
    data_all.drop(drop_col, axis=1, inplace=True)
    return data_all

"""function to get training samples"""
def get_training_data(data_all):
    df_train = data_all[data_all["oringin"] == "train"]
    y = df_train.target
    X = df_train.drop(["oringin", "target"], axis=1)
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3, random_state=100)
    return X_train, X_valid, y_train, y_valid

"""extract test data (without target)"""
def get_test_data(data_all):
    df_test = data_all[data_all["oringin"] == "test"].reset_index(drop=True)
    return df_test.drop(["oringin", "target"], axis=1)

"""metrics for evaluation"""
def rmse(y_true, y_pred):
    diff = y_pred - y_true
    sum_sq = sum(diff ** 2)
    n = len(y_pred)
    return np.sqrt(sum_sq / n)

def mse(y_true, y_pred):
    return mean_squared_error(y_true, y_pred)

"""function to detect outliers based on the predictions of a model"""
def find_outliers(model, X, y, sigma=3):
    # predict y values using the model
    try:
        y_pred = pd.Series(model.predict(X), index=y.index)
    # if predicting fails, try fitting the model first
    except:
        model.fit(X, y)
        y_pred = pd.Series(model.predict(X), index=y.index)
    # calculate residuals between the model prediction and true y values
    resid = y - y_pred
    mean_resid = resid.mean()
    std_resid = resid.std()
    # calculate z statistic, define outliers to be where |z|>sigma
    z = (resid - mean_resid) / std_resid
    # indices of samples with |z| > sigma, which will be dropped
    outliers = z[abs(z) > sigma].index
    # print the results
    print('score=', model.score(X, y))
    print('rmse=', rmse(y, y_pred))
    print("mse=", mean_squared_error(y, y_pred))
    print('---------------------------------------')
    print('mean of residuals:', mean_resid)
    print('std of residuals:', std_resid)
    print('---------------------------------------')
    return outliers

def get_trainning_data_omitoutliers(X_t, y_t):
    y1 = y_t.copy()
    X1 = X_t.copy()
    return X1, y1

def scale_minmax(col):
    return (col - col.min()) / (col.max() - col.min())

def normal(data_all):
    """Min-max normalization"""
    cols_numeric = list(data_all.columns)
    cols_numeric.remove("oringin")
    scale_cols = [col for col in cols_numeric if col != 'target']
    print('scale_cols=', scale_cols)
    data_all[scale_cols] = data_all[scale_cols].apply(scale_minmax, axis=0)
    return data_all

if __name__ == '__main__':
    with open("data/zhengqi_train.txt") as fr:
        data_train = pd.read_table(fr, sep="\t")
    with open("data/zhengqi_test.txt") as fr_test:
        data_test = pd.read_table(fr_test, sep="\t")
    data_all = del_feature(data_train, data_test)
    print('clear data_all.shape', data_all.shape)
    data_all = normal(data_all)
    X_train, X_valid, y_train, y_valid = get_training_data(data_all)
    print('X_train.shape=', X_train.shape)
    print('X_valid.shape=', X_valid.shape)
    X_test = get_test_data(data_all)
    print('X_test.shape', X_test.shape)
    # find and remove outliers using a Ridge model
    outliers = find_outliers(Ridge(), X_train, y_train)
    """permanently remove these outliers from the data"""
    X_train, y_train = get_trainning_data_omitoutliers(X_train.drop(outliers), y_train.drop(outliers))
    X1 = pd.concat([X_train, y_train], axis=1)
    X2 = pd.concat([X_valid, y_valid], axis=1)
    X_all = pd.concat([X1, X2], axis=0)
    print(X_all)
    y = X_all['target']
    X = X_all.drop(["target"], axis=1)
    print(X.shape)
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3, random_state=100)
    poly_trans = PolynomialFeatures(degree=2)
    X_train = poly_trans.fit_transform(X_train)
    print(X_train.shape)
    X_valid = poly_trans.fit_transform(X_valid)
    print(X_valid.shape)
    print('==============forest_model========================')
    forest_model = RandomForestRegressor(
        n_estimators=500,
        criterion='mse',
        max_depth=20,
        min_samples_leaf=3,
        max_features=0.4,
        random_state=1,
        bootstrap=False,
        n_jobs=-1)
    forest_model.fit(X_train, y_train)
    importance = forest_model.feature_importances_
    table = BeautifulTable()
    # table.column_headers = ["feature", "importance"]
    print('RF feature importance:')
    for i, cols in enumerate(X_all.iloc[:, :-1]):
        table.append_row([cols, round(importance[i], 3)])
    print(table)
    y_pred = forest_model.predict(X_valid)
    y_valid_rmse = rmse(y_valid, y_pred)
    print('y_valid_rmse=', y_valid_rmse)
    y_valid_mse = mse(y_valid, y_pred)
    print('y_valid_mse=', y_valid_mse)
    y_valid_score = forest_model.score(X_valid, y_valid)
    print('y_valid_score=', y_valid_score)
    # save the model, then reload it and verify the metrics match
    with open("forest_model.pkl", "wb") as f:
        pickle.dump(forest_model, f)
    with open("forest_model.pkl", "rb") as f:
        model = pickle.load(f)
    y_pred = model.predict(X_valid)
    y_valid_rmse = rmse(y_valid, y_pred)
    print('y_valid_rmse=', y_valid_rmse)
    y_valid_mse = mse(y_valid, y_pred)
    print('y_valid_mse=', y_valid_mse)
    y_valid_score = model.score(X_valid, y_valid)
    print('y_valid_score=', y_valid_score)

inference.py is as follows:

#coding:utf-8
"""
Created on Wed Jan 9 2019
@author: fzh
"""
import pickle
import numpy as np
import os
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures, MinMaxScaler

def del_feature(data_train, data_test):
    data_train["oringin"] = "train"
    data_test["oringin"] = "test"
    data_all = pd.concat([data_train, data_test], axis=0, ignore_index=True)
    """Drop features "V5","V9","V11","V17","V22","V28": train and test distributions differ"""
    data_all.drop(["V5","V9","V11","V17","V22","V28"], axis=1, inplace=True)
    data_train = data_all[data_all["oringin"] == "train"].drop("oringin", axis=1)
    """This also drops 'V14','V21','V25','V26','V32','V33','V34' (correlation with target below threshold)"""
    # Threshold for removing correlated variables
    threshold = 0.1
    # Absolute value correlation matrix
    corr_matrix = data_train.corr().abs()
    drop_col = corr_matrix[corr_matrix["target"] < threshold].index
    data_all.drop(drop_col, axis=1, inplace=True)
    return data_all

def scale_minmax(col):
    return (col - col.min()) / (col.max() - col.min())

def normal(data_all):
    """Min-max normalization"""
    cols_numeric = list(data_all.columns)
    cols_numeric.remove("oringin")
    scale_cols = [col for col in cols_numeric if col != 'target']
    print('scale_cols=', scale_cols)
    data_all[scale_cols] = data_all[scale_cols].apply(scale_minmax, axis=0)
    return data_all

"""extract test data (without target)"""
def get_test_data(data_all):
    df_test = data_all[data_all["oringin"] == "test"].reset_index(drop=True)
    return df_test.drop(["oringin", "target"], axis=1)

if __name__ == '__main__':
    with open("data/zhengqi_train.txt") as fr:
        data_train = pd.read_table(fr, sep="\t")
    with open("data/zhengqi_test.txt") as fr_test:
        data_test = pd.read_table(fr_test, sep="\t")
    data_all = del_feature(data_train, data_test)
    print('clear data_all.shape', data_all.shape)
    data_all = normal(data_all)
    X_test = get_test_data(data_all)
    print('X_test.shape', X_test.shape)
    poly_trans = PolynomialFeatures(degree=2)
    X_test = poly_trans.fit_transform(X_test)
    print(X_test.shape)
    with open("forest_model.pkl", "rb") as f:
        model = pickle.load(f)
    X_pre = model.predict(X_test)
    print(X_pre.shape)
    X_pre = list(map(lambda x: round(x, 3), X_pre))
    X_pre = np.reshape(X_pre, (-1, 1))
    print(X_pre.shape)
    X_pre = pd.DataFrame(X_pre)
    print(X_pre)
    X_pre.to_csv('result.txt', index=False, header=False)
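One design note: inference.py refits PolynomialFeatures directly on the test matrix. A pure degree-2 expansion is stateless apart from the column count, so this happens to reproduce the training-time features here, but the safer pattern is to persist the fitted transformer next to the model. A minimal sketch; the file name poly_trans.pkl is hypothetical:

import pickle

# at the end of train.py, alongside forest_model.pkl:
with open("poly_trans.pkl", "wb") as f:
    pickle.dump(poly_trans, f)

# in inference.py, instead of refitting:
with open("poly_trans.pkl", "rb") as f:
    poly_trans = pickle.load(f)
X_test = poly_trans.transform(X_test)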

The submission scored 0.1298, ranking somewhere past 100th. Not bad for a first attempt.
