算法步骤: 1) 将原始数据按列组成 n 行 m 列矩阵 X; 2) 特征中心化,即每一维的数据都减去该维的均值,使每一维的均值都为 0; 3) 求出协方差矩阵; 4) 求出协方差矩阵的特征值及对应的特征向量; 5) 将特征向量按对应的特征值从大到小、自上而下按行排列成矩阵,取前 k 行组成矩阵 P; 6) Y = PX 即为降维到 k 维后的数据。
PCA
/**
 * Principal component analysis built on a Jacobi eigenvalue iteration.
 *
 * <p>{@link #main(String[])} reads comma separated samples from {@code src/bp/test.txt},
 * projects them onto the leading principal components and writes the result, together with
 * the label column of each input line, to {@code src/bp/test(low).txt}.
 */
public class PCA {

    /** Number of sample rows {@link #readData()} allocates room for. */
    private static final int SAMPLE_ROWS = 78;

    /** Number of numeric columns per input line; the label is read from this index. */
    private static final int FEATURE_COLS = 13;

    /** Labels (last column) of the lines read by {@link #readData()}, in file order. */
    static ArrayList<String> tempc = new ArrayList<>();

    /**
     * Projects the rows of {@code src} onto the {@code k} eigenvectors of the scatter matrix
     * that have the largest eigenvalues.
     *
     * @param src samples as rows, features as columns; not modified
     * @param k   number of components to keep, {@code 0 <= k <= src.numCols}
     * @return a {@code src.numRows x k} matrix of projected samples
     * @throws IllegalArgumentException if {@code k} is outside {@code [0, src.numCols]}
     */
    public static DenseMatrix64F runPCA(DenseMatrix64F src, int k) {
        if (k < 0 || k > src.numCols) {
            throw new IllegalArgumentException(
                    "k must be in [0, " + src.numCols + "] but was " + k);
        }
        DenseMatrix64F rs = new DenseMatrix64F(src.numRows, k);
        DenseMatrix64F norm_X = centerColumns(src);

        // Scatter matrix X^T * X of the centred data (not divided by the sample count).
        DenseMatrix64F norm_X_T = new DenseMatrix64F(src.numCols, src.numRows);
        CommonOps.transpose(norm_X, norm_X_T);
        DenseMatrix64F scatter_matrix = new DenseMatrix64F(src.numCols, src.numCols);
        CommonOps.mult(norm_X_T, norm_X, scatter_matrix);

        // JacobiCount overwrites its argument, so hand it a copy.
        EDInfo ed = JacobiCount(new DenseMatrix64F(scatter_matrix), 0.001, 1000);

        // The first k columns of the eigenvector matrix form the projection basis.
        DenseMatrix64F feature_T = new DenseMatrix64F(src.numCols, k);
        for (int i = 0; i < k; i++) {
            for (int j = 0; j < src.numCols; j++) {
                feature_T.set(j, i, ed.getValues().get(j, i));
            }
        }
        CommonOps.mult(norm_X, feature_T, rs);
        return rs;
    }

    /** Returns a copy of {@code src} in which every column has its mean subtracted. */
    private static DenseMatrix64F centerColumns(DenseMatrix64F src) {
        DenseMatrix64F centered = new DenseMatrix64F(src.numRows, src.numCols);
        for (int col = 0; col < src.numCols; col++) {
            double mean = 0;
            for (int row = 0; row < src.numRows; row++) {
                mean += src.get(row, col);
            }
            mean /= src.numRows;
            for (int row = 0; row < src.numRows; row++) {
                centered.set(row, col, src.get(row, col) - mean);
            }
        }
        return centered;
    }

    /**
     * Diagonalises {@code src} with Jacobi rotations, always pivoting on the off-diagonal
     * element of largest magnitude.
     *
     * <p>The rotation mirrors the pivot element across the diagonal, so {@code src} is
     * expected to be symmetric. {@code src} is overwritten during the iteration.
     *
     * <p>The matrix handed to the returned {@code EDInfo} holds one eigenvector per
     * <em>column</em>, ordered by descending eigenvalue, with each vector's sign chosen so
     * that its components do not sum to a negative number. The array handed to it holds the
     * eigenvalues in the same order.
     *
     * @param src  square matrix to diagonalise; modified in place
     * @param diff iteration stops once the largest off-diagonal magnitude is below this
     * @param iter iteration stops once more than this many rotations have been applied
     * @return the sorted eigenvectors and eigenvalues wrapped in an {@code EDInfo}
     */
    public static EDInfo JacobiCount(DenseMatrix64F src, double diff, int iter) {
        // Accumulated rotations; starts as the identity and ends with eigenvectors as columns.
        DenseMatrix64F values = new DenseMatrix64F(src.numRows, src.numCols);
        for (int i = 0; i < src.numRows; i++) {
            for (int j = 0; j < src.numCols; j++) {
                values.set(i, j, i == j ? 1 : 0);
            }
        }

        int nCount = 0;
        while (true) {
            // Double.MIN_VALUE is the smallest positive double, so exact zeros never win.
            double dbMax = Double.MIN_VALUE;
            int nRow = 0;
            int nCol = 1;
            for (int i = 0; i < src.numRows; i++) {
                for (int j = 0; j < src.numCols; j++) {
                    if (i != j && Math.abs(src.get(i, j)) > dbMax) {
                        dbMax = Math.abs(src.get(i, j));
                        nRow = i;
                        nCol = j;
                    }
                }
            }
            if (dbMax < diff) {
                break;
            }
            if (nCount > iter) {
                break;
            }
            nCount++;

            double dbApp = src.get(nRow, nRow);
            double dbApq = src.get(nRow, nCol);
            double dbAqq = src.get(nCol, nCol);

            // Rotation angle that zeroes the pivot element.
            double dbAngle = 0.5 * Math.atan2(-2 * dbApq, dbAqq - dbApp);
            double dbSinTheta = Math.sin(dbAngle);
            double dbCosTheta = Math.cos(dbAngle);
            double dbSin2Theta = Math.sin(2 * dbAngle);
            double dbCos2Theta = Math.cos(2 * dbAngle);

            src.set(nRow, nRow, dbApp * dbCosTheta * dbCosTheta
                    + dbAqq * dbSinTheta * dbSinTheta
                    + 2 * dbApq * dbCosTheta * dbSinTheta);
            src.set(nCol, nCol, dbApp * dbSinTheta * dbSinTheta
                    + dbAqq * dbCosTheta * dbCosTheta
                    - 2 * dbApq * dbCosTheta * dbSinTheta);
            src.set(nRow, nCol, 0.5 * (dbAqq - dbApp) * dbSin2Theta + dbApq * dbCos2Theta);
            src.set(nCol, nRow, src.get(nRow, nCol));

            // Rotate the two pivot columns in every other row ...
            for (int i = 0; i < src.numRows; i++) {
                if ((i != nCol) && (i != nRow)) {
                    double previous = src.get(i, nRow);
                    src.set(i, nRow, src.get(i, nCol) * dbSinTheta + previous * dbCosTheta);
                    src.set(i, nCol, src.get(i, nCol) * dbCosTheta - previous * dbSinTheta);
                }
            }
            // ... and the two pivot rows in every other column.
            for (int j = 0; j < src.numRows; j++) {
                if ((j != nCol) && (j != nRow)) {
                    double previous = src.get(nRow, j);
                    src.set(nRow, j, src.get(nCol, j) * dbSinTheta + previous * dbCosTheta);
                    src.set(nCol, j, src.get(nCol, j) * dbCosTheta - previous * dbSinTheta);
                }
            }
            // Apply the same rotation to the accumulated eigenvector columns.
            for (int i = 0; i < src.numRows; i++) {
                double previous = values.get(i, nRow);
                values.set(i, nRow, values.get(i, nCol) * dbSinTheta + previous * dbCosTheta);
                values.set(i, nCol, values.get(i, nCol) * dbCosTheta - previous * dbSinTheta);
            }
        }

        double[] eig = new double[src.numRows];
        for (int i = 0; i < src.numRows; i++) {
            eig[i] = src.get(i, i);
        }
        int[] sortInx = argsort(eig);

        // Reorder the eigenvector COLUMNS by descending eigenvalue. The sign normalisation
        // below and runPCA both read column i as the i-th eigenvector, so the vector must be
        // written down column i (not across row i).
        DenseMatrix64F tmpValues = new DenseMatrix64F(src.numRows, src.numCols);
        for (int i = 0; i < src.numRows; i++) {
            for (int j = 0; j < src.numRows; j++) {
                tmpValues.set(j, i, values.get(j, sortInx[i]));
            }
            eig[i] = src.get(sortInx[i], sortInx[i]);
        }

        // Flip any eigenvector whose components sum to a negative value.
        for (int i = 0; i < src.numRows; i++) {
            double dSumVec = 0;
            for (int j = 0; j < src.numRows; j++) {
                dSumVec += tmpValues.get(j, i);
            }
            if (dSumVec < 0) {
                for (int j = 0; j < src.numRows; j++) {
                    tmpValues.set(j, i, tmpValues.get(j, i) * -1);
                }
            }
        }
        return new EDInfo(tmpValues, eig);
    }

    /**
     * Returns the indices that order {@code input} from largest to smallest.
     *
     * <p>Side effect: {@code input} itself is rearranged into descending order.
     *
     * @param input values to rank; sorted in place
     * @return original positions of the values, largest first
     */
    public static int[] argsort(double[] input) {
        int[] rs = new int[input.length];
        for (int i = 0; i < input.length; i++) {
            rs[i] = i;
        }
        for (int i = 0; i < input.length - 1; i++) {
            for (int j = i + 1; j < input.length; j++) {
                if (input[i] < input[j]) {
                    double tmp = input[i];
                    int tmpIndex = rs[j];
                    input[i] = input[j];
                    input[j] = tmp;
                    rs[j] = rs[i];
                    rs[i] = tmpIndex;
                }
            }
        }
        return rs;
    }

    /**
     * Reads {@code src/bp/test.txt}: each line holds 13 comma separated numbers followed by a
     * label. Every parsed number is echoed to standard output and each label is appended to
     * {@link #tempc}, which is emptied first so it always matches the rows of this read.
     *
     * <p>Failures (missing file, malformed number, more than 78 lines, short line) are
     * reported with a stack trace and whatever was parsed so far is returned.
     *
     * @return a 78 x 13 array; rows beyond the number of lines read stay zero
     * @throws IOException declared for callers; read failures are currently handled inside
     */
    public double[][] readData() throws IOException {
        double[][] res = new double[SAMPLE_ROWS][FEATURE_COLS];
        tempc.clear();
        File filename = new File("src/bp/test.txt");
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(filename)))) {
            int row = 0;
            String line = br.readLine();
            while (line != null) {
                String[] temp = line.split(",");
                for (int i = 0; i < FEATURE_COLS; i++) {
                    res[row][i] = Double.parseDouble(temp[i]);
                    System.out.print(res[row][i] + " ");
                }
                tempc.add(temp[FEATURE_COLS]);
                System.out.println();
                row++;
                line = br.readLine();
            }
        } catch (Exception e) {
            // Best effort: keep the rows parsed so far and report the problem.
            e.printStackTrace();
        }
        return res;
    }

    /**
     * Writes {@code denseMatrix64F} to {@code src/bp/test(low).txt}, one row per line with
     * comma separated values followed by the matching label from {@link #tempc}.
     *
     * <p>Failures are reported with a stack trace; nothing is thrown.
     *
     * @param denseMatrix64F matrix whose row {@code i} is paired with {@code tempc.get(i)}
     */
    public static void writeTxt(DenseMatrix64F denseMatrix64F) {
        try {
            StringBuilder stringBuilder = new StringBuilder();
            for (int i = 0; i < denseMatrix64F.numRows; i++) {
                for (int j = 0; j < denseMatrix64F.numCols; j++) {
                    stringBuilder.append(denseMatrix64F.get(i, j)).append(',');
                }
                stringBuilder.append(tempc.get(i)).append("\n");
            }
            File writename = new File("src/bp/test(low).txt");
            writename.createNewFile();
            // try-with-resources closes (and thereby flushes) the writer even on failure.
            try (BufferedWriter out = new BufferedWriter(new FileWriter(writename))) {
                out.write(stringBuilder.toString());
                out.flush();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Reads the sample file, reduces it to 10 components and writes the result. */
    public static void main(String[] args) throws IOException {
        PCA pca = new PCA();
        double[][] primaryArray = pca.readData();
        System.out.println();
        DenseMatrix64F denseMatrix64F = runPCA(new DenseMatrix64F(primaryArray), 10);
        writeTxt(denseMatrix64F);
    }
}
最小最大规范化
/**
 * Min-max based discretisation of a comma separated data file.
 *
 * <p>{@link #main(String[])} reads {@code src/bp/train(low).txt}, maps every numeric value to
 * a bucket index derived from its column's minimum and maximum, and writes the result with the
 * original label column to {@code src/bp/trainBayes.txt}.
 */
public class DealData {

    /** Per-column maximum seen by the most recent {@link #readTxt(String)} call. */
    static double[] max = new double[14];

    /** Per-column minimum seen by the most recent {@link #readTxt(String)} call. */
    static double[] min = new double[14];

    /** Last field (label) of every non-empty line of the most recent read, in file order. */
    static ArrayList<String> list1 = new ArrayList<>();

    /**
     * Parses a comma separated file. For every non-empty line, all fields except the last are
     * parsed as doubles and returned as one row; the last field is stored in {@link #list1}.
     * {@link #max}, {@link #min} and {@link #list1} are reset at the start of each call so
     * they always describe exactly the rows returned by that call.
     *
     * <p>Failures (missing file, malformed number, more numeric columns than the statistics
     * arrays hold) are reported with a stack trace and the rows parsed so far are returned.
     *
     * @param fileName path of the file to read
     * @return one list of numeric values per successfully parsed line
     */
    public static List<List<Double>> readTxt(String fileName) {
        List<List<Double>> list = new ArrayList<>();
        // Infinities are the neutral seeds for max/min over doubles of any magnitude.
        Arrays.fill(max, Double.NEGATIVE_INFINITY);
        Arrays.fill(min, Double.POSITIVE_INFINITY);
        // Without this, labels of an earlier read would be paired with rows of this one.
        list1.clear();

        File filename = new File(fileName);
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(filename)))) {
            String line = br.readLine();
            while (line != null) {
                if (line.length() > 0) {
                    String[] temp = line.split(",");
                    ArrayList<Double> strings = new ArrayList<>();
                    for (int i = 0; i < temp.length - 1; i++) {
                        double value = Double.parseDouble(temp[i]);
                        strings.add(value);
                        max[i] = Math.max(value, max[i]);
                        min[i] = Math.min(value, min[i]);
                    }
                    list1.add(temp[temp.length - 1]);
                    list.add(strings);
                }
                line = br.readLine();
            }
        } catch (Exception e) {
            // Best effort: keep the rows parsed so far and report the problem.
            e.printStackTrace();
        }
        return list;
    }

    /**
     * Writes {@code content} to {@code src/bp/trainBayes.txt}, replacing any previous content.
     * Failures are reported with a stack trace; nothing is thrown.
     *
     * @param content text to write
     */
    public static void writeTxt(String content) {
        try {
            File writename = new File("src/bp/trainBayes.txt");
            writename.createNewFile();
            // try-with-resources closes (and thereby flushes) the writer even on failure.
            try (BufferedWriter out = new BufferedWriter(new FileWriter(writename))) {
                out.write(content);
                out.flush();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Discretises {@code src/bp/train(low).txt} and writes the bucketed rows. */
    public static void main(String[] args) {
        List<List<Double>> list = readTxt("src/bp/train(low).txt");

        // Bucket width per column: an eighth of the value range, rounded up to a whole number.
        double[] gaps = new double[max.length];
        for (int j = 0; j < gaps.length; j++) {
            gaps[j] = Math.ceil((max[j] - min[j]) / 8);
        }

        StringBuilder stringBuilder = new StringBuilder();
        for (int i = 0; i < list.size(); i++) {
            List<Double> row = list.get(i);
            // NOTE(review): rows returned by readTxt already exclude the label, so this bound
            // leaves out the last numeric column. Kept as-is because consumers of
            // trainBayes.txt may rely on the current column count - confirm intent.
            for (int j = 0; j < row.size() - 1; j++) {
                stringBuilder.append(Math.round((row.get(j) - min[j]) / gaps[j])).append(',');
            }
            stringBuilder.append(list1.get(i));
            stringBuilder.append("\n");
        }
        writeTxt(stringBuilder.toString());
    }
}