一些更高级的ndarray处理
where和一些其他的逻辑运算
np.where(cond,x,y):满足条件(cond)输出x,不满足输出y
# np.where(cond, x, y) works element-wise: take the value from x_arr where
# cond is True, otherwise take it from y_arr.
x_arr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
y_arr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
print(np.where(cond, x_arr, y_arr))
[ 1.1 2.2 1.3 1.4 2.5]
arr = np.random.randn(4, 4)
print(arr)
# Scalar branches are broadcast: positive entries become 2, all others -2.
print(np.where(arr > 0, 2, -2))
# Scalar and array branches can be mixed: positive entries become 2,
# the remaining entries keep their original value.
print(np.where(arr > 0, 2, arr))
[[ -1.10484247e+00 -3.82422727e-01 -3.24361549e-01 1.21286234e+00] [ 1.54499855e-01 -4.77728163e-04 1.44621074e+00 -2.64241611e-03] [ 1.36394862e+00 6.96638259e-02 -2.75237740e-01 -3.32892881e-01] [ -1.37165175e+00 1.79997993e-01 -1.13509664e-01 1.88373639e+00]][[-2 -2 -2 2] [ 2 -2 2 -2] [ 2 2 -2 -2] [-2 2 -2 2]][[ -1.10484247e+00 -3.82422727e-01 -3.24361549e-01 2.00000000e+00] [ 2.00000000e+00 -4.77728163e-04 2.00000000e+00 -2.64241611e-03] [ 2.00000000e+00 2.00000000e+00 -2.75237740e-01 -3.32892881e-01] [ -1.37165175e+00 2.00000000e+00 -1.13509664e-01 2.00000000e+00]]
np.where可以嵌套使用
cond_1 = np.array([True, False, True, True, False])
cond_2 = np.array([False, True, False, True, False])
# Nested np.where behaves like an element-wise if / elif / else chain:
# both conditions -> 0, only cond_1 -> 1, only cond_2 -> 2, neither -> 3.
result = np.where(
    cond_1 & cond_2,
    0,
    np.where(cond_1, 1, np.where(cond_2, 2, 3)),
)
print(result)
[1 2 1 0 3]
arr = np.random.randn(10)
print(arr)
# Summing a boolean mask counts its True entries, so this prints HOW MANY
# elements are greater than 0 (it does not add up the positive values).
print((arr > 0).sum())
[ 0.27350655 -1.51093462 0.26835915 -0.45991855 1.34450904 -1.86871203 0.04308971 1.69640444 -0.02191351 -0.43875275]
5
bools = np.array([False, False, True, False])
print(bools.any())  # True as soon as at least one element is True
print(bools.all())  # False as soon as at least one element is False
True
False
reshape(数组变形)
numpy可以很容易地把一维数组转成二维数组,三维数组。
import numpy as np

# Reshape the same 8 elements into a 2-D and then a 3-D array.
arr = np.arange(8)
print("(4,2):", arr.reshape((4,2)))
print()
print("(2,2,2):", arr.reshape((2,2,2)))
(4,2): [[0 1] [2 3] [4 5] [6 7]](2,2,2): [[[0 1] [2 3]] [[4 5] [6 7]]]
-1(维度自动推算)
如果我们在某一个维度上写上-1,numpy会帮我们自动推导出正确的维度
arr = np.arange(15)
# -1 lets NumPy infer that dimension from the total size: 15 / 5 = 3 columns.
print(arr.reshape((5, -1)))
print(arr.reshape((5, -1)).shape)
[[ 0 1 2] [ 3 4 5] [ 6 7 8] [ 9 10 11] [12 13 14]](5, 3)
ravel(拉平数组)
# ravel flattens a higher-dimensional array into a 1-D array.
# Start from a 2-D (5, 3) array so the flattening is actually visible.
arr = np.arange(15).reshape((5, -1))
print(arr.ravel())
[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14]
concatenate(连接数组)
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
# axis=0: the rows of arr2 are appended below arr1.
print(np.concatenate([arr1, arr2], axis=0))
# axis=1: the columns of arr2 are appended to the right of arr1.
print(np.concatenate([arr1, arr2], axis=1))
[[ 1 2 3] [ 4 5 6] [ 7 8 9] [10 11 12]][[ 1 2 3 7 8 9] [ 4 5 6 10 11 12]]
连接的另一种表述:垂直堆叠(vstack)与水平堆叠(hstack)
print(np.vstack((arr1, arr2)))  # vertical stack: same result as concatenate with axis=0
print(np.hstack((arr1, arr2)))  # horizontal stack: same result as concatenate with axis=1
[[ 1 2 3] [ 4 5 6] [ 7 8 9] [10 11 12]][[ 1 2 3 7 8 9] [ 4 5 6 10 11 12]]
split(拆分数组)
# 5x5 array of uniform random values used by the split examples.
arr = np.random.rand(5, 5)
print(arr)
[[ 0.08218151 0.25291976 0.990262 0.74980044 0.92433676] [ 0.57215647 0.88759783 0.67939949 0.18618301 0.64810013] [ 0.21424794 0.5812622 0.33170632 0.40780156 0.00946797] [ 0.46223634 0.53574553 0.25289433 0.33226224 0.26110024] [ 0.81823359 0.98863697 0.13713923 0.3520669 0.38301044]]
# Split along axis 0 at indices 1 and 3 -> rows [0:1], [1:3] and [3:].
first, second, third = np.split(arr, [1, 3], axis=0)
print(first)
print()
print(second)
print()
print(third)
[[ 0.08218151 0.25291976 0.990262 0.74980044 0.92433676]][[ 0.57215647 0.88759783 0.67939949 0.18618301 0.64810013] [ 0.21424794 0.5812622 0.33170632 0.40780156 0.00946797]][[ 0.46223634 0.53574553 0.25289433 0.33226224 0.26110024] [ 0.81823359 0.98863697 0.13713923 0.3520669 0.38301044]]
# Split along axis 1 at indices 1 and 3 -> columns [0:1], [1:3] and [3:].
first, second, third = np.split(arr, [1, 3], axis=1)
print(first)
print()
print(second)
print()
print(third)
[[ 0.08218151] [ 0.57215647] [ 0.21424794] [ 0.46223634] [ 0.81823359]][[ 0.25291976 0.990262 ] [ 0.88759783 0.67939949] [ 0.5812622 0.33170632] [ 0.53574553 0.25289433] [ 0.98863697 0.13713923]][[ 0.74980044 0.92433676] [ 0.18618301 0.64810013] [ 0.40780156 0.00946797] [ 0.33226224 0.26110024] [ 0.3520669 0.38301044]]
堆叠辅助
arr = np.arange(6)
arr1 = arr.reshape((3, 2))
arr2 = np.random.randn(3, 2)
# r_ stacks along the first axis (row-wise).
print(np.r_[arr1, arr2])
print()
# c_ stacks column-wise; the 1-D arr is appended as an extra column.
print(np.c_[np.r_[arr1, arr2], arr])
print()
# Slice expressions are expanded into arrays directly.
print(np.c_[1:6, -10:-5])
print()
[[ 0. 1. ] [ 2. 3. ] [ 4. 5. ] [ 0.04811148 -1.93674347] [ 1.19646481 0.17346639] [-1.4388562 -1.41584843]][[ 0. 1. 0. ] [ 2. 3. 1. ] [ 4. 5. 2. ] [ 0.04811148 -1.93674347 3. ] [ 1.19646481 0.17346639 4. ] [-1.4388562 -1.41584843 5. ]][[ 1 -10] [ 2 -9] [ 3 -8] [ 4 -7] [ 5 -6]]
repeat(数组重复)
repeat(a,repeats, axis=None)
- 按元素重复
arr = np.arange(3)
print(arr.repeat(3))  # every element is repeated 3 times
print(arr.repeat([2, 3, 4]))  # element i is repeated repeats[i] times
print()
[0 0 0 1 1 1 2 2 2][0 0 1 1 1 2 2 2 2]
- 指定axis来重复
# Repeating along axis=1 requires at least a 2-D array (a 1-D array only has
# axis 0), so build a random 2x2 array for the axis-based repeat example.
arr = np.random.rand(2, 2)
print(arr)
[[ 0.468845 0.43227877] [ 0.13822954 0.14501615]]
print(arr.repeat(2, axis=0))  # repeat along axis 0: each row appears twice
print(arr.repeat(2, axis=1))  # repeat along axis 1: each column appears twice
[[ 0.468845 0.43227877] [ 0.468845 0.43227877] [ 0.13822954 0.14501615] [ 0.13822954 0.14501615]][[ 0.468845 0.468845 0.43227877 0.43227877] [ 0.13822954 0.13822954 0.14501615 0.14501615]]
tile(按规则重复数组)
tile通过将数组整体重复给定的次数来构造新数组。tile(A, reps):A是初始数组,reps指定各个轴上的重复次数。reps为整数时,沿最后一个轴(列方向)重复;reps为元组时,依次指定每个轴的重复次数,例如(2,3)表示沿行方向重复2次、沿列方向重复3次。
arr = np.arange(4).reshape((2, 2))
# Integer reps: the whole array is tiled twice along the last axis.
print(np.tile(arr, 2))
# Tuple reps: 2 copies along axis 0 and 3 copies along axis 1.
print(np.tile(arr, (2, 3)))
[[0 1 0 1] [2 3 2 3]][[0 1 0 1 0 1] [2 3 2 3 2 3] [0 1 0 1 0 1] [2 3 2 3 2 3]]
numpy的文件输入输出
读取csv文件作为数组
import numpy as np

# Parse the comma-separated text file into an array.
arr = np.loadtxt('array_ex.txt', delimiter=',')
print(arr)
[[ 0.580052 0.18673 1.040717 1.134411] [ 0.194163 -0.636917 -0.938659 0.124094] [-0.12641 0.268607 -0.695724 0.047428] [-1.484413 0.004176 -0.744203 0.005487] [ 2.302869 0.200131 1.670238 -1.88109 ] [-0.19323 1.047233 0.482803 0.960334]]
数组文件读写
arr = np.arange(10)
# np.save appends the ".npy" extension when the file name does not have it.
np.save('some_array', arr)
# Read the array back from the .npy file and display it.
loaded = np.load('some_array.npy')
print(loaded)
[0 1 2 3 4 5 6 7 8 9]
多个数组可以一起打包存储到一个.npz文件中(np.savez不做压缩;如需压缩请使用np.savez_compressed)
arr2 = np.arange(15).reshape(3, 5)
# Store several arrays in one .npz archive; the keyword names (a, b) become
# the keys used to read them back.
np.savez('array_archive.npz', a=arr, b=arr2)
# Loading an .npz archive gives a dict-like object keyed by the saved names.
arch = np.load('array_archive.npz')
print(arch['a'])
print(arch['b'])
[0 1 2 3 4 5 6 7 8 9][[ 0 1 2 3 4] [ 5 6 7 8 9] [10 11 12 13 14]]
用numpy写一个softmax
步骤:
- 数据预处理(每行减去该行的最大值,防止指数运算上溢)
- 计算exponential
- 每行求和
- 每一行除以计算的和
import numpy as np

# Generate a (10, 10) array of random values in [1000, 1010); values this
# large are chosen so that a naive exp() overflows.
m = np.random.rand(10, 10) * 10 + 1000
print(m)
[[ 1002.4195769 1000.59428635 1004.19947044 1009.17641327 1004.89329928 1001.02496808 1007.79619575 1005.61568017 1009.28511386 1000.11608716] [ 1002.9870141 1005.59523328 1001.99337934 1008.79319814 1004.78921679 1003.91814186 1009.38777432 1005.20436416 1009.27099589 1008.69823987] [ 1006.68713949 1009.02893339 1008.2656608 1002.27620211 1009.2256124 1004.14144532 1007.09728075 1006.21626467 1004.60860132 1004.51547132] [ 1005.57757481 1001.6026775 1004.79229078 1004.28025577 1008.68219699 1005.6379599 1008.07958879 1006.35060616 1009.03418483 1003.50279599] [ 1003.22924339 1006.62272977 1008.5591972 1009.72498967 1004.49414198 1004.21450523 1008.32652935 1000.90418303 1009.24606203 1001.27113066] [ 1006.84865072 1005.24619541 1000.04356362 1003.38870582 1008.59759772 1008.80052236 1007.92905671 1006.16987466 1002.3761379 1001.55941284] [ 1006.80724007 1004.46597582 1003.25453387 1008.55713243 1009.19618236 1002.06897172 1004.69874948 1006.51535711 1005.23735087 1006.85265988] [ 1002.22993628 1000.59475018 1007.52711923 1000.36311206 1008.22254861 1003.94553055 1004.23517969 1005.26438502 1006.39421888 1005.22133756] [ 1006.92863693 1003.23688304 1007.11513614 1003.28880837 1009.11093137 1006.35136574 1002.04684923 1001.13114541 1008.50487627 1008.67481458] [ 1002.65347387 1001.90472796 1004.02149562 1009.63548587 1009.16220671 1006.39781332 1008.1526219 1003.57220839 1008.60930803 1004.41645034]]
直接对m进行e指数运算会产生上溢
# Exponentiate the raw values directly to demonstrate the overflow.
m_overflow = np.exp(m)
print(m_overflow)
[[ inf inf inf inf inf inf inf inf inf inf]
 [ inf inf inf inf inf inf inf inf inf inf]
 [ inf inf inf inf inf inf inf inf inf inf]
 [ inf inf inf inf inf inf inf inf inf inf]
 [ inf inf inf inf inf inf inf inf inf inf]
 [ inf inf inf inf inf inf inf inf inf inf]
 [ inf inf inf inf inf inf inf inf inf inf]
 [ inf inf inf inf inf inf inf inf inf inf]
 [ inf inf inf inf inf inf inf inf inf inf]
 [ inf inf inf inf inf inf inf inf inf inf]]
G:\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: RuntimeWarning: overflow encountered in exp
  """Entry point for launching an IPython kernel.
寻找每一行的最大值
# Reduce along axis=1 (across the columns) to get the maximum of each row,
# then reshape to a (10, 1) column so it broadcasts against m row by row.
m_row_max = m.max(axis=1).reshape(10, 1)
print(m_row_max, m_row_max.shape)
[[ 1009.28511386] [ 1009.38777432] [ 1009.2256124 ] [ 1009.03418483] [ 1009.72498967] [ 1008.80052236] [ 1009.19618236] [ 1008.22254861] [ 1009.11093137] [ 1009.63548587]] (10, 1)
通过广播的方式将每行数据减去对应行的最大值
# Broadcasting subtracts each row's maximum from every element of that row,
# so the largest value in each row becomes 0.
m = m - m_row_max
print(m)
[[-6.86553696 -8.69082751 -5.08564343 -0.1087006 -4.39181458 -8.26014579 -1.48891811 -3.66943369 0. -9.16902671] [-6.40076022 -3.79254104 -7.39439498 -0.59457618 -4.59855753 -5.46963247 0. -4.18341016 -0.11677843 -0.68953445] [-2.5384729 -0.19667901 -0.95995159 -6.94941029 0. -5.08416708 -2.12833165 -3.00934773 -4.61701107 -4.71014107] [-3.45661002 -7.43150733 -4.24189405 -4.75392907 -0.35198784 -3.39622493 -0.95459604 -2.68357867 0. -5.53138884] [-6.49574628 -3.1022599 -1.16579247 0. -5.23084769 -5.51048445 -1.39846033 -8.82080664 -0.47892764 -8.45385902] [-1.95187164 -3.55432696 -8.75695874 -5.41181655 -0.20292464 0. -0.87146565 -2.63064771 -6.42438446 -7.24110952] [-2.3889423 -4.73020655 -5.94164849 -0.63904993 0. -7.12721064 -4.49743288 -2.68082526 -3.95883149 -2.34352249] [-5.99261232 -7.62779843 -0.69542937 -7.85943655 0. -4.27701805 -3.98736891 -2.95816359 -1.82832972 -3.00121104] [-2.18229443 -5.87404833 -1.99579523 -5.82212299 0. -2.75956563 -7.06408214 -7.97978595 -0.6060551 -0.43611679] [-6.982012 -7.73075791 -5.61399025 0. -0.47327916 -3.23767255 -1.48286397 -6.06327748 -1.02617783 -5.21903553]]
求预处理后的e指数
# Exponentiate the shifted values; every input is <= 0, so exp() cannot overflow.
m_exp = np.exp(m)
print(m_exp, m_exp.shape)
[[ 1.04312218e-03 1.68120847e-04 6.18490628e-03 8.96998943e-01 1.23782475e-02 2.58621284e-04 2.25616615e-01 2.54909015e-02 1.00000000e+00 1.04217895e-04] [ 1.66029460e-03 2.25382585e-02 6.14688467e-04 5.51796380e-01 1.00663457e-02 4.21278021e-03 1.00000000e+00 1.52464260e-02 8.89782323e-01 5.01809632e-01] [ 7.89869284e-02 8.21454272e-01 3.82911421e-01 9.59200640e-04 1.00000000e+00 6.19404411e-03 1.19035722e-01 4.93238409e-02 9.88228942e-03 9.00350735e-03] [ 3.15364890e-02 5.92294057e-04 1.43803289e-02 8.61776882e-03 7.03288672e-01 3.34994945e-02 3.84967625e-01 6.83182276e-02 1.00000000e+00 3.96048477e-03] [ 1.50984802e-03 4.49475108e-02 3.11675571e-01 1.00000000e+00 5.34898908e-03 4.04414773e-03 2.46976935e-01 1.47629228e-04 6.19447308e-01 2.13076561e-04] [ 1.42008035e-01 2.86006179e-02 1.57362462e-04 4.46352464e-03 8.16339758e-01 1.00000000e+00 4.18337963e-01 7.20317916e-02 1.62153108e-03 7.16516327e-04] [ 9.17266523e-02 8.82464816e-03 2.62769434e-03 5.27793627e-01 1.00000000e+00 8.02955997e-04 1.11375513e-02 6.85065952e-02 1.90854027e-02 9.59889224e-02] [ 2.49713221e-03 4.86731255e-04 4.98860204e-01 3.86091355e-04 1.00000000e+00 1.38840018e-02 1.85484526e-02 5.19141655e-02 1.60681727e-01 4.97268106e-02] [ 1.12782462e-01 2.81146852e-03 1.35905535e-01 2.96131163e-03 1.00000000e+00 6.33192663e-02 8.55279590e-04 3.42312686e-04 5.45498570e-01 6.46542214e-01] [ 9.28433319e-04 4.39111184e-04 3.64648989e-03 1.00000000e+00 6.22956140e-01 3.92551533e-02 2.26986674e-01 2.32676246e-03 3.58374111e-01 5.41254683e-03]] (10, 10)
将求指数后的数据沿axis=1求和(即每行求和),然后将得到的一维数据(10,)reshape成(10,1)
# Sum along axis=1 to get one total per row, reshaped to a (10, 1) column
# so it can be broadcast in the division that follows.
m_exp_row_sum = m_exp.sum(axis=1).reshape(10, 1)
print(m_exp_row_sum, m_exp_row_sum.shape)
[[ 2.1682437 ] [ 2.99772713] [ 2.47775123] [ 2.24916138] [ 2.23431102] [ 2.4842771 ] [ 1.82649405] [ 1.79698532] [ 2.51101842] [ 2.26032542]] (10, 1)
每行的数据除以对应行e指数求和
# Divide every row by its own sum (broadcast over the columns) to normalize it.
m_softmax = m_exp / m_exp_row_sum
print(m_softmax)
[[ 4.81090841e-04 7.75378004e-05 2.85249591e-03 4.13698398e-01 5.70888203e-03 1.19276853e-04 1.04055008e-01 1.17564744e-02 4.61202771e-01 4.80655820e-05] [ 5.53851145e-04 7.51844898e-03 2.05051507e-04 1.84071584e-01 3.35799265e-03 1.40532478e-03 3.33586066e-01 5.08599528e-03 2.96818985e-01 1.67396701e-01] [ 3.18784741e-02 3.31532183e-01 1.54539898e-01 3.87125483e-04 4.03591769e-01 2.49986522e-03 4.80418376e-02 1.99066962e-02 3.98841067e-03 3.63374146e-03] [ 1.40214434e-02 2.63339955e-04 6.39364033e-03 3.83154756e-03 3.12689288e-01 1.48942156e-02 1.71160517e-01 3.03749780e-02 4.44610159e-01 1.76087176e-03] [ 6.75755530e-04 2.01169445e-02 1.39495159e-01 4.47565264e-01 2.39402171e-03 1.81002005e-03 1.10538297e-01 6.60737144e-05 2.77243098e-01 9.53656673e-05] [ 5.71627193e-02 1.15126521e-02 6.33433613e-05 1.79670965e-03 3.28602537e-01 4.02531586e-01 1.68394243e-01 2.89950713e-02 6.52717479e-04 2.88420453e-04] [ 5.02200663e-02 4.83146833e-03 1.43865475e-03 2.88965424e-01 5.47496993e-01 4.39615994e-04 6.09777585e-03 3.75071549e-02 1.04492006e-02 5.25536464e-02] [ 1.38962304e-03 2.70859896e-04 2.77609505e-01 2.14855041e-04 5.56487574e-01 7.72627449e-03 1.03219834e-02 2.88895880e-02 8.94173844e-02 2.76723522e-02] [ 4.49150276e-02 1.11965269e-03 5.41236712e-02 1.17932692e-03 3.98244789e-01 2.52165679e-02 3.40610640e-04 1.36324243e-04 2.17241963e-01 2.57482067e-01] [ 4.10752058e-04 1.94269011e-04 1.61325881e-03 4.42414172e-01 2.75604625e-01 1.73670361e-02 1.00422121e-01 1.02939269e-03 1.58549786e-01 2.39458743e-03]]
验证一下:对输出值沿axis=1求和(即每行求和),每行的结果应该均为1
# Sanity check: summing the softmax output along axis=1 gives one total per row.
row_totals = m_softmax.sum(axis=1)
print(row_totals)
[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
参考
- [numpy指南](http://docs.scipy.org/doc/numpy/reference/)
- [numpy ndarray详解](https://danzhuibing.github.io/py_numpy_ndarray.html)
- [NumPy-快速处理数据](http://old.sebug.net/paper/books/scipydoc/numpy_intro.html)