



s1 = pd.Series([2, 3, 4, 1, 2, 5, 3, 6, 4, 9, 5, 3, 4, 2, 1, 2])
print(s1.unique())
→[2 3 4 1 5 6 9]


df1 = pd.DataFrame({'a': [1, 1, 3, 2], 'b': [1, 1, 6, 4], 'c': [1, 1, 3, 9]})
print(df1.drop_duplicates())
→  a  b  c
0  1  1  1
2  3  6  3
3  2  4  9



df2 = pd.DataFrame(np.arange(12).reshape(4, 3), index=[0, 1, 2, 3], columns=['a', 'b', 'c'])
df2.iloc[1, [1]] = np.nan
df2.iloc[2, [1, 2]] = np.nan
print(df2)
→  a     b     c
0  0   1.0   2.0
1  3   NaN   5.0
2  6   NaN   NaN
3  9  10.0  11.0


        删除数据意味着减少数据特征,通常用于样本数据量很大且缺失值较少的情况。dropna()函数的原型为dropna(axis, how, thresh, subset, inplace),参数分别为:






print(df2.dropna())
→  a     b     c
0  0   1.0   2.0
3  9  10.0  11.0
print(df2.dropna(axis=1))
→  a
0  0
1  3
2  6
3  9
print(df2.dropna(axis=1, how='all'))
→  a     b     c
0  0   1.0   2.0
1  3   NaN   5.0
2  6   NaN   NaN
3  9  10.0  11.0
print(df2.dropna(axis=1, how='any'))
→  a
0  0
1  3
2  6
3  9
print(df2.dropna(axis=1, thresh=3))
→  a     c
0  0   2.0
1  3   5.0
2  6   NaN
3  9  11.0
print(df2.dropna(subset=['c']))
→  a     b     c
0  0   1.0   2.0
1  3   NaN   5.0
3  9  10.0  11.0
df2.dropna(subset=['c'], inplace=True)
→  a     b     c
0  0   1.0   2.0
1  3   NaN   5.0
3  9  10.0  11.0



df2['b'].fillna(df2['b'].mean(), inplace=True)
→  a     b     c
0  0   1.0   2.0
1  3   5.5   5.0
2  6   5.5   NaN
3  9  10.0  11.0
df2['c'].fillna(df2['c'].median(), inplace=True)
→  a     b     c
0  0   1.0   2.0
1  3   5.5   5.0
2  6   5.5   5.0
3  9  10.0  11.0


        很多模型对于缺失值有较高的容忍度或更灵活的处理方法,并且补齐处理只是用主观估计值来填补未知值,不一定完全符合客观事实,有时会改变原始的系统信息或引入新的噪声数据。常见能自动处理缺失值的模型有K-Nearest Neighbor、决策树模型、随机森林模型、神经网络、朴素贝叶斯、DBSCAN等,它们将缺失值忽略,不参与运算,或将缺失作为分布的状态参与建模过程。












(1)使用 % 格式化

%d 以整型输出
%ld 以长整型输出
%o 以八进制数形式输出整数
%x 以十六进制数形式输出整数
%u 以十进制数输出unsigned型数据(无符号数)
%c 用来输出一个字符
%s 用来输出一个字符串
%f 用来输出实数,以小数形式输出
%e 以指数形式输出实数
%g 根据大小自动选f格式或e格式,且不输出无意义的零

num = 13579
print("%o" % num)
→32413
print("%x" % num)
→350b
print("%u" % num)
→13579
print("%s" % num)
→13579
print("%f" % num)
→13579.000000
print("%e" % num)
→1.357900e+04
print("%g" % num)
→13579
print("%.2f" % num)
→13579.00


print("{}{}".format('A', 'B'))            # 不带编号,必须一一对应
→AB
print("{1}{0}{1}".format('A', 'B'))       # 带编号打乱顺序
→BAB
print("{a}{b}{a}".format(a='A', b='B'))   # 带关键字
→ABA
① 取位数
print("{:10}".format('hello'))                    # 取10位默认左对齐
→hello     
print("{0} to {1:.2f}".format(np.pi, np.pi))      # 取两位小数
→3.141592653589793 to 3.14
print("{0:5.2} to {0:5.2f}".format(np.pi, np.pi)) # 保留两位有效数字与取两位小数对比
→  3.1 to  3.14
② 进制转化
print("int: {0:d}; hex: {0:x}; oct: {0:o}; bin: {0:b}".format(16))
→int: 16; hex: 10; oct: 20; bin: 10000
print("{0} in hex is: {0:#x}\n{1} in oct is: {1:#o}".format(18, 10))
→18 in hex is: 0x12
10 in oct is: 0o12
③ 字符串对齐与位数补全
print("{:<20}".format("hello world!"))
→hello world!        
print("{:^20}".format("hello world!"))
→    hello world!    
print("{:>20}".format("hello world!"))
→        hello world!
print("{:*<20}".format("hello world!"))
→hello world!********
print("{:-^20}".format("hello world!"))
→----hello world!----
print("{:#>20}".format("hello world!"))
→########hello world!
print("{:0=10}".format(12345))
→0000012345
print("{:%}".format(0.125))
→12.500000%
print("my name is {name}, my age is {age}, and my QQ is {qq}.".format(name="67x", age=21, qq="409867818"))
→my name is 67x, my age is 21, and my QQ is 409867818.
position = (1, 2, 3)
print("X: {0[0]}; Y: {0[1]}; Z: {0[2]}".format(position))
→X: 1; Y: 2; Z: 3


name = "67x"
age = 21
print(f"my name is {name}, my age is {age}")
→my name is 67x, my age is 21
width = 10
precision = 4
value = 11 / 3
print(f"result: {value:{width}.{precision}}")
→result:      3.667
a = 10
b = 5
c = 3
pi = np.pi
print(f"{pi:{a}.{b}}")
→    3.1416
print(f"{pi:{a}.{c}f}")
→     3.142
print(f"{a * 2}")
→20
print(f"{name * 3}")
→67x67x67x



        函数原型为re.match(pattern, string, flags=0),参数分别为:




print(re.match("www", "www.baidu.com").span())
→(0, 3)
print(re.match("com", "www.baidu.com"))
→None
str1 = "Cats are smarter than dogs"
matchObj1 = re.match(r"(.*) are (.*?) (.*)", str1, re.M | re.I)
print(matchObj1.group())
→Cats are smarter than dogs
print(matchObj1.group(3))
→than dogs
matchObj2 = re.match(r"(.*) are (.*?) .*", str1, re.M | re.I)
print(matchObj2.group())
→Cats are smarter than dogs
print(matchObj2.group(2))
→smarter
matchObj3 = re.match(r"(.*) are (.*) .*", str1, re.M | re.I)
print(matchObj3.group())
→Cats are smarter than dogs
print(matchObj3.group(2))
→smarter than
matchObj4 = re.match(r"(.*) are (.*)", str1, re.M | re.I)
print(matchObj4.group())
→Cats are smarter than dogs
print(matchObj4.group(2))
→smarter than dogs

        re.match(r"(.*) are (.*?) .*", str1, re.M | re.I) 的具体匹配过程如下:

re.match() 函数从头开始匹配字符串 str1,尝试将其分成若干个子串,使每个子串能够匹配正则表达式模式;

匹配过程从左到右进行。模式中的 (.*) 匹配任意数量的任意字符,并将匹配的结果保存在分组 1 中。因为这是一个贪婪模式,它会尽可能多地匹配字符,直到遇到匹配模式中的下一个字符或结束位置。在这个例子中,(.*) 匹配了 "Cats";

模式中的 " are " 匹配了字符串中的 " are ";

模式中的 (.*?) 匹配任意数量的任意字符,并将匹配的结果保存在分组 2 中。因为这是一个非贪婪模式。它会尽可能少地匹配字符,直到遇到匹配模式中的下一个字符或结束位置。在这个例子中,(.*?) 匹配了 "smarter";

模式中的 " " 匹配了字符串中的空格;

.* 匹配任意数量的任意字符,并将其忽略。在这个例子中,它匹配了 "than dogs"。


        函数原型为re.search(pattern, string, flags=0),参数分别为:




print(re.search("www", "www.baidu.com").span())
→(0, 3)
print(re.search("baidu", "www.baidu.com").span())
→(4, 9)
print(re.search("com", "www.baidu.com").span())
→(10, 13)
print(re.search(r"\.", "www.baidu.com"))
→<re.Match object; span=(3, 4), match='.'>
print(re.search("67x", "www.baidu.com"))
→None


        函数原型为re.sub(pattern, repl, string, count=0),参数分别为:





phone = "2020-221-342#67x"
print(re.sub(r"#.*$", "", phone))    # 删除#及其后的字符
→2020-221-342
print(re.sub(r"-", "*", phone))      # 将-替换为*
→2020*221*342#67x


        函数原型为re.compile(pattern[, flags]),参数分别为:









pattern1 = re.compile(r"\d+")        # 匹配至少1个数字
str2 = "LiuXin67X"
print(pattern1.match(str2))          # 查找头部没有匹配
→None
matchObj5 = pattern1.match(str2, 6, len(str2))
print(matchObj5)                     # 返回一个Match对象
→<re.Match object; span=(6, 8), match='67'>
print(matchObj5.group())
→67
print(matchObj5.span())
→(6, 8)
pattern2 = re.compile(r"([a-z]+) ([a-z]+)", re.I)
str3 = "Hello World Wide Web"
matchObj6 = pattern2.match(str3)
print(matchObj6)
→<re.Match object; span=(0, 11), match='Hello World'>
print(matchObj6.group())             # 返回匹配成功的整个子串
→Hello World
print(matchObj6.span())              # 返回匹配成功的整个子串的索引
→(0, 11)
print(matchObj6.group(1))            # 返回第一个分组匹配成功的子串
→Hello
print(matchObj6.span(1))             # 返回第一个分组匹配成功的子串的索引
→(0, 5)
print(matchObj6.group(2))            # 返回第二个分组匹配成功的子串
→World
print(matchObj6.span(2))             # 返回第二个分组匹配成功的子串的索引
→(6, 11)
print(matchObj6.groups())            # 等价于(matchObj6.group(1), matchObj6.group(2), ...)
→('Hello', 'World')


        函数原型为findall(string[, pos[, endpos]]),参数分别为:




str4 = "baidu 123 google 456."
print(pattern1.findall(str4))
→['123', '456']
print(pattern1.findall(str4, 7, 19))
→['23', '45']


        函数原型为finditer(string[, pos[, endpos]]),参数分别为:





it = re.finditer(pattern1, str4)
for i in it:
    print(i.group())
→123
456


        函数原型为re.split(pattern, string[, maxsplit=0, flag=0]),参数分别为:





str5 = " baidu 123 google 456."
print(re.split("x", str4))
→['baidu 123 google 456.']
print(re.split("\W+", str4))
→['baidu', '123', 'google', '456', '']
print(re.split("\W+", str4, 1))
→['baidu', '123 google 456.']
print(re.split("\W+", str4, 2))
→['baidu', '123', 'google 456.']
print(re.split("(\W+)", str4))
→['baidu', ' ', '123', ' ', 'google', ' ', '456', '.', '']
print(re.split("\W+", str5))
→['', 'baidu', '123', 'google', '456', '']
print(re.split("(\W+)", str5))
→['', ' ', 'baidu', ' ', '123', ' ', 'google', ' ', '456', '.', '']





df3 = pd.DataFrame({"A": ['a', 'b', 'c', 'a', 'b', 'a'], "B": [10, 15, 5, 2, 8, 4]})
df4 = pd.DataFrame({"A": ['a', 'b', 'c', 'b', 'a'], "B1": [3, 5, 6, 8, 9], "B2": [2, 5, 9, 6, 8]})
combine1 = df3["B"].groupby(df3["A"])
combine2 = df3.groupby(df3.dtypes, axis=1)
combine3 = df4.groupby("A")
print(combine1.mean())
→A
a     5.333333
b    11.500000
c     5.000000
Name: B, dtype: float64
print(combine1.size())
→A
a    3
b    2
c    1
Name: B, dtype: int64
print(dict(list(combine2)))
→{dtype('int64'):     B
0  10
1  15
2   5
3   2
4   8
5   4, dtype('O'):    A
0  a
1  b
2  c
3  a
4  b
5  a}
print(combine1.agg("mean"))
→A
a     5.333333
b    11.500000
c     5.000000
Name: B, dtype: float64
print(combine1.agg(["mean", "sum", "std"]))
→       mean  sum       std
a   5.333333   16  4.163332
b  11.500000   23  4.949747
c   5.000000    5       NaN
print(combine3.agg({"B1": "mean", "B2": "sum"}))
→   B1  B2
a  6.0  10
b  6.5  11
c  6.0   9


df5 = pd.DataFrame({"level": ['a', 'b', 'c', 'b', 'a'],"key": ["one", "two", "one", "two", "one"],"num1": [3, 5, 6, 8, 9],"num2": [2, 5, 9, 6, 8]})
print(df5.pivot_table(index="key", columns="level"))
→     num1           num2          
level    a    b    c    a    b    c
one    6.0  NaN  6.0  5.0  NaN  9.0
two    NaN  6.5  NaN  NaN  5.5  NaN
print(pd.crosstab(df5.key, df5.level, margins=True))    # 计算分组频率
→level a  b  c  All
one    2  0  1    3
two    0  2  0    2
All    2  2  1    5




transfer1 = MinMaxScaler(feature_range=(0, 1))
transfer2 = StandardScaler()
iris = load_iris()
df6 = transfer1.fit_transform(iris.data)
df7 = transfer2.fit_transform(iris.data)
print(iris.data)
→[[5.1 3.5 1.4 0.2][4.9 3.  1.4 0.2][4.7 3.2 1.3 0.2][4.6 3.1 1.5 0.2][5.  3.6 1.4 0.2][5.4 3.9 1.7 0.4][4.6 3.4 1.4 0.3][5.  3.4 1.5 0.2][4.4 2.9 1.4 0.2][4.9 3.1 1.5 0.1][5.4 3.7 1.5 0.2][4.8 3.4 1.6 0.2][4.8 3.  1.4 0.1][4.3 3.  1.1 0.1][5.8 4.  1.2 0.2][5.7 4.4 1.5 0.4][5.4 3.9 1.3 0.4][5.1 3.5 1.4 0.3][5.7 3.8 1.7 0.3][5.1 3.8 1.5 0.3][5.4 3.4 1.7 0.2][5.1 3.7 1.5 0.4][4.6 3.6 1.  0.2][5.1 3.3 1.7 0.5][4.8 3.4 1.9 0.2][5.  3.  1.6 0.2][5.  3.4 1.6 0.4][5.2 3.5 1.5 0.2][5.2 3.4 1.4 0.2][4.7 3.2 1.6 0.2][4.8 3.1 1.6 0.2][5.4 3.4 1.5 0.4][5.2 4.1 1.5 0.1][5.5 4.2 1.4 0.2][4.9 3.1 1.5 0.2][5.  3.2 1.2 0.2][5.5 3.5 1.3 0.2][4.9 3.6 1.4 0.1][4.4 3.  1.3 0.2][5.1 3.4 1.5 0.2][5.  3.5 1.3 0.3][4.5 2.3 1.3 0.3][4.4 3.2 1.3 0.2][5.  3.5 1.6 0.6][5.1 3.8 1.9 0.4][4.8 3.  1.4 0.3][5.1 3.8 1.6 0.2][4.6 3.2 1.4 0.2][5.3 3.7 1.5 0.2][5.  3.3 1.4 0.2][7.  3.2 4.7 1.4][6.4 3.2 4.5 1.5][6.9 3.1 4.9 1.5][5.5 2.3 4.  1.3][6.5 2.8 4.6 1.5][5.7 2.8 4.5 1.3][6.3 3.3 4.7 1.6][4.9 2.4 3.3 1. ][6.6 2.9 4.6 1.3][5.2 2.7 3.9 1.4][5.  2.  3.5 1. ][5.9 3.  4.2 1.5][6.  2.2 4.  1. ][6.1 2.9 4.7 1.4][5.6 2.9 3.6 1.3][6.7 3.1 4.4 1.4][5.6 3.  4.5 1.5][5.8 2.7 4.1 1. ][6.2 2.2 4.5 1.5][5.6 2.5 3.9 1.1][5.9 3.2 4.8 1.8][6.1 2.8 4.  1.3][6.3 2.5 4.9 1.5][6.1 2.8 4.7 1.2][6.4 2.9 4.3 1.3][6.6 3.  4.4 1.4][6.8 2.8 4.8 1.4][6.7 3.  5.  1.7][6.  2.9 4.5 1.5][5.7 2.6 3.5 1. ][5.5 2.4 3.8 1.1][5.5 2.4 3.7 1. ][5.8 2.7 3.9 1.2][6.  2.7 5.1 1.6][5.4 3.  4.5 1.5][6.  3.4 4.5 1.6][6.7 3.1 4.7 1.5][6.3 2.3 4.4 1.3][5.6 3.  4.1 1.3][5.5 2.5 4.  1.3][5.5 2.6 4.4 1.2][6.1 3.  4.6 1.4][5.8 2.6 4.  1.2][5.  2.3 3.3 1. ][5.6 2.7 4.2 1.3][5.7 3.  4.2 1.2][5.7 2.9 4.2 1.3][6.2 2.9 4.3 1.3][5.1 2.5 3.  1.1][5.7 2.8 4.1 1.3][6.3 3.3 6.  2.5][5.8 2.7 5.1 1.9][7.1 3.  5.9 2.1][6.3 2.9 5.6 1.8][6.5 3.  5.8 2.2][7.6 3.  6.6 2.1][4.9 2.5 4.5 1.7][7.3 2.9 6.3 1.8][6.7 2.5 5.8 1.8][7.2 3.6 6.1 2.5][6.5 3.2 5.1 2. ][6.4 2.7 5.3 1.9][6.8 3.  5.5 2.1][5.7 2.5 5.  2. ][5.8 2.8 5.1 2.4][6.4 3.2 5.3 2.3][6.5 3.  
5.5 1.8][7.7 3.8 6.7 2.2][7.7 2.6 6.9 2.3][6.  2.2 5.  1.5][6.9 3.2 5.7 2.3][5.6 2.8 4.9 2. ][7.7 2.8 6.7 2. ][6.3 2.7 4.9 1.8][6.7 3.3 5.7 2.1][7.2 3.2 6.  1.8][6.2 2.8 4.8 1.8][6.1 3.  4.9 1.8][6.4 2.8 5.6 2.1][7.2 3.  5.8 1.6][7.4 2.8 6.1 1.9][7.9 3.8 6.4 2. ][6.4 2.8 5.6 2.2][6.3 2.8 5.1 1.5][6.1 2.6 5.6 1.4][7.7 3.  6.1 2.3][6.3 3.4 5.6 2.4][6.4 3.1 5.5 1.8][6.  3.  4.8 1.8][6.9 3.1 5.4 2.1][6.7 3.1 5.6 2.4][6.9 3.1 5.1 2.3][5.8 2.7 5.1 1.9][6.8 3.2 5.9 2.3][6.7 3.3 5.7 2.5][6.7 3.  5.2 2.3][6.3 2.5 5.  1.9][6.5 3.  5.2 2. ][6.2 3.4 5.4 2.3][5.9 3.  5.1 1.8]]print(df6)
→[[0.22222222 0.625      0.06779661 0.04166667][0.16666667 0.41666667 0.06779661 0.04166667][0.11111111 0.5        0.05084746 0.04166667][0.08333333 0.45833333 0.08474576 0.04166667][0.19444444 0.66666667 0.06779661 0.04166667][0.30555556 0.79166667 0.11864407 0.125     ][0.08333333 0.58333333 0.06779661 0.08333333][0.19444444 0.58333333 0.08474576 0.04166667][0.02777778 0.375      0.06779661 0.04166667][0.16666667 0.45833333 0.08474576 0.        ][0.30555556 0.70833333 0.08474576 0.04166667][0.13888889 0.58333333 0.10169492 0.04166667][0.13888889 0.41666667 0.06779661 0.        ][0.         0.41666667 0.01694915 0.        ][0.41666667 0.83333333 0.03389831 0.04166667][0.38888889 1.         0.08474576 0.125     ][0.30555556 0.79166667 0.05084746 0.125     ][0.22222222 0.625      0.06779661 0.08333333][0.38888889 0.75       0.11864407 0.08333333][0.22222222 0.75       0.08474576 0.08333333][0.30555556 0.58333333 0.11864407 0.04166667][0.22222222 0.70833333 0.08474576 0.125     ][0.08333333 0.66666667 0.         0.04166667][0.22222222 0.54166667 0.11864407 0.16666667][0.13888889 0.58333333 0.15254237 0.04166667][0.19444444 0.41666667 0.10169492 0.04166667][0.19444444 0.58333333 0.10169492 0.125     ][0.25       0.625      0.08474576 0.04166667][0.25       0.58333333 0.06779661 0.04166667][0.11111111 0.5        0.10169492 0.04166667][0.13888889 0.45833333 0.10169492 0.04166667][0.30555556 0.58333333 0.08474576 0.125     ][0.25       0.875      0.08474576 0.        ][0.33333333 0.91666667 0.06779661 0.04166667][0.16666667 0.45833333 0.08474576 0.04166667][0.19444444 0.5        0.03389831 0.04166667][0.33333333 0.625      0.05084746 0.04166667][0.16666667 0.66666667 0.06779661 0.        
][0.02777778 0.41666667 0.05084746 0.04166667][0.22222222 0.58333333 0.08474576 0.04166667][0.19444444 0.625      0.05084746 0.08333333][0.05555556 0.125      0.05084746 0.08333333][0.02777778 0.5        0.05084746 0.04166667][0.19444444 0.625      0.10169492 0.20833333][0.22222222 0.75       0.15254237 0.125     ][0.13888889 0.41666667 0.06779661 0.08333333][0.22222222 0.75       0.10169492 0.04166667][0.08333333 0.5        0.06779661 0.04166667][0.27777778 0.70833333 0.08474576 0.04166667][0.19444444 0.54166667 0.06779661 0.04166667][0.75       0.5        0.62711864 0.54166667][0.58333333 0.5        0.59322034 0.58333333][0.72222222 0.45833333 0.66101695 0.58333333][0.33333333 0.125      0.50847458 0.5       ][0.61111111 0.33333333 0.61016949 0.58333333][0.38888889 0.33333333 0.59322034 0.5       ][0.55555556 0.54166667 0.62711864 0.625     ][0.16666667 0.16666667 0.38983051 0.375     ][0.63888889 0.375      0.61016949 0.5       ][0.25       0.29166667 0.49152542 0.54166667][0.19444444 0.         
0.42372881 0.375     ][0.44444444 0.41666667 0.54237288 0.58333333][0.47222222 0.08333333 0.50847458 0.375     ][0.5        0.375      0.62711864 0.54166667][0.36111111 0.375      0.44067797 0.5       ][0.66666667 0.45833333 0.57627119 0.54166667][0.36111111 0.41666667 0.59322034 0.58333333][0.41666667 0.29166667 0.52542373 0.375     ][0.52777778 0.08333333 0.59322034 0.58333333][0.36111111 0.20833333 0.49152542 0.41666667][0.44444444 0.5        0.6440678  0.70833333][0.5        0.33333333 0.50847458 0.5       ][0.55555556 0.20833333 0.66101695 0.58333333][0.5        0.33333333 0.62711864 0.45833333][0.58333333 0.375      0.55932203 0.5       ][0.63888889 0.41666667 0.57627119 0.54166667][0.69444444 0.33333333 0.6440678  0.54166667][0.66666667 0.41666667 0.6779661  0.66666667][0.47222222 0.375      0.59322034 0.58333333][0.38888889 0.25       0.42372881 0.375     ][0.33333333 0.16666667 0.47457627 0.41666667][0.33333333 0.16666667 0.45762712 0.375     ][0.41666667 0.29166667 0.49152542 0.45833333][0.47222222 0.29166667 0.69491525 0.625     ][0.30555556 0.41666667 0.59322034 0.58333333][0.47222222 0.58333333 0.59322034 0.625     ][0.66666667 0.45833333 0.62711864 0.58333333][0.55555556 0.125      0.57627119 0.5       ][0.36111111 0.41666667 0.52542373 0.5       ][0.33333333 0.20833333 0.50847458 0.5       ][0.33333333 0.25       0.57627119 0.45833333][0.5        0.41666667 0.61016949 0.54166667][0.41666667 0.25       0.50847458 0.45833333][0.19444444 0.125      0.38983051 0.375     ][0.36111111 0.29166667 0.54237288 0.5       ][0.38888889 0.41666667 0.54237288 0.45833333][0.38888889 0.375      0.54237288 0.5       ][0.52777778 0.375      0.55932203 0.5       ][0.22222222 0.20833333 0.33898305 0.41666667][0.38888889 0.33333333 0.52542373 0.5       ][0.55555556 0.54166667 0.84745763 1.        
][0.41666667 0.29166667 0.69491525 0.75      ][0.77777778 0.41666667 0.83050847 0.83333333][0.55555556 0.375      0.77966102 0.70833333][0.61111111 0.41666667 0.81355932 0.875     ][0.91666667 0.41666667 0.94915254 0.83333333][0.16666667 0.20833333 0.59322034 0.66666667][0.83333333 0.375      0.89830508 0.70833333][0.66666667 0.20833333 0.81355932 0.70833333][0.80555556 0.66666667 0.86440678 1.        ][0.61111111 0.5        0.69491525 0.79166667][0.58333333 0.29166667 0.72881356 0.75      ][0.69444444 0.41666667 0.76271186 0.83333333][0.38888889 0.20833333 0.6779661  0.79166667][0.41666667 0.33333333 0.69491525 0.95833333][0.58333333 0.5        0.72881356 0.91666667][0.61111111 0.41666667 0.76271186 0.70833333][0.94444444 0.75       0.96610169 0.875     ][0.94444444 0.25       1.         0.91666667][0.47222222 0.08333333 0.6779661  0.58333333][0.72222222 0.5        0.79661017 0.91666667][0.36111111 0.33333333 0.66101695 0.79166667][0.94444444 0.33333333 0.96610169 0.79166667][0.55555556 0.29166667 0.66101695 0.70833333][0.66666667 0.54166667 0.79661017 0.83333333][0.80555556 0.5        0.84745763 0.70833333][0.52777778 0.33333333 0.6440678  0.70833333][0.5        0.41666667 0.66101695 0.70833333][0.58333333 0.33333333 0.77966102 0.83333333][0.80555556 0.41666667 0.81355932 0.625     ][0.86111111 0.33333333 0.86440678 0.75      ][1.         0.75       0.91525424 0.79166667][0.58333333 0.33333333 0.77966102 0.875     ][0.55555556 0.33333333 0.69491525 0.58333333][0.5        0.25       0.77966102 0.54166667][0.94444444 0.41666667 0.86440678 0.91666667][0.55555556 0.58333333 0.77966102 0.95833333][0.58333333 0.45833333 0.76271186 0.70833333][0.47222222 0.41666667 0.6440678  0.70833333][0.72222222 0.45833333 0.74576271 0.83333333][0.66666667 0.45833333 0.77966102 0.95833333][0.72222222 0.45833333 0.69491525 0.91666667][0.41666667 0.29166667 0.69491525 0.75      ][0.69444444 0.5        0.83050847 0.91666667][0.66666667 0.54166667 0.79661017 1.        
][0.66666667 0.41666667 0.71186441 0.91666667][0.55555556 0.20833333 0.6779661  0.75      ][0.61111111 0.41666667 0.71186441 0.79166667][0.52777778 0.58333333 0.74576271 0.91666667][0.44444444 0.41666667 0.69491525 0.70833333]]print(df7)
→[[-9.00681170e-01  1.01900435e+00 -1.34022653e+00 -1.31544430e+00][-1.14301691e+00 -1.31979479e-01 -1.34022653e+00 -1.31544430e+00][-1.38535265e+00  3.28414053e-01 -1.39706395e+00 -1.31544430e+00][-1.50652052e+00  9.82172869e-02 -1.28338910e+00 -1.31544430e+00][-1.02184904e+00  1.24920112e+00 -1.34022653e+00 -1.31544430e+00][-5.37177559e-01  1.93979142e+00 -1.16971425e+00 -1.05217993e+00][-1.50652052e+00  7.88807586e-01 -1.34022653e+00 -1.18381211e+00][-1.02184904e+00  7.88807586e-01 -1.28338910e+00 -1.31544430e+00][-1.74885626e+00 -3.62176246e-01 -1.34022653e+00 -1.31544430e+00][-1.14301691e+00  9.82172869e-02 -1.28338910e+00 -1.44707648e+00][-5.37177559e-01  1.47939788e+00 -1.28338910e+00 -1.31544430e+00][-1.26418478e+00  7.88807586e-01 -1.22655167e+00 -1.31544430e+00][-1.26418478e+00 -1.31979479e-01 -1.34022653e+00 -1.44707648e+00][-1.87002413e+00 -1.31979479e-01 -1.51073881e+00 -1.44707648e+00][-5.25060772e-02  2.16998818e+00 -1.45390138e+00 -1.31544430e+00][-1.73673948e-01  3.09077525e+00 -1.28338910e+00 -1.05217993e+00][-5.37177559e-01  1.93979142e+00 -1.39706395e+00 -1.05217993e+00][-9.00681170e-01  1.01900435e+00 -1.34022653e+00 -1.18381211e+00][-1.73673948e-01  1.70959465e+00 -1.16971425e+00 -1.18381211e+00][-9.00681170e-01  1.70959465e+00 -1.28338910e+00 -1.18381211e+00][-5.37177559e-01  7.88807586e-01 -1.16971425e+00 -1.31544430e+00][-9.00681170e-01  1.47939788e+00 -1.28338910e+00 -1.05217993e+00][-1.50652052e+00  1.24920112e+00 -1.56757623e+00 -1.31544430e+00][-9.00681170e-01  5.58610819e-01 -1.16971425e+00 -9.20547742e-01][-1.26418478e+00  7.88807586e-01 -1.05603939e+00 -1.31544430e+00][-1.02184904e+00 -1.31979479e-01 -1.22655167e+00 -1.31544430e+00][-1.02184904e+00  7.88807586e-01 -1.22655167e+00 -1.05217993e+00][-7.79513300e-01  1.01900435e+00 -1.28338910e+00 -1.31544430e+00][-7.79513300e-01  7.88807586e-01 -1.34022653e+00 -1.31544430e+00][-1.38535265e+00  3.28414053e-01 -1.22655167e+00 -1.31544430e+00][-1.26418478e+00  9.82172869e-02 
-1.22655167e+00 -1.31544430e+00][-5.37177559e-01  7.88807586e-01 -1.28338910e+00 -1.05217993e+00][-7.79513300e-01  2.40018495e+00 -1.28338910e+00 -1.44707648e+00][-4.16009689e-01  2.63038172e+00 -1.34022653e+00 -1.31544430e+00][-1.14301691e+00  9.82172869e-02 -1.28338910e+00 -1.31544430e+00][-1.02184904e+00  3.28414053e-01 -1.45390138e+00 -1.31544430e+00][-4.16009689e-01  1.01900435e+00 -1.39706395e+00 -1.31544430e+00][-1.14301691e+00  1.24920112e+00 -1.34022653e+00 -1.44707648e+00][-1.74885626e+00 -1.31979479e-01 -1.39706395e+00 -1.31544430e+00][-9.00681170e-01  7.88807586e-01 -1.28338910e+00 -1.31544430e+00][-1.02184904e+00  1.01900435e+00 -1.39706395e+00 -1.18381211e+00][-1.62768839e+00 -1.74335684e+00 -1.39706395e+00 -1.18381211e+00][-1.74885626e+00  3.28414053e-01 -1.39706395e+00 -1.31544430e+00][-1.02184904e+00  1.01900435e+00 -1.22655167e+00 -7.88915558e-01][-9.00681170e-01  1.70959465e+00 -1.05603939e+00 -1.05217993e+00][-1.26418478e+00 -1.31979479e-01 -1.34022653e+00 -1.18381211e+00][-9.00681170e-01  1.70959465e+00 -1.22655167e+00 -1.31544430e+00][-1.50652052e+00  3.28414053e-01 -1.34022653e+00 -1.31544430e+00][-6.58345429e-01  1.47939788e+00 -1.28338910e+00 -1.31544430e+00][-1.02184904e+00  5.58610819e-01 -1.34022653e+00 -1.31544430e+00][ 1.40150837e+00  3.28414053e-01  5.35408562e-01  2.64141916e-01][ 6.74501145e-01  3.28414053e-01  4.21733708e-01  3.95774101e-01][ 1.28034050e+00  9.82172869e-02  6.49083415e-01  3.95774101e-01][-4.16009689e-01 -1.74335684e+00  1.37546573e-01  1.32509732e-01][ 7.95669016e-01 -5.92373012e-01  4.78571135e-01  3.95774101e-01][-1.73673948e-01 -5.92373012e-01  4.21733708e-01  1.32509732e-01][ 5.53333275e-01  5.58610819e-01  5.35408562e-01  5.27406285e-01][-1.14301691e+00 -1.51316008e+00 -2.60315415e-01 -2.62386821e-01][ 9.16836886e-01 -3.62176246e-01  4.78571135e-01  1.32509732e-01][-7.79513300e-01 -8.22569778e-01  8.07091462e-02  2.64141916e-01][-1.02184904e+00 -2.43394714e+00 -1.46640561e-01 -2.62386821e-01][ 6.86617933e-02 
-1.31979479e-01  2.51221427e-01  3.95774101e-01][ 1.89829664e-01 -1.97355361e+00  1.37546573e-01 -2.62386821e-01][ 3.10997534e-01 -3.62176246e-01  5.35408562e-01  2.64141916e-01][-2.94841818e-01 -3.62176246e-01 -8.98031345e-02  1.32509732e-01][ 1.03800476e+00  9.82172869e-02  3.64896281e-01  2.64141916e-01][-2.94841818e-01 -1.31979479e-01  4.21733708e-01  3.95774101e-01][-5.25060772e-02 -8.22569778e-01  1.94384000e-01 -2.62386821e-01][ 4.32165405e-01 -1.97355361e+00  4.21733708e-01  3.95774101e-01][-2.94841818e-01 -1.28296331e+00  8.07091462e-02 -1.30754636e-01][ 6.86617933e-02  3.28414053e-01  5.92245988e-01  7.90670654e-01][ 3.10997534e-01 -5.92373012e-01  1.37546573e-01  1.32509732e-01][ 5.53333275e-01 -1.28296331e+00  6.49083415e-01  3.95774101e-01][ 3.10997534e-01 -5.92373012e-01  5.35408562e-01  8.77547895e-04][ 6.74501145e-01 -3.62176246e-01  3.08058854e-01  1.32509732e-01][ 9.16836886e-01 -1.31979479e-01  3.64896281e-01  2.64141916e-01][ 1.15917263e+00 -5.92373012e-01  5.92245988e-01  2.64141916e-01][ 1.03800476e+00 -1.31979479e-01  7.05920842e-01  6.59038469e-01][ 1.89829664e-01 -3.62176246e-01  4.21733708e-01  3.95774101e-01][-1.73673948e-01 -1.05276654e+00 -1.46640561e-01 -2.62386821e-01][-4.16009689e-01 -1.51316008e+00  2.38717193e-02 -1.30754636e-01][-4.16009689e-01 -1.51316008e+00 -3.29657076e-02 -2.62386821e-01][-5.25060772e-02 -8.22569778e-01  8.07091462e-02  8.77547895e-04][ 1.89829664e-01 -8.22569778e-01  7.62758269e-01  5.27406285e-01][-5.37177559e-01 -1.31979479e-01  4.21733708e-01  3.95774101e-01][ 1.89829664e-01  7.88807586e-01  4.21733708e-01  5.27406285e-01][ 1.03800476e+00  9.82172869e-02  5.35408562e-01  3.95774101e-01][ 5.53333275e-01 -1.74335684e+00  3.64896281e-01  1.32509732e-01][-2.94841818e-01 -1.31979479e-01  1.94384000e-01  1.32509732e-01][-4.16009689e-01 -1.28296331e+00  1.37546573e-01  1.32509732e-01][-4.16009689e-01 -1.05276654e+00  3.64896281e-01  8.77547895e-04][ 3.10997534e-01 -1.31979479e-01  4.78571135e-01  
2.64141916e-01][-5.25060772e-02 -1.05276654e+00  1.37546573e-01  8.77547895e-04][-1.02184904e+00 -1.74335684e+00 -2.60315415e-01 -2.62386821e-01][-2.94841818e-01 -8.22569778e-01  2.51221427e-01  1.32509732e-01][-1.73673948e-01 -1.31979479e-01  2.51221427e-01  8.77547895e-04][-1.73673948e-01 -3.62176246e-01  2.51221427e-01  1.32509732e-01][ 4.32165405e-01 -3.62176246e-01  3.08058854e-01  1.32509732e-01][-9.00681170e-01 -1.28296331e+00 -4.30827696e-01 -1.30754636e-01][-1.73673948e-01 -5.92373012e-01  1.94384000e-01  1.32509732e-01][ 5.53333275e-01  5.58610819e-01  1.27429511e+00  1.71209594e+00][-5.25060772e-02 -8.22569778e-01  7.62758269e-01  9.22302838e-01][ 1.52267624e+00 -1.31979479e-01  1.21745768e+00  1.18556721e+00][ 5.53333275e-01 -3.62176246e-01  1.04694540e+00  7.90670654e-01][ 7.95669016e-01 -1.31979479e-01  1.16062026e+00  1.31719939e+00][ 2.12851559e+00 -1.31979479e-01  1.61531967e+00  1.18556721e+00][-1.14301691e+00 -1.28296331e+00  4.21733708e-01  6.59038469e-01][ 1.76501198e+00 -3.62176246e-01  1.44480739e+00  7.90670654e-01][ 1.03800476e+00 -1.28296331e+00  1.16062026e+00  7.90670654e-01][ 1.64384411e+00  1.24920112e+00  1.33113254e+00  1.71209594e+00][ 7.95669016e-01  3.28414053e-01  7.62758269e-01  1.05393502e+00][ 6.74501145e-01 -8.22569778e-01  8.76433123e-01  9.22302838e-01][ 1.15917263e+00 -1.31979479e-01  9.90107977e-01  1.18556721e+00][-1.73673948e-01 -1.28296331e+00  7.05920842e-01  1.05393502e+00][-5.25060772e-02 -5.92373012e-01  7.62758269e-01  1.58046376e+00][ 6.74501145e-01  3.28414053e-01  8.76433123e-01  1.44883158e+00][ 7.95669016e-01 -1.31979479e-01  9.90107977e-01  7.90670654e-01][ 2.24968346e+00  1.70959465e+00  1.67215710e+00  1.31719939e+00][ 2.24968346e+00 -1.05276654e+00  1.78583195e+00  1.44883158e+00][ 1.89829664e-01 -1.97355361e+00  7.05920842e-01  3.95774101e-01][ 1.28034050e+00  3.28414053e-01  1.10378283e+00  1.44883158e+00][-2.94841818e-01 -5.92373012e-01  6.49083415e-01  1.05393502e+00][ 2.24968346e+00 -5.92373012e-01  
1.67215710e+00  1.05393502e+00][ 5.53333275e-01 -8.22569778e-01  6.49083415e-01  7.90670654e-01][ 1.03800476e+00  5.58610819e-01  1.10378283e+00  1.18556721e+00][ 1.64384411e+00  3.28414053e-01  1.27429511e+00  7.90670654e-01][ 4.32165405e-01 -5.92373012e-01  5.92245988e-01  7.90670654e-01][ 3.10997534e-01 -1.31979479e-01  6.49083415e-01  7.90670654e-01][ 6.74501145e-01 -5.92373012e-01  1.04694540e+00  1.18556721e+00][ 1.64384411e+00 -1.31979479e-01  1.16062026e+00  5.27406285e-01][ 1.88617985e+00 -5.92373012e-01  1.33113254e+00  9.22302838e-01][ 2.49201920e+00  1.70959465e+00  1.50164482e+00  1.05393502e+00][ 6.74501145e-01 -5.92373012e-01  1.04694540e+00  1.31719939e+00][ 5.53333275e-01 -5.92373012e-01  7.62758269e-01  3.95774101e-01][ 3.10997534e-01 -1.05276654e+00  1.04694540e+00  2.64141916e-01][ 2.24968346e+00 -1.31979479e-01  1.33113254e+00  1.44883158e+00][ 5.53333275e-01  7.88807586e-01  1.04694540e+00  1.58046376e+00][ 6.74501145e-01  9.82172869e-02  9.90107977e-01  7.90670654e-01][ 1.89829664e-01 -1.31979479e-01  5.92245988e-01  7.90670654e-01][ 1.28034050e+00  9.82172869e-02  9.33270550e-01  1.18556721e+00][ 1.03800476e+00  9.82172869e-02  1.04694540e+00  1.58046376e+00][ 1.28034050e+00  9.82172869e-02  7.62758269e-01  1.44883158e+00][-5.25060772e-02 -8.22569778e-01  7.62758269e-01  9.22302838e-01][ 1.15917263e+00  3.28414053e-01  1.21745768e+00  1.44883158e+00][ 1.03800476e+00  5.58610819e-01  1.10378283e+00  1.71209594e+00][ 1.03800476e+00 -1.31979479e-01  8.19595696e-01  1.44883158e+00][ 5.53333275e-01 -1.28296331e+00  7.05920842e-01  9.22302838e-01][ 7.95669016e-01 -1.31979479e-01  8.19595696e-01  1.05393502e+00][ 4.32165405e-01  7.88807586e-01  9.33270550e-01  1.44883158e+00][ 6.86617933e-02 -1.31979479e-01  7.62758269e-01  7.90670654e-01]]



ages = [31, 27, 11, 38, 15, 74, 44, 32, 54, 63, 41, 23]
bins = [15, 25, 45, 65, 100]
group_names = ["A", "B", "C", "D"]
① 等宽法


print(list(pd.cut(ages, bins, labels=group_names)))
→['B', 'B', nan, 'B', nan, 'D', 'B', 'B', 'C', 'C', 'B', 'A']
② 等频法


print(list(pd.qcut(ages, 4)))
→[Interval(26.0, 35.0, closed='right'), Interval(26.0, 35.0, closed='right'), 
Interval(10.999, 26.0, closed='right'), Interval(35.0, 46.5, closed='right'), 
Interval(10.999, 26.0, closed='right'), Interval(46.5, 74.0, closed='right'), 
Interval(35.0, 46.5, closed='right'), Interval(26.0, 35.0, closed='right'), 
Interval(46.5, 74.0, closed='right'), Interval(46.5, 74.0, closed='right'), 
Interval(35.0, 46.5, closed='right'), Interval(10.999, 26.0, closed='right')]



df8 = pd.DataFrame({"gender": ["male", "female", "male", "male", "female"]})
print(df8["gender"].replace(["male", "female"], [1.0, 0.0]))
→0    1.0
1    0.0
2    1.0
3    1.0
4    0.0
Name: gender, dtype: float64
print(df8["gender"].map({"male": 1.0, "female": 0.0}))
→0    1.0
1    0.0
2    1.0
3    1.0
4    0.0
Name: gender, dtype: float64



① 属性规约


② 数值规约



存放数值规约表示的非参数方法包括: 直方图、聚类、抽样和数据立方体聚集。





