import numpy
numpy.__version__
import numpy as np
# A plain Python list holding the integers 0..9.
L = [*range(10)]
L
type(L[0])
# The same values, converted to strings.
L2 = list(map(str, L))
L2
# Arrays built directly from Python lists.
np.array([1, 2, 3, 4])
# One float among the items makes the whole array floating point.
np.array([1, 2, 3, 4.55])
# The element type can also be requested explicitly.
np.array([1, 2, 3, 4], dtype='float32')
# A list of ranges becomes a two-dimensional array, one row per range.
np.array([range(start, start + 3) for start in (2, 4, 6)])
# Arrays filled with a constant value.
np.zeros(10, dtype=int)
np.ones((3, 5), dtype=float)
np.full((3, 5), fill_value=3.14)
# Evenly spaced values: by step size, then by number of points.
np.arange(0, 20, 2)
np.linspace(0, 1, num=5)
# Random 3x3 arrays: uniform on [0, 1), standard normal, integers in [0, 10).
np.random.random(size=(3, 3))
np.random.normal(loc=0, scale=1, size=(3, 3))
np.random.randint(low=0, high=10, size=(3, 3))
# 3x3 identity matrix.
np.eye(3)
# Uninitialized array of three elements (floating point by default); its
# contents are whatever happened to be in that memory already.
np.empty(3)
# Seed the global generator so the arrays below are reproducible.
np.random.seed(0)
x1 = np.random.randint(10, size=6)          # one-dimensional
x2 = np.random.randint(10, size=(3, 4))     # two-dimensional
x3 = np.random.randint(10, size=(3, 4, 5))  # three-dimensional
# Report the basic attributes of x3, one labelled line per attribute.
for report_line in (
    ("x3 ndim: ", x3.ndim),                  # number of dimensions
    ("x3 shape:", x3.shape),                 # length of each dimension
    ("x3 size:", x3.size),                   # total number of elements
    ("dtype: ", x3.dtype),                   # element data type
    ("itemsize: ", x3.itemsize, "bytes"),    # bytes per element
    ("nbytes: ", x3.nbytes, "bytes"),        # bytes for the whole array
):
    print(*report_line)
# One-dimensional indexing; negative indices count from the end.
x1
x1[-1]
x1[-2]
# Two-dimensional indexing takes a (row, column) pair.
x2
x2[0, 0]
x2[2, -1]
注意:和Python列表不同,NumPy数组是固定类型的,这意味着当你试图将一个浮点值插入一个整型数组时,浮点值会被截断成整型。并且这种截断是自动完成的,不会给你提示或警告!
# Show x1 before the assignment.
# NOTE(review): x1 is expected to be the integer array created earlier with
# np.random.randint — confirm it has not been rebound in between.
x1
# Storing a float into an integer array keeps only the truncated integer
# value (3 here); NumPy does this silently, with no warning.
x1[0] = 3.14159
# Show x1 again to see the truncated value in position 0.
x1
x[start:stop:step]
# Slicing demo on the integers 0..9, using the x[start:stop:step] syntax.
x = np.arange(10)
x
x[:5]  # first five elements
x[5:]  # elements from index 5 onwards
x[4:7]  # the sub-array at indices 4, 5 and 6
x[::2]  # every other element, starting at index 0
x[1::2]  # every other element, starting at index 1
# All elements, in reverse order
x[::-1]
x[5::-2]  # every other element, walking backwards from index 5
# NOTE(review): x2 is assumed to be the two-dimensional array created earlier.
x2
x2[:2,:3]  # first two rows, first three columns
x2[::-1,::-1]  # rows and columns both reversed
# Lay the numbers 1..9 out as a 3x3 grid.
grid = np.arange(1, 10).reshape(3, 3)
print(grid)
x = np.array([1, 2, 3])
# Row vector obtained with reshape ...
x.reshape(1, 3)
# ... and the same row vector obtained with newaxis.
x[np.newaxis, :]
# Column vector obtained with reshape ...
x.reshape(3, 1)
# ... and the same column vector obtained with newaxis.
x[:, np.newaxis]
x, y = np.array([1, 2, 3]), np.array([3, 2, 1])
# Join two one-dimensional arrays end to end.
np.concatenate((x, y))
# More than two inputs can be joined at once; a plain list is accepted too.
z = [99] * 3
print(np.concatenate((x, y, z)))
grid = np.array([[1, 2, 3], [4, 5, 6]])
# Join along the first axis: the rows of both inputs are stacked.
np.concatenate((grid, grid), axis=0)
# Join along the second axis (axes are numbered from zero).
np.concatenate((grid, grid), axis=1)
沿着固定维度处理数组时,使用np.vstack
和np.hstack
函数更简洁:
x = np.array([1, 2, 3])
grid = np.array([[9, 8, 7], [6, 5, 4]])
# Vertical stack: x becomes an extra first row above grid.
np.vstack((x, grid))
# Horizontal stack: the column y is appended to the right of grid.
y = np.array([[99], [99]])
np.hstack((grid, y))
与拼接相反的过程是分裂。分裂可以通过np.split
、np.hsplit
、np.vsplit
函数来实现。
# Eight values: three leading, two 99s in the middle, three trailing.
x = [1, 2, 3] + [99] * 2 + [3, 2, 1]
# Cutting before index 3 and before index 5 yields three pieces.
x1, x2, x3 = np.split(x, [3, 5])
print(x1, x2, x3)
值得注意的是N个分裂点会得到N+1个子数组。
# The integers 0..15 arranged as a 4x4 grid.
grid = np.arange(16).reshape(4, 4)
grid
# Cut between rows 1 and 2: top half and bottom half.
upper, lower = np.vsplit(grid, [2])
print(upper, lower, sep="\n")
# Cut between columns 1 and 2: left half and right half.
left, right = np.hsplit(grid, [2])
print(left, right, sep="\n")
+
: np.add
(1+1=2)
-
: np.subtract
(3-2=1)
-
: np.negative
(-2)
*
: np.multiply
(2*3=6)
/
: np.divide
(3/2=1.5)
//
: np.floor_divide
(3//2=1)
**
: np.power
(2**3=8)
%
: np.mod
(9%4=1)
# Absolute value, element by element.
x = np.array([-2, -1, 0, 1, 2])
np.abs(x)
# Trigonometric functions evaluated at three angles from 0 to pi.
theta = np.linspace(0, np.pi, 3)
print("theta = ", theta)
for label, ufunc in (
    ("sin(theta) = ", np.sin),
    ("cos(theta) = ", np.cos),
    ("tan(theta) = ", np.tan),
):
    print(label, ufunc(theta))
# Inverse trigonometric functions.
x = [-1, 0, 1]
print("x = ", x)
for label, ufunc in (
    ("arcsin(x)=", np.arcsin),
    ("arccos(x)=", np.arccos),
    ("arctan(x)=", np.arctan),
):
    print(label, ufunc(x))
# Exponentials with base e, base 2 and base 3.
x = [1, 2, 3]
print("x =", x)
print("e^x =", np.exp(x))
print("2^x =", np.exp2(x))
print("3^x =", np.power(3, x))
用通用函数的reduce
方法对给定的元素和操作重复执行,直到得到单个的结果
注意:在一些特殊情况中,NumPy提供了专用函数(np.sum
,np.prod
,np.cumsum
, np.cumprod
),它们也可以实现reduce的功能
# Demonstrates the reduce and accumulate methods of ufuncs on the
# integers 1..5.
x = np.arange(1,6)
x
# reduce applies the ufunc repeatedly until one value is left:
# with np.add that is the sum of the elements ...
np.add.reduce(x)
# ... and with np.multiply it is their product.
np.multiply.reduce(x)
# To keep the intermediate result of every step, use accumulate instead:
np.add.accumulate(x)
np.multiply.accumulate(x)
任何通用函数都可以用outer
方法获得两个不同输入数组所有元素对的函数运算结果
# outer applies the ufunc to every pair of elements from the two inputs;
# with np.multiply on 1..5 the result is a 5x5 multiplication table.
x = np.arange(1, 6)
np.multiply.outer(x, x)
# Boolean aggregates: x contains one False, y contains only True values.
x = np.array([True, True, False, True])
y = np.array([True, True, True, True])
np.any(x)  # True: at least one element of x is True
np.all(x)  # False: not every element of x is True
np.all(y)  # True: every element of y is True
# Load the presidents' heights from a CSV file and plot them as a histogram.
import pandas as pd
# NOTE(review): the CSV is read from an absolute path on one user's machine;
# the file must exist at exactly this location for the cell to run.
data = pd.read_csv('/Users/swami/Desktop/PythonDataScienceHandbook-master/notebooks/data/president_heights.csv')
data.head()
# Pull the 'height(cm)' column out as a NumPy array.
height = np.array(data['height(cm)'])
# NOTE(review): the next line is an IPython magic, not Python syntax; it only
# works inside IPython/Jupyter.
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn; seaborn.set() # set the plotting style
plt.hist(height)
plt.title('Height Distribution of US Presidents')
plt.xlabel('height (cm)')
plt.ylabel('number')
广播可以简单理解为用于不同大小数组的二元通用函数(加、减、乘等)的一组规则
a, b = np.array([0, 1, 2]), np.array([5, 5, 5])
# Element-wise sum of two arrays of the same size ...
a + b
# ... and the same result with the scalar 5 broadcast across a.
a + 5
M = np.ones(shape=(3, 3))
M
# The one-dimensional a is broadcast across every row of M.
M + a
# A row of three values plus a column of three values broadcasts to 3x3.
a = np.arange(3)
b = np.arange(3)[:, np.newaxis]
print(a, '\n')
print(b)
a + b
# Ten observations of three values each.
X = np.random.random(size=(10, 3))
# Mean of every column (aggregating over the first dimension).
Xmean = X.mean(axis=0)
Xmean
# Centre the data: broadcasting subtracts the column means from every row.
X_centered = X - Xmean
# Sanity check: the column means of the centred data should be close to 0.
X_centered.mean(axis=0)
广播的另一个非常有用的地方在于它能基于二维函数显示图像,定义一个函数z=f(x,y)
,用广播沿着数值区间计算该函数:
# Evaluate z = f(x, y) on a grid via broadcasting and show it as an image.
# x is 50 evenly spaced points from 0 to 5 (one-dimensional) ...
x = np.linspace(0,5, 50)
# ... and y is the same 50 points turned into a column by newaxis.
y = np.linspace(0,5, 50)[:, np.newaxis]
# Combining the column y with the one-dimensional x broadcasts to a 2-D array.
z = np.sin(x)**10 + np.cos(10 + y*x)*np.cos(x)
# NOTE(review): the next line is an IPython magic, not Python syntax; it only
# works inside IPython/Jupyter.
%matplotlib inline
import matplotlib.pyplot as plt
# Draw z with the origin at the lower left and both axes labelled 0..5.
plt.imshow(z, origin='lower', extent=[0,5,0,5], cmap='viridis')
plt.colorbar()
# Load the 'PRCP' column of the Seattle 2014 CSV as a NumPy array.
# NOTE(review): the CSV is read from an absolute path on one user's machine;
# the file must exist at exactly this location for the cell to run.
rainfall = pd.read_csv('/Users/swami/Desktop/PythonDataScienceHandbook-master/notebooks/data/Seattle2014.csv')['PRCP'].values
inches = rainfall / 254 # 1/10mm -> inches
inches.shape
# NOTE(review): the next line is an IPython magic, not Python syntax; it only
# works inside IPython/Jupyter.
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn; seaborn.set() # set the plotting style
# Histogram of the rainfall values in inches.
plt.hist(inches)
==
: np.equal
!=
: np.not_equal
<
: np.less
<=
: np.less_equal
>
: np.greater
>=
: np.greater_equal
# Counting with boolean masks on a reproducible 3x4 array of random integers.
rng = np.random.RandomState(0)
x = rng.randint(10, size = (3,4))
x
# Element-wise comparison gives a boolean array of the same shape as x.
x < 6
# How many values are less than 6?
np.count_nonzero(x < 6)
# np.sum gives the same count, because True counts as 1 and False as 0.
np.sum(x < 6)
# The same count computed separately for each row.
np.sum(x < 6, axis=1)