%run ./hello.py
import mymodule.FirstML
2.1
测试结果表明:共进行了 7 轮测试,每轮循环 1000 次,平均每次循环耗时 324 ± 5.7 μs(每轮循环多少次由 %timeit 自动决定)
2.2
3.1
本次测试时间比上面的测试时间会多,是因为只测试了一次。可能不够准确
当每次执行的性能不一致时,使用 %timeit 多次测试得到的结果会不准确。例如用 %timeit 测试一个排序算法:第一次执行完毕后数组已经排好序,之后再执行时,插入排序等算法在有序数组上耗时非常短,导致后面 999 次的时间偏小,测得的平均值不准确
3.2
4.1
4.1
1
2.1
2.2
3.1
3.2
1.1
1.2
2.1
注意:np.full 的默认数据类型由 fill_value 决定(传入整数时为整型)
3.1
3.2
4.1
4.2
0
1
2.1
2.2
2.3
2.4
3.1
3.2
1.1
1.2
2.1
2.2
分割的意义:将特征矩阵和label向量分割开
2.3
1.1
1.2
1.3
2.1
2.2
2.3
A = np.arange(4).reshape(2,2)
A
array([[0, 1],
[2, 3]])
B = np.full((2,2),10)
B
array([[10, 10],
[10, 10]])
A+B
array([[10, 11],
[12, 13]])
A-B
array([[-10, -9],
[ -8, -7]])
# 对应元素相乘
A*B
array([[ 0, 10],
[20, 30]])
A/B
array([[0. , 0.1],
[0.2, 0.3]])
# 矩阵乘法 A矩阵的每一行和B矩阵的每一列做相乘再相加
# A的第i行与B的第j列对应元素相乘再相加,得到的结果是(i,j)坐标的值
A.dot(B)
array([[10, 10],
[50, 50]])
A
array([[0, 1],
[2, 3]])
# 转置 行变列
A.T
array([[0, 2],
[1, 3]])
# 要确保矩阵之间可运算
C = np.full((3,3,3),666)
C+A
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-36-89b16f572c60> in <module>()
1 C = np.full((3,3,3),666)
----> 2 C+A
ValueError: operands could not be broadcast together with shapes (3,3,3) (2,2)
v = np.array([1,2])
A
array([[0, 1],
[2, 3]])
# 向量和矩阵中的每一行做加法
# 广播:自动将低维数组与高维数组的每一行分别进行运算
v+A
array([[1, 3],
[3, 5]])
np.vstack([v]*A.shape[0])
array([[1, 2],
[1, 2]])
np.vstack([v]*A.shape[0])+A
array([[1, 3],
[3, 5]])
# 第二个参数传入一个元组,里面的值分别为v向量在行上面堆叠的次数,和列方向上堆叠的次数
np.tile(v,(2,1))+A
array([[1, 3],
[3, 5]])
v*A
array([[0, 2],
[2, 6]])
v.dot(A)
array([4, 7])
# 向量和矩阵进行运算,会自动判断向量应该取行向量还是列向量
# 这里会自动将v转换成列向量
A.dot(v)
A
array([[0, 1],
[2, 3]])
# [linalg]linear algebra [inv] inverse
# 2x2 矩阵的逆:主对角线元素对换位置,副对角线元素取负,再整体除以行列式(此处行列式为 0*3-1*2 = -2)
# 只有方阵才可能有逆矩阵,且行列式不能为 0
invA = np.linalg.inv(A)
invA
array([[-1.5, 0.5],
[ 1. , 0. ]])
# 原矩阵乘以逆矩阵,得到的是单位矩阵
A.dot(invA)
array([[1., 0.],
[0., 1.]])
# 逆矩阵乘以原矩阵,得到的是单位矩阵
invA.dot(A)
array([[1., 0.],
[0., 1.]])
# [pinv] pseudo inverse 伪逆矩阵
#
X = np.arange(16).reshape(2,8)
pinvX = np.linalg.pinv(X)
pinvX
array([[-1.35416667e-01, 5.20833333e-02],
[-1.01190476e-01, 4.16666667e-02],
[-6.69642857e-02, 3.12500000e-02],
[-3.27380952e-02, 2.08333333e-02],
[ 1.48809524e-03, 1.04166667e-02],
[ 3.57142857e-02, -9.65081753e-18],
[ 6.99404762e-02, -1.04166667e-02],
[ 1.04166667e-01, -2.08333333e-02]])
pinvX.shape
(8, 2)
# 伪逆矩阵的性质:当原矩阵行满秩时,原矩阵乘以伪逆矩阵得到的是单位矩阵
X.dot(pinvX)
array([[ 1.00000000e+00, -2.42861287e-16],
[-5.41233725e-16, 1.00000000e+00]])
# 伪逆矩阵乘以原矩阵,得到的不一定是单位矩阵
pinvX.dot(X)
array([[ 4.16666667e-01, 3.33333333e-01, 2.50000000e-01,
1.66666667e-01, 8.33333333e-02, 3.95516953e-16,
-8.33333333e-02, -1.66666667e-01],
[ 3.33333333e-01, 2.73809524e-01, 2.14285714e-01,
1.54761905e-01, 9.52380952e-02, 3.57142857e-02,
-2.38095238e-02, -8.33333333e-02],
[ 2.50000000e-01, 2.14285714e-01, 1.78571429e-01,
1.42857143e-01, 1.07142857e-01, 7.14285714e-02,
3.57142857e-02, 2.22044605e-16],
[ 1.66666667e-01, 1.54761905e-01, 1.42857143e-01,
1.30952381e-01, 1.19047619e-01, 1.07142857e-01,
9.52380952e-02, 8.33333333e-02],
[ 8.33333333e-02, 9.52380952e-02, 1.07142857e-01,
1.19047619e-01, 1.30952381e-01, 1.42857143e-01,
1.54761905e-01, 1.66666667e-01],
[-7.72065402e-17, 3.57142857e-02, 7.14285714e-02,
1.07142857e-01, 1.42857143e-01, 1.78571429e-01,
2.14285714e-01, 2.50000000e-01],
[-8.33333333e-02, -2.38095238e-02, 3.57142857e-02,
9.52380952e-02, 1.54761905e-01, 2.14285714e-01,
2.73809524e-01, 3.33333333e-01],
[-1.66666667e-01, -8.33333333e-02, -2.63677968e-16,
8.33333333e-02, 1.66666667e-01, 2.50000000e-01,
3.33333333e-01, 4.16666667e-01]])
import numpy as np
L = np.random.random(100)
L
array([7.73124723e-01, 2.55426783e-01, 3.01399746e-01, 2.61090729e-01,
9.37397881e-01, 7.38703342e-01, 6.71044552e-01, 5.45700398e-01,
8.77468628e-01, 4.61853354e-03, 5.60603850e-01, 5.79218238e-01,
3.88654023e-01, 9.48737462e-01, 4.92815852e-01, 3.01065857e-01,
1.40609072e-01, 7.05760888e-01, 9.96759228e-01, 2.76513153e-01,
3.81988266e-01, 3.03864793e-01, 9.99262986e-01, 9.33638250e-01,
9.78007851e-01, 7.52558535e-02, 5.01134242e-01, 6.14598914e-01,
5.25678503e-04, 6.19223710e-01, 6.66315716e-01, 1.90884302e-01,
3.02813997e-01, 5.13613830e-01, 2.98942305e-01, 2.49488945e-01,
2.39454358e-01, 3.04904423e-02, 6.49002160e-01, 7.51330392e-01,
5.17789342e-01, 9.86577863e-01, 9.29245299e-02, 2.90256102e-02,
2.41292834e-01, 6.56942621e-01, 7.69101258e-01, 4.37370938e-01,
4.59433757e-01, 5.86539559e-01, 2.05978128e-01, 8.45214439e-01,
5.67217564e-01, 6.81286007e-01, 4.93603790e-01, 2.83595533e-01,
6.29692159e-02, 1.78184831e-02, 5.83401708e-02, 6.20231400e-01,
1.04547868e-01, 1.00415741e-01, 6.91171478e-02, 6.85892030e-01,
9.29855003e-01, 9.99141931e-01, 8.12123673e-01, 1.01615247e-02,
1.00683233e-02, 9.01578390e-01, 6.45368728e-01, 6.71096622e-01,
8.18277343e-01, 6.98780333e-01, 1.49821058e-01, 7.92797641e-01,
4.68906841e-01, 8.65617714e-01, 4.02349154e-01, 2.76732044e-01,
9.16048672e-01, 1.02965929e-01, 4.14111883e-01, 5.86467521e-01,
1.25875230e-01, 3.53990446e-01, 5.62178220e-02, 3.73338109e-01,
2.29659037e-01, 7.08210283e-01, 5.39074658e-01, 8.85796909e-01,
1.94722922e-01, 6.34048518e-01, 2.45683429e-01, 6.93775369e-01,
3.48923102e-01, 8.95471962e-01, 5.12577771e-01, 4.12440566e-01])
sum(L)
48.31185222315484
np.sum(L)
48.311852223154865
big_array = np.random.rand(1000000)
%timeit sum(big_array)
%timeit np.sum(big_array)
90.9 ms ± 3.13 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
416 µs ± 18.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
np.min(big_array)
4.152622914421755e-07
np.max(big_array)
0.9999999324757766
big_array.min()
4.152622914421755e-07
big_array.max()
0.9999999324757766
X = np.arange(16).reshape(4,-1)
X
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
# 默认将所有的元素进行聚合
np.sum(X)
120
# 计算每一列的和,沿着行的方向进行计算(对每一列进行运算)(压缩掉传入的维度)
np.sum(X,axis=0)
array([24, 28, 32, 36])
np.sum(X,axis=1)
array([ 6, 22, 38, 54])
# 所有元素乘积
np.prod(X)
0
np.prod(X+1)
20922789888000
# 均值
np.mean(X)
7.5
# 中位数
np.median(X)
7.5
# 百分位
np.percentile(big_array,q=50)
0.4999999491754548
np.percentile(big_array,q=20)
0.19947456304434275
# Five commonly used percentiles: minimum, lower quartile, median,
# upper quartile, and maximum.
for percent in (0, 25, 50, 75, 100):
    print(np.percentile(big_array, q=percent))
4.152622914421755e-07
0.2493020139081539
0.4999999491754548
0.7499901597006043
0.9999999324757766
# 方差
np.var(big_array)
0.08338181012540496
# 标准差(方差的算术平方根)
np.std(big_array)
0.28875908665426436
x = np.random.normal(0,1,size=1000000)
np.mean(x)
0.0004949797743234279
np.std(x)
1.0001058659212774
# 最小值
np.min(x)
-5.119655460488136
# 最小值的索引
np.argmin(x)
800272
x = np.arange(16)
# 乱序
np.random.shuffle(x)
x
array([11, 14, 13, 2, 9, 15, 7, 1, 4, 0, 5, 10, 6, 3, 12, 8])
# 排序
np.sort(x)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
# 上面只是返回了排好的数组,原数组依旧无序
x
array([11, 14, 13, 2, 9, 15, 7, 1, 4, 0, 5, 10, 6, 3, 12, 8])
# 对原数组排序
x.sort()
x
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
# 创建二维数组
X = np.random.randint(10,size=(4,4))
X
array([[2, 3, 1, 3],
[4, 2, 1, 6],
[3, 6, 2, 3],
[8, 9, 3, 8]])
# 对二维数组进行排序,默认 axis=-1(最后一个轴),即对每一行内部的元素排序
np.sort(X)
array([[1, 2, 3, 3],
[1, 2, 4, 6],
[2, 3, 3, 6],
[3, 8, 8, 9]])
np.sort(X,axis=1)
array([[1, 2, 3, 3],
[1, 2, 4, 6],
[2, 3, 3, 6],
[3, 8, 8, 9]])
np.sort(X,axis=0)
array([[2, 2, 1, 3],
[3, 3, 1, 3],
[4, 6, 2, 6],
[8, 9, 3, 8]])
# 再次打乱原一维数组
np.random.shuffle(x)
x
array([14, 8, 12, 0, 3, 7, 11, 10, 1, 15, 6, 4, 5, 2, 13, 9])
# 返回数组排好序的元素索引位置
np.argsort(x)
array([ 3, 8, 13, 4, 11, 12, 10, 5, 1, 15, 7, 6, 2, 14, 0, 9])
# 分区(类似快排中的 partition 操作,并非完整排序):第二个参数 kth 是标定点的索引,该位置的元素就位,比它小的都在它前面,比它大的都在它后面
np.partition(x,3)
array([ 0, 1, 2, 3, 4, 7, 5, 8, 9, 6, 10, 15, 11, 12, 13, 14])
# 返回的是索引
np.argpartition(x,3)
array([ 3, 8, 13, 4, 11, 5, 12, 1, 15, 10, 7, 9, 6, 2, 14, 0])
# 沿着列的方向(对行)进行排序,返回索引
np.argsort(X,axis=1)
array([[2, 0, 1, 3],
[2, 1, 0, 3],
[2, 0, 3, 1],
[2, 0, 3, 1]])
# 第二个参数是每行或者每列的标定点,第三个参数是方向
np.argpartition(X,2,axis=1)
array([[2, 0, 1, 3],
[2, 1, 0, 3],
[2, 0, 3, 1],
[2, 0, 3, 1]])
import numpy as np
x = np.arange(16)
x
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
### 支持通过boolean数组来获取元素,True代表感兴趣,False代表不感兴趣
x[3]
3
# 获取数组的子区间
x[3:9]
array([3, 4, 5, 6, 7, 8])
# 获取数组的等步长的区间
x[3:9:2]
array([3, 5, 7])
ind = [3,5,8]
x[ind]
array([3, 5, 8])
ind = np.array([[0,2],[1,3]])
x[ind]
array([[0, 2],
[1, 3]])
X = x.reshape(4,-1)
X
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
help(np.reshape)
Help on function reshape in module numpy.core.fromnumeric:
reshape(a, newshape, order='C')
Gives a new shape to an array without changing its data.
Parameters
----------
a : array_like
Array to be reshaped.
newshape : int or tuple of ints
The new shape should be compatible with the original shape. If
an integer, then the result will be a 1-D array of that length.
One shape dimension can be -1. In this case, the value is
inferred from the length of the array and remaining dimensions.
order : {'C', 'F', 'A'}, optional
Read the elements of `a` using this index order, and place the
elements into the reshaped array using this index order. 'C'
means to read / write the elements using C-like index order,
with the last axis index changing fastest, back to the first
axis index changing slowest. 'F' means to read / write the
elements using Fortran-like index order, with the first index
changing fastest, and the last index changing slowest. Note that
the 'C' and 'F' options take no account of the memory layout of
the underlying array, and only refer to the order of indexing.
'A' means to read / write the elements in Fortran-like index
order if `a` is Fortran *contiguous* in memory, C-like order
otherwise.
Returns
-------
reshaped_array : ndarray
This will be a new view object if possible; otherwise, it will
be a copy. Note there is no guarantee of the *memory layout* (C- or
Fortran- contiguous) of the returned array.
See Also
--------
ndarray.reshape : Equivalent method.
Notes
-----
It is not always possible to change the shape of an array without
copying the data. If you want an error to be raised when the data is copied,
you should assign the new shape to the shape attribute of the array::
>>> a = np.zeros((10, 2))
# A transpose makes the array non-contiguous
>>> b = a.T
# Taking a view makes it possible to modify the shape without modifying
# the initial object.
>>> c = b.view()
>>> c.shape = (20)
AttributeError: incompatible shape for a non-contiguous array
The `order` keyword gives the index ordering both for *fetching* the values
from `a`, and then *placing* the values into the output array.
For example, let's say you have an array:
>>> a = np.arange(6).reshape((3, 2))
>>> a
array([[0, 1],
[2, 3],
[4, 5]])
You can think of reshaping as first raveling the array (using the given
index order), then inserting the elements from the raveled array into the
new array using the same kind of index ordering as was used for the
raveling.
>>> np.reshape(a, (2, 3)) # C-like index ordering
array([[0, 1, 2],
[3, 4, 5]])
>>> np.reshape(np.ravel(a), (2, 3)) # equivalent to C ravel then C reshape
array([[0, 1, 2],
[3, 4, 5]])
>>> np.reshape(a, (2, 3), order='F') # Fortran-like index ordering
array([[0, 4, 3],
[2, 1, 5]])
>>> np.reshape(np.ravel(a, order='F'), (2, 3), order='F')
array([[0, 4, 3],
[2, 1, 5]])
Examples
--------
>>> a = np.array([[1,2,3], [4,5,6]])
>>> np.reshape(a, 6)
array([1, 2, 3, 4, 5, 6])
>>> np.reshape(a, 6, order='F')
array([1, 4, 2, 5, 3, 6])
>>> np.reshape(a, (3,-1)) # the unspecified value is inferred to be 2
array([[1, 2],
[3, 4],
[5, 6]])
row = np.array([0,1,2])
col = np.array([1,2,3])
X[row,col]
array([ 1, 6, 11])
X[0,col]
array([1, 2, 3])
X[:2,col]
array([[1, 2, 3],
[5, 6, 7]])
col = [True,False,True,True]
X[1:3,col]
array([[ 4, 6, 7],
[ 8, 10, 11]])
x
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
x<3
array([ True, True, True, False, False, False, False, False, False,
False, False, False, False, False, False, False])
x==3
array([False, False, False, True, False, False, False, False, False,
False, False, False, False, False, False, False])
2*x == 24 - 4*x
array([False, False, False, False, True, False, False, False, False,
False, False, False, False, False, False, False])
X<6
array([[ True, True, True, True],
[ True, True, False, False],
[False, False, False, False],
[False, False, False, False]])
np.sum(x<=3)
4
# 非0元素,在bool数组中true为1,false为0
np.count_nonzero(x<=3)
4
# 任意一个等于0则返回True
np.any(x==0)
True
# 所有的元素都等于0返回True
np.all(x==0)
False
# 沿着列的方向(看每一行)有多少偶数
np.sum(X%2==0,axis=1)
array([2, 2, 2, 2])
# x是一个数组,对数组分别进行>3和<10的比较后得到两个布尔数组,再逐元素做按位与运算,所以用 & 而不是 and
np.sum((x>3) & (x<10))
6
# x<5 得到的是布尔数组(掩码),x[x<5]获取的是满足条件的具体的值
x[x<5]
array([0, 1, 2, 3, 4])