Austin Godber
@godber
DesertPy - 8/27/2014
apt-get install python-numpy
Typically imported as np
for brevity.
%matplotlib inline
import numpy as np
import scipy
import matplotlib.pylab as plt
ndarray
- Multidimensional array object
ndarray
without loops
At the core of NumPy we have the ndarray
object.
ndarray
is a multidimensional container for homogeneous data, which has attributes: shape
, dtype
and supports many vectorized operations.
data = range(10)
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
array = np.array(data)
print "array contains %s" % array
array contains [0 1 2 3 4 5 6 7 8 9]
print "array.shape is %s, array.dtype is %s" % (array.shape, array.dtype)
array.shape is (10,), array.dtype is int64
array**2
array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81])
There are many methods that can create ndarray
objects. Some are shown below:
array
, asarray
arange
, linspace
and meshgrid
ones
and ones_like
zeros
and zeros_like
empty
and empty_like
eye
and identity
fromfile
, fromfunction
and loadtxt
http://docs.scipy.org/doc/numpy/reference/routines.array-creation.html
array
- pass an array type object and optionally dtype
np.array([1, 2, 3])
array([1, 2, 3])
a1 = np.array([1, 2, 3.0]) # automatic upcast to float64
print a1, a1.dtype
[ 1. 2. 3.] float64
a2 = np.array([1, 2, 3], dtype='float64')
print a2, a2.dtype
[ 1. 2. 3.] float64
Multiple Dimensions
a3 = np.array([[0, 1, 2], [3, 4, 5]])
print a3, a3.shape, a3.size, a3.dtype
[[0 1 2] [3 4 5]] (2, 3) 6 int64
asarray
like array
but existing arrays are not copied
a1
array([ 1., 2., 3.])
np.array(a1) is a1 # a1 data copied to create new ndarray
False
np.asarray(a1) is a1 # a1 is referenced rather than copied
True
arange
returns ndarray
of evenly spaced values within a given interval
np.arange(10)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
ar1 = np.arange(0, 19, 2)
ar1
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
ones
and ones_like
returns an array full of ones
np.ones(5)
array([ 1., 1., 1., 1., 1.])
np.ones((2,3))
array([[ 1., 1., 1.], [ 1., 1., 1.]])
np.ones_like(ar1)
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
zeros
and zeros_like
returns array of zeros
empty
and empty_like
returns empty, uninitialized array (junk)
np.empty((3,1))
array([[ 6.93416554e-310], [ 6.93416554e-310], [ 6.93413712e-310]])
eye
and identity
returns identity matrix
np.eye(3)
array([[ 1., 0., 0.], [ 0., 1., 0.], [ 0., 0., 1.]])
genfromtxt
- generate array from StringIO
object
bool_
int_
, int8
, int16
, int32
, int64
uint8
, uint16
, uint32
, uint64
float_
, float16
, float32
, float64
complex_
, complex64
, complex128
dta1 = np.array([1,2,3], dtype=np.float64)
dta2 = np.array([1,2,3], dtype='float64')
dta3 = np.array([1,2,3], dtype='float_')
print dta1, dta2, dta3, '\n', dta1.dtype, dta2.dtype, dta3.dtype
[ 1. 2. 3.] [ 1. 2. 3.] [ 1. 2. 3.] float64 float64 float64
Just use the object:
np.float64
There are a huge number of operations
http://docs.scipy.org/doc/numpy/reference/routines.html
# Remember this guy?
ar1
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
ar1 + ar1
array([ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36])
ar1 + 100 # broadcasting
array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118])
rad = np.arange(6.) * np.pi / 6
np.degrees(rad)
array([ 0., 30., 60., 90., 120., 150.])
np.sin(rad)
array([ 0. , 0.5 , 0.8660254, 1. , 0.8660254, 0.5 ])
# Create x values from -pi to pi, 201 points
x = np.linspace(-np.pi, np.pi, 201)
_ = plt.plot(x, np.sin(x))
rar1 = ar1.reshape(2,5)
rar1
array([[ 0, 2, 4, 6, 8], [10, 12, 14, 16, 18]])
Functions that flatten the array
np.ravel(rar1) # sometimes a copy
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
rar1.flatten() # always a copy
array([ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18])
[x for x in rar1.flat] # .flat is an iterator
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
Extra Credit: There is an order
argument to these methods, 'C' (C-like) and 'F' (Fortran-like). By default NumPy ndarray
data is stored in a C-Like row-major layout, that is, row values are contiguous in memory.
rar2 = np.arange(4).reshape(2,2)
rar2
array([[0, 1], [2, 3]])
np.concatenate((rar2, rar2)) # default axis=0
array([[0, 1], [2, 3], [0, 1], [2, 3]])
np.concatenate((rar2, rar2), axis=1)
array([[0, 1, 0, 1], [2, 3, 2, 3]])
np.vstack((rar2, rar2))
array([[0, 1], [2, 3], [0, 1], [2, 3]])
np.hstack((rar2, rar2))
array([[0, 1, 0, 1], [2, 3, 2, 3]])
np.dstack((rar2, rar2))
array([[[0, 0], [1, 1]], [[2, 2], [3, 3]]])
np.tile(rar2, (2,2))
array([[0, 1, 0, 1], [2, 3, 2, 3], [0, 1, 0, 1], [2, 3, 2, 3]])
slicey = np.arange(10)
slicey
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
slicey[5]
5
slicey[5:8] # note 8's absence
array([5, 6, 7])
Omit one of the values to get the rest of the values in that direction
slicey[3:]
array([3, 4, 5, 6, 7, 8, 9])
slicey[:3]
array([0, 1, 2])
Count by n
or stride
by adding another :
slicey[::2]
array([0, 2, 4, 6, 8])
slicey[1::2]
array([1, 3, 5, 7, 9])
slicey[-2:]
array([8, 9])
Assigning to a view modifies the original array.
slicey_slice = slicey[2:5]
slicey_slice
array([2, 3, 4])
slicey_slice[:] = 56
slicey_slice
array([56, 56, 56])
slicey
array([ 0, 1, 56, 56, 56, 5, 6, 7, 8, 9])
Moral of the story?
Use .copy()
on your slices if you don't want to trash your original array.
randy = np.random.randn(3,3)
randy
array([[ 1.34480772, 2.41880375, -0.15025801], [ 0.78994062, 2.14416936, -0.43433278], [ 0.66470266, -0.03281608, -0.49504067]])
positively_randy = randy >= 0.0
positively_randy
array([[ True, True, False], [ True, True, False], [ True, False, False]], dtype=bool)
randy[positively_randy]
array([ 1.34480772, 2.41880375, 0.78994062, 2.14416936, 0.66470266])
randy[~positively_randy]
array([-0.15025801, -0.43433278, -0.03281608, -0.49504067])
positively_randy
is a boolean index; you can use it for access as shown above, or for assignment as shown below.
randy[~positively_randy] = 0.0 # negate the booleans with ~
randy
array([[ 1.34480772, 2.41880375, 0. ], [ 0.78994062, 2.14416936, 0. ], [ 0.66470266, 0. , 0. ]])
Of course this could have all been done in one step.
andy = np.random.randn(3,3)
andy
array([[-0.27309172, 1.09020965, 0.54920302], [-0.81095048, -0.18885317, 0.6673531 ], [-1.4496338 , -0.40677127, 1.87158194]])
andy[andy < 0.0] = 0.0
andy
array([[ 0. , 1.09020965, 0.54920302], [ 0. , 0. , 0.6673531 ], [ 0. , 0. , 1.87158194]])
from scipy import misc
wallaby = misc.imread('wallaby_746_600x450.jpg')
print type(wallaby), wallaby.size, wallaby.shape, wallaby.dtype
<type 'numpy.ndarray'> 810000 (600, 450, 3) uint8
plt.imshow(wallaby)
<matplotlib.image.AxesImage at 0x7fa56c33f3d0>
fig, (ax0, ax1, ax2) = plt.subplots(ncols=3)
fig.set_size_inches(10, 4)
ax0.imshow(wallaby[:, :, 0], cmap='gray')
ax0.get_yaxis().set_ticks([]); ax0.get_xaxis().set_ticks([]); ax0.set_title('Red')
ax1.imshow(wallaby[:, :, 1], cmap='gray')
ax1.get_yaxis().set_ticks([]); ax1.get_xaxis().set_ticks([]); ax1.set_title('Green')
ax2.imshow(wallaby[:, :, 2], cmap='gray')
ax2.get_yaxis().set_ticks([]); ax2.get_xaxis().set_ticks([]); ax2.set_title('Blue')
<matplotlib.text.Text at 0x7fa5694db590>
h = plt.hist(wallaby[:, :, 2].flatten(), 256, fc='k', ec='k')