.npy stores a single array with its shape and dtype preserved. .npz is a zip archive of multiple named arrays. These are the fastest and most reliable formats for NumPy data.
import numpy as np

# .npy round-trip: np.save preserves shape and dtype exactly.
X = np.arange(12).reshape(3, 4)

# Save a single array
np.save('data.npy', X)

# Load it back
X2 = np.load('data.npy')
print(X2.shape, X2.dtype)

# Save multiple arrays to .npz (named)
y = np.linspace(0, 1, 5)
w = np.array([1.0, 0.5, 2.0, 1.5])
np.savez('bundle.npz', features=X, target=y, weights=w)

# Or compressed (same API, smaller file for repetitive data)
np.savez_compressed('bundle_compressed.npz', X=X, y=y, w=w)

# Load .npz — the returned NpzFile is dict-like but also holds an open
# zip handle, so close it deterministically with a context manager
# instead of leaking it until garbage collection.
with np.load('bundle.npz') as data:
    print(list(data.keys()))  # ['features', 'target', 'weights']
    features = data['features']
print(features)
savetxt & loadtxt
Use savetxt for quick exports and loadtxt for simple, clean numeric text files.
# Round-trip a small float matrix through a CSV text file.
csv_path = 'matrix.csv'
A = np.array([[1.5, 2.0, 3.25], [4.0, 5.0, 6.125]])

# Write with a header and 3-decimal formatting; comments='' stops savetxt
# from prefixing the header line with '# '.
np.savetxt(csv_path, A, fmt='%.3f', delimiter=',',
           header='col1,col2,col3', comments='')

# Read it back, skipping the one header row.
B = np.loadtxt(csv_path, delimiter=',', skiprows=1)
print(B)
genfromtxt
genfromtxt is more flexible than loadtxt: it can handle missing values, different encodings, comments, and mixed dtypes.
from io import StringIO
import numpy as np
csv = StringIO("""# id, height, weight
1,170,65
2,,-1
3,160,58
""")
arr = np.genfromtxt(
csv, delimiter=',', skip_header=1,
dtype=[('id','i4'),('height','f8'),('weight','f8')],
missing_values=['', '-1'], filling_values=np.nan
)
print(arr['height']) # [170. nan 160.]
print(arr['weight']) # [65. nan 58.]
memmap
Memory-map a binary array on disk and access slices without loading the entire file into RAM.
# Create a large memmap and fill it in chunks so the whole array never
# needs to fit in RAM at once.  mode='w+' creates/overwrites the file,
# zero-initialized.
m = np.memmap('bigdata.dat', dtype='float32', mode='w+', shape=(10000, 1000))
m[0:1000] = np.random.rand(1000, 1000).astype('float32')
m.flush()  # push dirty pages to disk explicitly instead of relying on GC order
del m      # release the mapping

# Read-only mapping later: slices are paged in lazily from disk.
m2 = np.memmap('bigdata.dat', dtype='float32', mode='r', shape=(10000, 1000))
print(m2[5000:5005, 100:110])
# Encoding & comments
arr = np.loadtxt('data_utf8.txt', delimiter=',', comments='#', encoding='utf-8')

# Custom converters (strip units like 'cm').
# NumPy >= 1.23 passes each field to converters as str; older versions
# passed bytes, so `s.decode(...)` alone crashes on modern NumPy.
# Normalize the type before stripping the unit suffix.
def _strip_cm(field):
    # Accept bytes (old NumPy) or str (NumPy >= 1.23).
    if isinstance(field, bytes):
        field = field.decode('utf-8')
    return float(field.replace('cm', ''))

conv = {1: _strip_cm}
arr2 = np.loadtxt('heights.csv', delimiter=',', converters=conv)
All these functions accept file paths or file-like objects. Use `with open(...)` for explicit control over the file handle and `pathlib` for robust path handling.
from pathlib import Path

# np.save does NOT create parent directories — without mkdir this raises
# FileNotFoundError whenever 'data/' doesn't exist yet.
p = Path('data') / 'sample.npy'
p.parent.mkdir(parents=True, exist_ok=True)
np.save(p, np.arange(5))
print(np.load(p))
Key takeaways:
- Set an explicit fmt in savetxt to avoid precision loss.
- Skip header rows with skiprows (loadtxt) or skip_header (genfromtxt).
- Handle gaps with genfromtxt's missing_values + filling_values.
- Prefer .npy/.npz or memmap for performance.
- For messy mixed data, use genfromtxt or load as strings and post-process.
# 1) Save a (100, 5) float array to 'scores.npy' and load it back.
X = np.random.rand(100, 5)      # 100 samples x 5 features
out_path = 'scores.npy'
np.save(out_path, X)            # .npy keeps shape and dtype exactly
reloaded = np.load(out_path)
print(reloaded.shape)
# 2) Write a (3,3) matrix to CSV with 2 decimals and a header row, then load it.
M = np.arange(9).reshape(3, 3) / 3
# comments='' keeps the header row un-prefixed so skiprows=1 lines up.
np.savetxt('M.csv', M, fmt='%.2f', delimiter=',', header='a,b,c', comments='')
round_tripped = np.loadtxt('M.csv', delimiter=',', skiprows=1)
print(round_tripped)
# 3) Use genfromtxt to read a CSV with missing values into floats,
#    then impute each NaN with its column mean.
arr = np.genfromtxt('gaps.csv', delimiter=',', skip_header=1)
col_means = np.nanmean(arr, axis=0)      # per-column mean, ignoring NaNs
rows, cols = np.where(np.isnan(arr))     # coordinates of every gap
arr[rows, cols] = col_means[cols]        # fill each gap from its column's mean
print(arr)
# 4) Memory-map a large file, write one slice, then read it back read-only.
shape = (2000, 2000)
mm = np.memmap('blk.dat', dtype='float64', mode='w+', shape=shape)
mm[100:200] = 1.23   # touch only rows 100..199; the rest stays zero
del mm               # dropping the mapping flushes dirty pages to disk
mm2 = np.memmap('blk.dat', dtype='float64', mode='r', shape=shape)
print(mm2[150, 150])
Author
🎥 Join me live on YouTube. Passionate about coding and teaching, I publish practical tutorials on PHP, Python, JavaScript, SQL, and web development. My goal is to make learning simple, engaging, and project-oriented with real examples and source code.