h5py使用

group 类似于 Python 的字典(dict),dataset 类似于 NumPy 中的数组(ndarray)。

读取H5文件

>>> import h5py
>>> import numpy as np
>>> # 打开文件
>>> f = h5py.File('test-dev.h5', 'r')
>>> f.keys()
[u'my_xmax', u'my_xmin', u'my_ymax', u'my_ymin']
>>> xmax = f['my_xmax']

>>> xmax = f['my_xmax']
>>> type(xmax)
h5py._hl.dataset.Dataset
>>> xmax = f['my_xmax'][:]
>>> type(xmax)
numpy.ndarray

>>> xmax.shape
(1257351,)
>>> xmax.dtype
dtype('int64')
>>> xmax[...]
array([527, 260, 638, ..., 365, 334, 262])

写入H5文件

>>> f = h5py.File('test-dev.h5','w')

>>> f.create_dataset('bndbox', data=h5_bndbox)
>>> f.create_dataset('imgname', data=h5_imgname)
>>> f.create_dataset('part', data=h5_part)


# 或者直接用下标赋值(与上面的 create_dataset 二选一;同名 dataset 已存在时再次写入会报错)
>>> f['bndbox'] = h5_bndbox
>>> f['imgname'] = h5_imgname
>>> f['part'] = h5_part

>>> f.close()

字符串的特殊处理

>>> f = h5py.File('annot.h5','r')
>>> imgname = f['imgname'][:]
>>> imgname.shape
(146545, 31)
>>> img0 = imgname[0]
>>> img0.dtype
dtype('float64')
>>> img0[...]
array([ 67., 79., 67., 79., 95., 116., 114., 97., 105.,
110., 50., 48., 49., 52., 95., 48., 48., 48.,
48., 48., 48., 50., 54., 50., 49., 52., 53.,
46., 106., 112., 103.])

# 先转换数据类型(float64 → uint8),再转成字节串
>>> img0 = img0.astype(np.uint8)
>>> img0.tostring()
'COCO_train2014_000000262145.jpg'

>>> img0.tostring().decode('ascii')
u'COCO_train2014_000000262145.jpg'

# 反向操作:把字符串 imagename 按字节转换回 float64 数组
np.fromstring(imagename,dtype=np.uint8).astype('float64')
# 写进h5
f.create_dataset('imgname', data=imgname)

http://jeff-leaf.site/2017/09/29/Python%E5%A4%84%E7%90%86HDF5%E6%96%87%E4%BB%B6/

请作者喝一杯咖啡☕️