brukeropus.file.parse
'''Low-level parsing functions for the header, directory, parameter, data and text blocks of an OPUS file.'''
import os
import struct
import errno
import numpy as np
from brukeropus.file.constants import STRUCT_3D_INFO_BLOCK


__docformat__ = "google"


def read_opus_file_bytes(filepath) -> bytes:
    '''Returns `bytes` of an OPUS file specified by `filepath` (or `None`).

    Function determines if `filepath` points to an OPUS file by reading the first four bytes which are always the same
    for OPUS files. If `filepath` points to a non-OPUS file (including an empty file or one shorter than four bytes),
    the function returns `None`. Otherwise the function returns the entire file as raw `bytes`.

    Args:
        filepath (str or Path): full filepath to OPUS file

    Returns:
        **filebytes (bytes):** raw bytes of OPUS file or `None` (if filepath does not point to an OPUS file)

    Raises:
        FileNotFoundError: if `filepath` does not point to an existing regular file.
    '''
    if not os.path.isfile(filepath):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filepath)
    with open(filepath, 'rb') as f:
        try:
            # A short or empty file simply yields fewer than four bytes here (no exception is raised).
            first_four = f.read(4)
            if first_four != b'\n\n\xfe\xfe':
                return None
            return first_four + f.read()
        except OSError:
            # Best effort: a file that cannot be read is reported the same way as a non-OPUS file.
            return None


def get_block_type(type_int: int) -> tuple:
    '''Converts an int32 block type code to a six-integer tuple `block_type`.

    This function is used to decode the `type_int` from the directory block of an OPUS file into a tuple of integers.
    Each integer in the tuple provides information about the associated data block. The fields are extracted with
    shifts and masks, so a negative `type_int` (sign bit set) is decoded from its two's complement bit pattern.

    Args:
        type_int: 32-bit integer decoded from file directory block

    Returns:
        **block_type (tuple):** six-integer tuple which specifies the block type
    '''
    # (bit offset, bit width) of each field, starting from the least-significant bits
    fields = ((0, 2), (2, 2), (4, 6), (10, 7), (17, 2), (19, 3))
    return tuple((type_int >> shift) & ((1 << width) - 1) for shift, width in fields)


def parse_header(filebytes: bytes) -> tuple:
    '''Parses the OPUS file header.

    The header of an OPUS file contains some basic information about the file including the version number, location of
    the directory block, and number of blocks in the file. This header is first to be parsed as it specifies how to
    read the file directory block (which contains information about each block in the file)

    Args:
        filebytes: raw bytes of OPUS file (all bytes)

    Returns:
        **header_info (tuple):**
            (
                **version (float64):** program version number as a floating-point date (later versions always greater)
                **directory_start (int32):** pointer to start location of directory block (number of bytes)
                **max_blocks (int32):** maximum number of blocks supported by the directory block (this should only be
                    relevant when trying to edit an OPUS file, i.e. when adding data blocks to a file)
                **num_blocks (int32):** total number of blocks in the opus file
            )

    Raises:
        struct.error: if `filebytes` is too short to contain the 24-byte header.
    '''
    # One explicit little-endian read: float64 at byte 4 followed by three int32 at bytes 12, 16 and 20.
    version, directory_start, max_blocks, num_blocks = struct.unpack_from('<d3i', filebytes, 4)
    return version, directory_start, max_blocks, num_blocks


def parse_directory(blockbytes: bytes) -> list:
    '''Parses directory block of OPUS file and returns a list of block info tuples: (type, size, start).

    The directory block of an OPUS file contains information about every block in the file. The block information is
    stored as three int32 values: `type_int`, `size_int`, `start`. `type_int` is an integer representation of the block
    type. The bits of this `type_int` have meaning and are parsed into a tuple using `get_block_type`. The `size_int` is
    the size of the block in 32-bit words. `start` is the starting location of the block (in number of bytes).

    Parsing stops at the first entry whose `start` is not positive. Trailing bytes that do not form a complete 12-byte
    entry are ignored.

    Args:
        blockbytes: raw bytes of an OPUS file directory block

    Returns:
        **blocks (list):** list of block_info tuples
            **block_info (tuple):**
                (
                    **block_type (tuple):** six-integer tuple which specifies the block type (see: `get_block_type`)
                    **size (int):** size (number of bytes) of the block
                    **start (int):** pointer to start location of the block (number of bytes)
                )
    '''
    entry_size = 12  # three int32 values per directory entry
    blocks = []
    # Only visit complete entries so a partial trailing entry cannot raise struct.error.
    for loc in range(0, len(blockbytes) - entry_size + 1, entry_size):
        type_int, size_int, start = struct.unpack_from('<3i', blockbytes, loc)
        if start <= 0:
            break
        blocks.append((get_block_type(type_int), size_int * 4, start))
    return blocks


def parse_params(blockbytes: bytes) -> dict:
    '''Parses the bytes in a parameter block and returns a dict containing the decoded keys and vals.

    Parameter blocks are in the form: `XXX`, `dtype_code`, `size`, `val`. `XXX` is a three char abbreviation of the
    parameter (key). The value of the parameter is decoded according to the `dtype_code` and size integers to be either:
    `int`, `float`, or `string`. Parsing stops at the `END` key, when fewer than eight bytes remain for the next
    parameter header, or when a negative size is encountered (which would otherwise prevent forward progress).

    Args:
        blockbytes: raw bytes of an OPUS file parameter block

    Returns:
        **params (dict):** key/value pairs where key is three char string (lowercase) and value can be `int`, `float`
            or `string`. A value that cannot be unpacked is stored as a string starting with `Failed to decode: `.
    '''
    header_size = 8  # 3-byte key + 1 unused byte + two int16 (dtype_code, size in 16-bit words)
    total = len(blockbytes)
    params = dict()
    loc = 0
    while loc + header_size <= total:
        key = blockbytes[loc:loc + 3].decode('utf-8')
        if key == 'END':
            break
        dtype_code, val_size = struct.unpack_from('<2h', blockbytes, loc + 4)
        if val_size < 0:
            break  # corrupt size: advancing by it could move backwards and loop forever
        val_size = val_size * 2
        if dtype_code == 0:
            fmt_str = '<i'
        elif dtype_code == 1:
            fmt_str = '<d'
        else:
            fmt_str = '<' + str(val_size) + 's'
        try:
            val = struct.unpack_from(fmt_str, blockbytes, loc + 8)[0]
        except struct.error as e:
            val = 'Failed to decode: ' + str(e)
        else:
            if isinstance(val, bytes):
                # Keep only the text before the first NUL terminator (if any).
                val = val.split(b'\x00', 1)[0].decode('latin-1')
        params[key.lower()] = val
        loc = loc + val_size + header_size
    return params


def get_dpf_dtype_count(dpf: int, size: int) -> tuple:
    '''Returns numpy dtype and array count from the data point format (dpf) and block size (in bytes).

    Args:
        dpf: data point format integer stored in data status block.
            dpf = 2 -> array of int32
            any other value -> array of float32
        size: Block size in bytes.

    Returns:
        **dtype (numpy.dtype):** `numpy` dtype for defining an `ndarray` to store the data
        **count (int):** number of whole elements of `dtype` that fit in `size` bytes (rounded down).
    '''
    dtype = np.int32 if dpf == 2 else np.float32
    # Floor division: rounding up would request more elements than the block contains.
    count = int(size // np.dtype(dtype).itemsize)
    return dtype, count


def parse_data(blockbytes: bytes, dpf: int = 1) -> np.ndarray:
    '''Parses the bytes in a data block and returns a `numpy` array.

    Data blocks contain no metadata, only the y-values of a data array. Data arrays include: single-channel sample,
    reference, phase, interferograms, and a variety of resultant data (transmission, absorption, etc.). Every data
    block should have a corresponding data status parameter block which can be used to generate the x-array values for
    the data block. The data status block also specifies the data type of the data array with the `DPF` parameter. It
    appears that OPUS currently exclusively stores data blocks as 32-bit floats, but has a reservation for 32-bit
    integers when `DPF` = 2.

    Args:
        blockbytes: raw bytes of data block
        dpf: data-point-format integer stored in corresponding data status block.

    Returns:
        **y_array (numpy.ndarray):** `numpy` array of y values contained in the data block
    '''
    dtype, count = get_dpf_dtype_count(dpf=dpf, size=len(blockbytes))
    return np.frombuffer(blockbytes, dtype=dtype, count=count)


def parse_data_series(blockbytes: bytes, dpf: int = 1) -> dict:
    '''Parses the bytes in a 3D data block (series of spectra) and returns a data `dict` containing data and metadata.

    3D data blocks are structured differently than standard data blocks. In addition to the series of spectra, they
    include metadata for each spectrum. This function returns a `dict` containing all the extracted information
    from the data block. The series of spectra is formed into a 2D array while metadata captured for each spectrum is
    formed into a 1D array (length = number of spectral measurements in the series).

    Args:
        blockbytes: raw bytes of the data series block
        dpf: data-point-format integer stored in corresponding data status block.

    Returns:
        **data_dict (dict):** `dict` containing all extracted information from the data block
            {
                **version:** file format version number (should be 0)
                **num_blocks:** number of sub blocks; each sub block features a data spectrum and associated metadata
                **offset:** offset in bytes to the first sub data block
                **data_size:** size in bytes of each sub data block
                **info_size:** size in bytes of the metadata info block immediately following the sub data block
                **store_table:** run numbers of the first and last blocks to keep track of skipped spectra
                **y:** 2D `numpy` array containing all spectra (C-order)
                **metadata arrays:** series of metadata arrays in 1D array format (e.g. `npt`, `mny`, `mxy`, `ert`).
                    The most useful one is generally `ert`, which can be used as the time axis for 3D data plots.
            }
    '''
    # Explicit '<' forces 4-byte values; a bare 'l' is 8 bytes on 64-bit Linux/macOS and would misread the 24-byte
    # header that the store table (starting at byte 24) immediately follows.
    header = struct.unpack_from('<6l', blockbytes, 0)
    data = {
        'version': header[0],
        'num_blocks': header[1],
        'offset': header[2],
        'data_size': header[3],
        'info_size': header[4],
    }
    data['store_table'] = [struct.unpack_from('<2l', blockbytes, 24 + i * 8) for i in range(header[5])]
    dtype, count = get_dpf_dtype_count(dpf, data['data_size'])
    data['y'] = np.zeros((data['num_blocks'], count), dtype=dtype)
    for entry in STRUCT_3D_INFO_BLOCK:
        data[entry['key']] = np.zeros((data['num_blocks']), dtype=entry['dtype'])
    # The metadata layout is the same for every sub block, so build its struct format once.
    info_fmt = '<' + ''.join(e['fmt'] for e in STRUCT_3D_INFO_BLOCK)
    offset = data['offset']
    for i in range(data['num_blocks']):
        # Read in place via `offset` rather than copying the remainder of the block on every iteration.
        data['y'][i] = np.frombuffer(blockbytes, dtype=dtype, count=count, offset=offset)
        offset = offset + data['data_size']
        info_vals = struct.unpack_from(info_fmt, blockbytes, offset)
        for entry, info_val in zip(STRUCT_3D_INFO_BLOCK, info_vals):
            data[entry['key']][i] = info_val
        offset = offset + data['info_size']
    return data


def parse_text(block_bytes: bytes) -> str:
    '''Parses an OPUS file block as text (e.g. history or file-log block).

    The history (aka file-log) block of an OPUS file contains some information about how the file was generated and
    edits that have been performed on the file. This function parses the text block but does not take any steps to
    parameterize what is contained in the text. The history block is generally not needed to retrieve the file data
    and metadata, but might be useful for inspecting the file.

    Args:
        block_bytes: raw bytes of the text block (e.g. history or file-log)

    Returns:
        text: string of text contained in the file block. NUL-separated entries are decoded as latin-1 and joined
            with newlines; empty entries are dropped.
    '''
    # latin-1 maps every possible byte value, so decoding cannot fail and needs no fallback.
    entries = bytes(block_bytes).split(b'\x00')
    return '\n'.join(entry.decode('latin-1') for entry in entries if entry)
def read_opus_file_bytes(filepath) -> bytes:
    '''Returns `bytes` of an OPUS file specified by `filepath` (or `None`).

    Function determines if `filepath` points to an OPUS file by reading the first four bytes which are always the same
    for OPUS files. If `filepath` points to a non-OPUS file (including an empty file or one shorter than four bytes),
    the function returns `None`. Otherwise the function returns the entire file as raw `bytes`.

    Args:
        filepath (str or Path): full filepath to OPUS file

    Returns:
        **filebytes (bytes):** raw bytes of OPUS file or `None` (if filepath does not point to an OPUS file)

    Raises:
        FileNotFoundError: if `filepath` does not point to an existing regular file.
    '''
    if not os.path.isfile(filepath):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filepath)
    with open(filepath, 'rb') as f:
        try:
            # A short or empty file simply yields fewer than four bytes here (no exception is raised).
            first_four = f.read(4)
            if first_four != b'\n\n\xfe\xfe':
                return None
            return first_four + f.read()
        except OSError:
            # Best effort: a file that cannot be read is reported the same way as a non-OPUS file.
            return None
Returns bytes
of an OPUS file specified by filepath
(or None
).
Function determines if filepath
points to an OPUS file by reading the first four bytes which are always the same
for OPUS files. If filepath points to a non-OPUS file, the function returns None. If filepath does not point to an existing file, a FileNotFoundError is raised. Otherwise
the function returns the entire file as raw bytes
.
Arguments:
- filepath (str or Path): full filepath to OPUS file
Returns:
filebytes (bytes): raw bytes of OPUS file or
None
(if filepath does not point to an OPUS file)
def get_block_type(type_int: int) -> tuple:
    '''Converts an int32 block type code to a six-integer tuple `block_type`.

    This function is used to decode the `type_int` from the directory block of an OPUS file into a tuple of integers.
    Each integer in the tuple provides information about the associated data block. The fields are extracted with
    shifts and masks, so a negative `type_int` (sign bit set) is decoded from its two's complement bit pattern.

    Args:
        type_int: 32-bit integer decoded from file directory block

    Returns:
        **block_type (tuple):** six-integer tuple which specifies the block type
    '''
    # (bit offset, bit width) of each field, starting from the least-significant bits
    fields = ((0, 2), (2, 2), (4, 6), (10, 7), (17, 2), (19, 3))
    return tuple((type_int >> shift) & ((1 << width) - 1) for shift, width in fields)
Converts an int32 block type code to a six-integer tuple block_type
.
This function is used to decode the type_int
from the directory block of an OPUS file into a tuple of integers.
Each integer in the tuple provides information about the associated data block.
Arguments:
- type_int: 32-bit integer decoded from file directory block
Returns:
block_type (tuple): six-integer tuple which specifies the block type
def parse_header(filebytes: bytes) -> tuple:
    '''Parses the OPUS file header.

    The header of an OPUS file contains some basic information about the file including the version number, location of
    the directory block, and number of blocks in the file. This header is first to be parsed as it specifies how to
    read the file directory block (which contains information about each block in the file)

    Args:
        filebytes: raw bytes of OPUS file (all bytes)

    Returns:
        **header_info (tuple):**
            (
                **version (float64):** program version number as a floating-point date (later versions always greater)
                **directory_start (int32):** pointer to start location of directory block (number of bytes)
                **max_blocks (int32):** maximum number of blocks supported by the directory block (this should only be
                    relevant when trying to edit an OPUS file, i.e. when adding data blocks to a file)
                **num_blocks (int32):** total number of blocks in the opus file
            )

    Raises:
        struct.error: if `filebytes` is too short to contain the 24-byte header.
    '''
    # One explicit little-endian read: float64 at byte 4 followed by three int32 at bytes 12, 16 and 20.
    version, directory_start, max_blocks, num_blocks = struct.unpack_from('<d3i', filebytes, 4)
    return version, directory_start, max_blocks, num_blocks
Parses the OPUS file header.
The header of an OPUS file contains some basic information about the file including the version number, location of the directory block, and number of blocks in the file. This header is first to be parsed as it specifies how to read the file directory block (which contains information about each block in the file)
Arguments:
- filebytes: raw bytes of OPUS file (all bytes)
Returns:
header_info (tuple):
(
version (float64): program version number as a floating-point date (later versions always greater)
directory_start (int32): pointer to start location of directory block (number of bytes)
max_blocks (int32): maximum number of blocks supported by the directory block (this should only be relevant when trying to edit an OPUS file, i.e. when adding data blocks to a file)
num_blocks (int32): total number of blocks in the opus file
)
def parse_directory(blockbytes: bytes) -> list:
    '''Parses directory block of OPUS file and returns a list of block info tuples: (type, size, start).

    The directory block of an OPUS file contains information about every block in the file. The block information is
    stored as three int32 values: `type_int`, `size_int`, `start`. `type_int` is an integer representation of the block
    type. The bits of this `type_int` have meaning and are parsed into a tuple using `get_block_type`. The `size_int` is
    the size of the block in 32-bit words. `start` is the starting location of the block (in number of bytes).

    Parsing stops at the first entry whose `start` is not positive. Trailing bytes that do not form a complete 12-byte
    entry are ignored.

    Args:
        blockbytes: raw bytes of an OPUS file directory block

    Returns:
        **blocks (list):** list of block_info tuples
            **block_info (tuple):**
                (
                    **block_type (tuple):** six-integer tuple which specifies the block type (see: `get_block_type`)
                    **size (int):** size (number of bytes) of the block
                    **start (int):** pointer to start location of the block (number of bytes)
                )
    '''
    entry_size = 12  # three int32 values per directory entry
    blocks = []
    # Only visit complete entries so a partial trailing entry cannot raise struct.error.
    for loc in range(0, len(blockbytes) - entry_size + 1, entry_size):
        type_int, size_int, start = struct.unpack_from('<3i', blockbytes, loc)
        if start <= 0:
            break
        blocks.append((get_block_type(type_int), size_int * 4, start))
    return blocks
Parses directory block of OPUS file and returns a list of block info tuples: (type, size, start).
The directory block of an OPUS file contains information about every block in the file. The block information is
stored as three int32 values: type_int
, size_int
, start
. type_int
is an integer representation of the block
type. The bits of this type_int
have meaning and are parsed into a tuple using get_block_type
. The size_int
is
the size of the block in 32-bit words. start
is the starting location of the block (in number of bytes).
Arguments:
- blockbytes: raw bytes of an OPUS file directory block
Returns:
blocks (list): list of block_info tuples block_info (tuple): (
block_type (tuple): six-integer tuple which specifies the block type (see: get_block_type)
size (int): size (number of bytes) of the block
start (int): pointer to start location of the block (number of bytes)
)
def parse_params(blockbytes: bytes) -> dict:
    '''Parses the bytes in a parameter block and returns a dict containing the decoded keys and vals.

    Parameter blocks are in the form: `XXX`, `dtype_code`, `size`, `val`. `XXX` is a three char abbreviation of the
    parameter (key). The value of the parameter is decoded according to the `dtype_code` and size integers to be either:
    `int`, `float`, or `string`. Parsing stops at the `END` key, when fewer than eight bytes remain for the next
    parameter header, or when a negative size is encountered (which would otherwise prevent forward progress).

    Args:
        blockbytes: raw bytes of an OPUS file parameter block

    Returns:
        **params (dict):** key/value pairs where key is three char string (lowercase) and value can be `int`, `float`
            or `string`. A value that cannot be unpacked is stored as a string starting with `Failed to decode: `.
    '''
    header_size = 8  # 3-byte key + 1 unused byte + two int16 (dtype_code, size in 16-bit words)
    total = len(blockbytes)
    params = dict()
    loc = 0
    while loc + header_size <= total:
        key = blockbytes[loc:loc + 3].decode('utf-8')
        if key == 'END':
            break
        dtype_code, val_size = struct.unpack_from('<2h', blockbytes, loc + 4)
        if val_size < 0:
            break  # corrupt size: advancing by it could move backwards and loop forever
        val_size = val_size * 2
        if dtype_code == 0:
            fmt_str = '<i'
        elif dtype_code == 1:
            fmt_str = '<d'
        else:
            fmt_str = '<' + str(val_size) + 's'
        try:
            val = struct.unpack_from(fmt_str, blockbytes, loc + 8)[0]
        except struct.error as e:
            val = 'Failed to decode: ' + str(e)
        else:
            if isinstance(val, bytes):
                # Keep only the text before the first NUL terminator (if any).
                val = val.split(b'\x00', 1)[0].decode('latin-1')
        params[key.lower()] = val
        loc = loc + val_size + header_size
    return params
Parses the bytes in a parameter block and returns a dict containing the decoded keys and vals.
Parameter blocks are in the form: XXX
, dtype_code
, size
, val
. XXX
is a three char abbreviation of the
parameter (key). The value of the parameter is decoded according to the dtype_code
and size integers to be either:
int
, float
, or string
.
Arguments:
- blockbytes: raw bytes of an OPUS file parameter block
Returns:
params (dict): key/value pairs where key is a three char string (lowercase) and value can be int, float or string.
def get_dpf_dtype_count(dpf: int, size: int) -> tuple:
    '''Returns numpy dtype and array count from the data point format (dpf) and block size (in bytes).

    Args:
        dpf: data point format integer stored in data status block.
            dpf = 2 -> array of int32
            any other value -> array of float32
        size: Block size in bytes.

    Returns:
        **dtype (numpy.dtype):** `numpy` dtype for defining an `ndarray` to store the data
        **count (int):** number of whole elements of `dtype` that fit in `size` bytes (rounded down).
    '''
    dtype = np.int32 if dpf == 2 else np.float32
    # Floor division: rounding up would request more elements than the block contains.
    count = int(size // np.dtype(dtype).itemsize)
    return dtype, count
Returns numpy dtype and array count from the data point format (dpf) and block size (in bytes).
Arguments:
- dpf: data point format integer stored in data status block. dpf = 1 -> array of float32 dpf = 2 -> array of int32
- size: Block size in bytes.
Returns:
dtype (numpy.dtype): numpy dtype for defining an ndarray to store the data
count (int): length of array calculated from the block size and byte size of the dtype.
def parse_data(blockbytes: bytes, dpf: int = 1) -> np.ndarray:
    '''Decodes a data block into a one-dimensional `numpy` array of y-values.

    A data block holds only the y-values of a data array (no metadata); the matching data status parameter block
    describes how to build the x-axis and, through its `DPF` parameter, which element type the values use. The element
    type and element count are obtained from `get_dpf_dtype_count` using `dpf` and the length of `blockbytes`.

    Args:
        blockbytes: raw bytes of data block
        dpf: data-point-format integer stored in corresponding data status block.

    Returns:
        **y_array (numpy.ndarray):** `numpy` array of y values contained in the data block
    '''
    n_bytes = len(blockbytes)
    element_type, n_elements = get_dpf_dtype_count(dpf, n_bytes)
    y_array = np.frombuffer(blockbytes, dtype=element_type, count=n_elements)
    return y_array
Parses the bytes in a data block and returns a numpy
array.
Data blocks contain no metadata, only the y-values of a data array. Data arrays include: single-channel sample,
reference, phase, interferograms, and a variety of resultant data (transmission, absorption, etc.). Every data
block should have a corresponding data status parameter block which can be used to generate the x-array values for
the data block. The data status block also specifies the data type of the data array with the DPF
parameter. It
appears that OPUS currently exclusively stores data blocks as 32-bit floats, but has a reservation for 32-bit
integers when DPF
= 2.
Arguments:
- blockbytes: raw bytes of data block
- dpf: data-point-format integer stored in corresponding data status block.
Returns:
y_array (numpy.ndarray):
numpy
array of y values contained in the data block
def parse_data_series(blockbytes: bytes, dpf: int = 1) -> dict:
    '''Parses the bytes in a 3D data block (series of spectra) and returns a data `dict` containing data and metadata.

    3D data blocks are structured differently than standard data blocks. In addition to the series of spectra, they
    include metadata for each spectrum. This function returns a `dict` containing all the extracted information
    from the data block. The series of spectra is formed into a 2D array while metadata captured for each spectrum is
    formed into a 1D array (length = number of spectral measurements in the series).

    Args:
        blockbytes: raw bytes of the data series block
        dpf: data-point-format integer stored in corresponding data status block.

    Returns:
        **data_dict (dict):** `dict` containing all extracted information from the data block
            {
                **version:** file format version number (should be 0)
                **num_blocks:** number of sub blocks; each sub block features a data spectrum and associated metadata
                **offset:** offset in bytes to the first sub data block
                **data_size:** size in bytes of each sub data block
                **info_size:** size in bytes of the metadata info block immediately following the sub data block
                **store_table:** run numbers of the first and last blocks to keep track of skipped spectra
                **y:** 2D `numpy` array containing all spectra (C-order)
                **metadata arrays:** series of metadata arrays in 1D array format (e.g. `npt`, `mny`, `mxy`, `ert`).
                    The most useful one is generally `ert`, which can be used as the time axis for 3D data plots.
            }
    '''
    # Explicit '<' forces 4-byte values; a bare 'l' is 8 bytes on 64-bit Linux/macOS and would misread the 24-byte
    # header that the store table (starting at byte 24) immediately follows.
    header = struct.unpack_from('<6l', blockbytes, 0)
    data = {
        'version': header[0],
        'num_blocks': header[1],
        'offset': header[2],
        'data_size': header[3],
        'info_size': header[4],
    }
    data['store_table'] = [struct.unpack_from('<2l', blockbytes, 24 + i * 8) for i in range(header[5])]
    dtype, count = get_dpf_dtype_count(dpf, data['data_size'])
    data['y'] = np.zeros((data['num_blocks'], count), dtype=dtype)
    for entry in STRUCT_3D_INFO_BLOCK:
        data[entry['key']] = np.zeros((data['num_blocks']), dtype=entry['dtype'])
    # The metadata layout is the same for every sub block, so build its struct format once.
    info_fmt = '<' + ''.join(e['fmt'] for e in STRUCT_3D_INFO_BLOCK)
    offset = data['offset']
    for i in range(data['num_blocks']):
        # Read in place via `offset` rather than copying the remainder of the block on every iteration.
        data['y'][i] = np.frombuffer(blockbytes, dtype=dtype, count=count, offset=offset)
        offset = offset + data['data_size']
        info_vals = struct.unpack_from(info_fmt, blockbytes, offset)
        for entry, info_val in zip(STRUCT_3D_INFO_BLOCK, info_vals):
            data[entry['key']][i] = info_val
        offset = offset + data['info_size']
    return data
Parses the bytes in a 3D data block (series of spectra) and returns a data dict
containing data and metadata.
3D data blocks are structured differently than standard data blocks. In addition to the series of spectra, they
include metadata for each spectrum. This function returns a dict
containing all the extracted information
from the data block. The series of spectra is formed into a 2D array while metadata captured for each spectrum is
formed into a 1D array (length = number of spectral measurements in the series).
Arguments:
- blockbytes: raw bytes of the data series block
- dpf: data-point-format integer stored in corresponding data status block.
Returns:
data_dict (dict):
dict
containing all extracted information from the data block
{
version: file format version number (should be 0)
num_blocks: number of sub blocks; each sub block features a data spectra and associated metadata
offset: offset in bytes to the first sub data block
data_size: size in bytes of each sub data block
info_size: size in bytes of the metadata info block immediately following the sub data block
store_table: run numbers of the first and last blocks to keep track of skipped spectra
y: 2D numpy array containing all spectra (C-order)
metadata arrays: series of metadata arrays in 1D array format (e.g. npt, mny, mxy, ert). The most useful one is generally ert, which can be used as the time axis for 3D data plots.
}
def parse_text(block_bytes: bytes) -> str:
    '''Parses an OPUS file block as text (e.g. history or file-log block).

    The history (aka file-log) block of an OPUS file contains some information about how the file was generated and
    edits that have been performed on the file. This function parses the text block but does not take any steps to
    parameterize what is contained in the text. The history block is generally not needed to retrieve the file data
    and metadata, but might be useful for inspecting the file.

    Args:
        block_bytes: raw bytes of the text block (e.g. history or file-log)

    Returns:
        text: string of text contained in the file block. NUL-separated entries are decoded as latin-1 and joined
            with newlines; empty entries are dropped.
    '''
    # latin-1 maps every possible byte value, so decoding cannot fail and needs no fallback.
    entries = bytes(block_bytes).split(b'\x00')
    return '\n'.join(entry.decode('latin-1') for entry in entries if entry)
Parses an OPUS file block as text (e.g. history or file-log block).
The history (aka file-log) block of an OPUS file contains some information about how the file was generated and edits that have been performed on the file. This function parses the text block but does not take any steps to parameterize what is contained in the text. The history block is generally not needed to retrieve the file data and metadata, but might be useful for inspecting the file.
Arguments:
- block_bytes: raw bytes of the text block (e.g. history or file-log)
Returns:
text: string of text contained in the file block.