brukeropus.file.parse

  1import os, struct, errno
  2import numpy as np
  3from brukeropus.file.constants import STRUCT_3D_INFO_BLOCK, SUBREPORT_TYPE_FMT
  4
  5
  6__docformat__ = "google"
  7
  8
  9def read_opus_file_bytes(filepath) -> bytes:
 10    '''Returns `bytes` of an OPUS file specified by `filepath` (or `None`).
 11
 12    Function determines if `filepath` points to an OPUS file by reading the first four bytes which are always the same
 13    for OPUS files.  If `filepath` is not a file, or points to a non-OPUS file, the function returns `None`.  Otherwise
 14    the function returns the entire file as raw `bytes`.
 15
 16    Args:
 17        filepath (str or Path): full filepath to OPUS file
 18
 19    Returns:
 20        **filebytes (bytes):** raw bytes of OPUS file or `None` (if filepath does not point to an OPUS file)
 21    '''
 22    filebytes = None
 23    if os.path.isfile(filepath):
 24        with open(filepath, 'rb') as f:
 25            try:
 26                first_four = f.read(4)
 27                if first_four == b'\n\n\xfe\xfe':
 28                    filebytes = first_four + f.read()
 29            except:
 30                pass # Empty file (or file with fewer than 4 bytes)
 31    else:
 32        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filepath)
 33    return filebytes
 34
 35
 36def get_block_type(type_int: int) -> tuple:
 37    '''Converts an int32 block type code to a six-integer tuple `block_type`.
 38
 39    This function is used to decode the `type_int` from the directory block of an OPUS file into a tuple of integers.
 40    Each integer in the tuple provides information about the associated data block.
 41
 42    Args:
 43        type_int: 32-bit integer decoded from file directory block
 44
 45    Returns:
 46        **block_type (tuple):** six-integer tuple which specifies the block type
 47    '''
 48    type_bit_str = format(type_int, '#034b')  # binary representation as string
 49    block_type = (
 50        int(type_bit_str[-2:], 2),
 51        int(type_bit_str[-4:-2], 2),
 52        int(type_bit_str[-10:-4], 2),
 53        int(type_bit_str[-17:-10], 2),
 54        int(type_bit_str[-19:-17], 2),
 55        int(type_bit_str[-22:-19], 2)
 56    )
 57    return block_type
 58
 59
 60def decode_str(size: int, blockbytes: bytes, offset: int) -> str:
 61    '''Decode string that is packed as bytes in `blockbytes` starting from `offset`.
 62
 63    Strings are frequently stored in OPUS files with a size designation that is larger than the actual string. The end
 64    of the string is designated by a terminator byte: b'\x00'. This function unpacks the string using the size
 65    designator, truncates at the terminator byte if found, and decodes as "latin-1".
 66
 67    Args:
 68        size: size (number of bytes) of the string
 69        blockbytes: raw bytes of an OPUS file block
 70        offset: offset location where string begins in blockbytes
 71
 72    Returns:
 73        string: decoded string
 74    '''
 75    fmt = '<' + str(size) + 's'
 76    try:
 77        val = struct.unpack_from(fmt, blockbytes, offset)[0]
 78        x00_pos = val.find(b'\x00')
 79        if x00_pos != -1:
 80            val = val[:x00_pos].decode('latin-1')
 81        else:
 82            val = val.decode('latin-1')
 83    except Exception as e:
 84        val = 'Failed to decode: ' + str(e)
 85    return val
 86
 87
 88def parse_header(filebytes: bytes) -> tuple:
 89    '''Parses the OPUS file header.
 90
 91    The header of an OPUS file contains some basic information about the file including the version number, location of
 92    the directory block, and number of blocks in the file. This header is parsed first, as it specifies how to
 93    read the file directory block (which contains information about each block in the file).
 94
 95    Args:
 96        filebytes: raw bytes of OPUS file (all bytes)
 97
 98    Returns:
 99        **header_info (tuple):**  
100            (  
101                **version (float64):** program version number as a floating-point date (later versions always greater)  
102                **directory_start (int32):** pointer to start location of directory block (number of bytes)  
103                **max_blocks (int32):** maximum number of blocks supported by the directory block (this should only be
104                    relevant when trying to edit an OPUS file, i.e. when adding data blocks to a file)  
105                **num_blocks (int32):** total number of blocks in the opus file  
106            )
107    '''
108    version = struct.unpack_from('<d', filebytes, 4)[0]
109    directory_start = struct.unpack_from('<i', filebytes, 12)[0]
110    max_blocks = struct.unpack_from('<i', filebytes, 16)[0]
111    num_blocks = struct.unpack_from('<i', filebytes, 20)[0]
112    return version, directory_start, max_blocks, num_blocks
113
114
115def parse_directory(blockbytes: bytes) -> list:
116    '''Parses directory block of OPUS file and returns a list of block info tuples: (type, size, start).
117
118    The directory block of an OPUS file contains information about every block in the file. The block information is
119    stored as three int32 values: `type_int`, `size_int`, `start`.  `type_int` is an integer representation of the block
120    type. The bits of this `type_int` have meaning and are parsed into a tuple using `get_block_type`. The `size_int` is
121    the size of the block in 32-bit words. `start` is the starting location of the block (in number of bytes).
122
123    Args:
124        blockbytes: raw bytes of an OPUS file directory block
125
126    Returns:
127        **blocks (list):** list of block_info tuples
128            **block_info (tuple):**
129                (  
130                    **block_type (tuple):** six-integer tuple which specifies the block type (see: `get_block_type`)  
131                    **size (int):** size (number of bytes) of the block  
132                    **start (int):** pointer to start location of the block (number of bytes)
133                )
134    '''
135    loc = 0
136    blocks = []
137    while loc < len(blockbytes):
138        type_int, size_int, start = struct.unpack_from('<3i', blockbytes, loc)
139        loc = loc + 12
140        if start > 0:
141            block_type = get_block_type(type_int)
142            size = size_int*4
143            blocks.append((block_type, size, start))
144        else:
145            break
146    return blocks
147
148
149def parse_params(blockbytes: bytes) -> dict:
150    '''Parses the bytes in a parameter block and returns a dict containing the decoded keys and vals.
151
152    Parameter blocks are in the form: `XXX`, `dtype_code`, `size`, `val`.  `XXX` is a three char abbreviation of the
153    parameter (key). The value of the parameter is decoded according to the `dtype_code` and size integers to be either:
154    `int`, `float`, or `string`.
155
156    Args:
157        blockbytes: raw bytes of an OPUS file parameter block
158
159    Returns:
160        **params (dict):** `dict` of (key, value) pairs where each key is a three-char string (lowercase) and each value
161            is an `int`, `float`, or `string`.
162    '''
163    loc = 0
164    params = dict()
165    while loc < len(blockbytes):
166        key = blockbytes[loc:loc + 3].decode('utf-8')
167        if key == 'END':
168            break
169        dtype_code, val_size = struct.unpack_from('<2h', blockbytes[loc + 4:loc + 8])
170        val_size = val_size * 2
171        if dtype_code == 0:
172            val = struct.unpack_from('<i', blockbytes, loc + 8)[0]
173        elif dtype_code == 1:
174            val = struct.unpack_from('<d', blockbytes, loc + 8)[0]
175        else:
176            val = decode_str(val_size, blockbytes, loc + 8)
177        params[key.lower()] = val
178        loc = loc + val_size + 8
179    return params
180
181
182def get_dpf_dtype_count(dpf: int, size: int) -> tuple:
183    '''Returns numpy dtype and array count from the data point format (dpf) and block size (in bytes).
184
185    Args:
186        dpf: data point format integer stored in data status block.
187            dpf = 1 -> array of float32
188            dpf = 2 -> array of int32
189        size: Block size in bytes.
190
191    Returns:
192        **dtype (numpy.dtype):** `numpy` dtype for defining an `ndarray` to store the data
193        **count (int):** length of array calculated from the block size and byte size of the dtype.
194    '''
195    if dpf == 1:
196        dtype = np.float32
197        count = round(size/4)
198    elif dpf == 2:
199        dtype = np.int32
200        count = round(size/4)
201    else:
202        print('Unknown Data Point Format Requested:', dpf, '[using default: `float32`]')
203        dtype = np.float32
204        count = round(size/4)
205    return dtype, count
206
207
208def parse_data(blockbytes: bytes, dpf: int = 1) -> np.ndarray:
209    '''Parses the bytes in a data block and returns a `numpy` array.
210
211    Data blocks contain no metadata, only the y-values of a data array. Data arrays include: single-channel sample,
212    reference, phase, interferograms, and a variety of resultant data (transmission, absorption, etc.).  Every data
213    block should have a corresponding data status parameter block which can be used to generate the x-array values for
214    the data block. The data status block also specifies the data type of the data array with the `DPF` parameter. It
215    appears that OPUS currently exclusively stores data blocks as 32-bit floats, but has a reservation for 32-bit
216    integers when `DPF` = 2.
217
218    Args:
219        blockbytes: raw bytes of data block
220        dpf: data-point-format integer stored in corresponding data status block.
221
222    Returns:
223        **y_array (numpy.ndarray):** `numpy` array of y values contained in the data block
224    '''
225    dtype, count = get_dpf_dtype_count(dpf=dpf, size=len(blockbytes))
226    return np.frombuffer(blockbytes, dtype=dtype, count=count)
227
228
229def parse_data_compact(blockbytes: bytes, npt: int, dpf: int = 1) -> np.ndarray:
230    '''Parses the bytes in a data compact block and returns a `numpy` array.
231
232    Some data blocks are stored in the "Compact" block format, which includes some metadata that precedes the raw data. At
233    this time, the metadata is ignored, and the compact spectrum is extracted from the last bytes of the block that fit
234    the known array size (specified by "npt" in the corresponding data status block).
235
236    Args:
237        blockbytes: raw bytes of data block
238        npt: number of data points in the spectra (from data status block)
239        dpf: data-point-format integer stored in corresponding data status block.
240
241    Returns:
242        **y_array (numpy.ndarray):** `numpy` array of y values contained in the data block
243    '''
244    dtype, count = get_dpf_dtype_count(dpf=dpf, size=len(blockbytes))
246    return np.frombuffer(blockbytes, dtype=dtype, count=count)[-npt:]
246
247
248def parse_data_series(blockbytes: bytes, dpf: int = 1) -> dict:
249    '''Parses the bytes in a 3D data block (series of spectra) and returns a data `dict` containing data and metadata.
250
251    3D data blocks are structured differently than standard data blocks. In addition to the series of spectra, they
252    include metadata for each spectrum.  This function returns a `dict` containing all the extracted information
253    from the data block.  The series of spectra is formed into a 2D array, while the metadata captured for each spectrum
254    is formed into 1D arrays (length = number of spectral measurements in the series).
255
256    Args:
257        blockbytes: raw bytes of the data series block
258        dpf: data-point-format integer stored in corresponding data status block.
259
260    Returns:
261        **data_dict (dict):** `dict` containing all extracted information from the data block  
262            {  
263                **version:** file format version number (should be 0)  
264                **num_blocks:** number of sub blocks; each sub block features a data spectra and associated metadata  
265                **offset:** offset in bytes to the first sub data block  
266                **data_size:** size in bytes of each sub data block  
267                **info_size:** size in bytes of the metadata info block immediately following the sub data block  
268                **store_table:** run numbers of the first and last blocks to keep track of skipped spectra  
269                **y:** 2D `numpy` array containing all spectra (C-order)  
270                **metadata arrays:** series of metadata arrays in 1D array format (e.g. `npt`, `mny`, `mxy`, `ert`).
271                    The most useful one is generally `ert`, which can be used as the time axis for 3D data plots.
272            }
273    '''
274    header = struct.unpack_from('<6i', blockbytes, 0)
275    data = {
276        'version': header[0],
277        'num_blocks': header[1],
278        'offset': header[2],
279        'data_size': header[3],
280        'info_size': header[4],
281    }
282    data['store_table'] = [struct.unpack_from('<2i', blockbytes, 24 + i * 8) for i in range(header[5])]
283    dtype, count = get_dpf_dtype_count(dpf, data['data_size'])
284    data['y'] = np.zeros((data['num_blocks'], count), dtype=dtype)
285    for entry in STRUCT_3D_INFO_BLOCK:
286        data[entry['key']] = np.zeros((data['num_blocks']), dtype=entry['dtype'])
287    offset = data['offset']
288    for i in range(data['num_blocks']):
289        data['y'][i] = np.frombuffer(blockbytes[offset:], dtype=dtype, count=count)
290        offset = offset + data['data_size']
291        info_vals = struct.unpack_from('<' + ''.join([e['fmt'] for e in STRUCT_3D_INFO_BLOCK]), blockbytes, offset)
292        for j, entry in enumerate(STRUCT_3D_INFO_BLOCK):
293            data[entry['key']][i] = info_vals[j]
294        offset = offset + data['info_size']
295        if offset >= len(blockbytes):
296            num_spectra = i + 1
297            break # Not all blocks are necessarily stored (see Store Table)
298        num_spectra = i + 1
299    data['y'] = data['y'][:num_spectra]
300    for entry in STRUCT_3D_INFO_BLOCK:
301        data[entry['key']] = data[entry['key']][:num_spectra]
302    return data
303
304
305def parse_text(block_bytes: bytes, encoding='utf-8', backup_encoding='utf-16') -> str:
307    '''Parses an OPUS file block as text (e.g. history or file-log block).
307
308    The history (aka file-log) block of an OPUS file contains some information about how the file was generated and
309    edits that have been performed on the file.  This function parses the text block but does not take any steps to
310    parameterizing what is contained in the text.  The history block is generally not needed to retrieve the file data
311    and metadata, but might be useful for inspecting the file.
312
313    Args:
314        block_bytes: raw bytes of the text block (e.g. history or file-log)
315        encoding: string representing the text encoding. Can be set to "utf-16" for Chinese character support
316        backup_encoding: if default encoding fails, this encoding string will be attempted.
317
318    Returns:
319        text: string of text contained in the file block.
320    '''
321    byte_string = struct.unpack('<' + str(len(block_bytes)) + 's', block_bytes)[0]
322    byte_strings = byte_string.split(b'\x00')
323    strings = []
324    for entry in byte_strings:
325        if entry != b'':
326            try:
327                strings.append(entry.decode(encoding))
328            except Exception:
329                try:
330                    strings.append(entry.decode(backup_encoding))
331                except Exception:
332                    try:
333                        strings.append(entry.decode('latin-1'))
334                    except Exception as e:
335                        strings.append('<Decode Exception>: ' + str(e))
336    return '\n'.join(strings)
337
338
339def parse_subreport(subreport_bytes: bytes) -> dict:
341    '''Parses the bytes of a subreport and returns the extracted data as a dictionary.
341
342    Subreports are contained within a report block (e.g. Multi-Evaluation Test Report). A report can contain multiple
343    subreports, and they generally follow a table format. This sub-block is organized with a mini parameter block
344    followed by packed data. The mini parameter block contains information about how to read the packed data:
345        nco: number of columns
346        nln: number of rows
347        siz: size of mini parameter block (number of bytes)
348        src: size in bytes of entire row of data (offset for extracting column data from row 2, 3 ...)
349        f00, f01 ... fxx: start position of data in column 0, 1 ... xx (relative to end of mini param block)
350        t00, t01 ... txx: integer representing type of data (e.g. int32, float32, float64, str, etc.)
351        s00, s01 ... sxx: column header label
352        p00, p01 ... pxx: formatting string for numbers in column 0, 1 ... xx (not included for every column)
353
354    Args:
355        subreport_bytes: raw bytes of the subreport. Needs to start precisely where subreport begins, but can include
356            data beyond the end of the subreport (i.e. end of subreport does not need to be determined a priori).
357
358    Returns:
359        **subreport (dict):** `dict` containing subreport data and extraction/formatting parameters  
360            {  
361                **info:** `dict` of parameters extracted directly from subreport that describes how to read the data
362                    table and provides some basic metadata about the table (e.g. column header labels).
363                **data:** `list` of lists of data (table format) contained in the subreport
364            }
365    '''
366    info = parse_params(subreport_bytes)
367    data = []
368    for row in range(info['nln']):
369        data.append([])
370        for col in range(info['nco']):
371            offset = info['siz'] + row * info['src'] + info['f' + f'{col:02}']
372            type_int = info['t' + f'{col:02}']
373            if col < info['nco'] - 1:
374                size = min([type_int - 1000, info['f' + f'{col + 1:02}'] - info['f' + f'{col:02}']])
375            else:
376                size = info['src'] - info['f' + f'{col:02}']
377            if type_int > 1000:
378                val = decode_str(size, subreport_bytes, offset)
379            elif type_int in SUBREPORT_TYPE_FMT.keys():
380                fmt = SUBREPORT_TYPE_FMT[type_int]
381                val = struct.unpack_from(fmt, subreport_bytes, offset)[0]
382            else:
383                val = subreport_bytes[offset:offset + size]
384            data[row].append(val)
385    return {'info': info, 'data': data}
386
387
388def parse_report(blockbytes: bytes) -> dict:
389    '''Parses the report block of an OPUS file, such as Multi-Evaluation test reports, returning the report as a dict.
390
391    Report blocks are formatted in a very general way, potentially enabling a variety of different report structures.
392    This algorithm was developed using several OPUS files with a variety of different Multi-Evaluation Test Reports.
393    It is possible that other classes of test reports could be generated by OPUS that might change the structure, but
394    the overall organization and decoding methods should be similar.  In particular, the report block structure might
395    support multiple reports, but no such file has been available for testing to date.  This algorithm will extract a
396    single report and all the associated subreports.
397
398    Report blocks start with a mini parameter block that begins after the 12th byte.  It contains the following:
399        tit: Title of the report
400        f00: Starting position of the report summary data
401        Known unused parameters: bid, nrp, siz, e00, z00
402    This is followed by the report summary. For a multi-evaluation test report, this is a pair of tables summarizing the
403    methods applied to the spectrum.  It also specifies the number of subreports that follow, and the starting position
404    and title of each subreport. Some of the keys in this parameter set are described in the `parse_subreport` method.
405    Other parameters in the report summary include:
406        sub: Number of subreports
407        h00, h01 ... hxx: header labels of first summary table
408        v00, v01 ... vxx: corresponding values of first summary table
409        g00, g01 ... gxx: starting positions of each subreport relative to the start of this report summary
410        u00, u01 ... uxx: titles of each subreport
411    It should be noted that the only class of reports used for testing this algorithm were a variety of multi-evaluation
412    test reports. It is possible there are other similar report blocks OPUS writes that follow a similar structure but
413    could vary in some way that is incompatible with some of the assumptions made by this algorithm.
414
415    Args:
416        blockbytes: raw bytes of an OPUS file report block
417
418    Returns:
419        **report (dict):** `dict` containing report and subreport data 
420            {
421                **header:** `dict` of parameters from first mini param block
422                **info:** `dict` of parameters extracted from second mini param block
423                **data:** `list` of data that comprises second summary table
424                **subreports:** `list` of subreport `dict`s beneath the main report.
425            }
426    '''
427    header_ints = struct.unpack('<3i', blockbytes[:12])
428    header_info = parse_params(blockbytes[12:])
429    header_info['ints'] = header_ints
430    report_info = parse_subreport(blockbytes[header_info['f00']:])
431    report = {'header': header_info, 'info': report_info['info'], 'data': report_info['data']}
432    subreports = []
433    for idx in range(report_info['info']['sub']):
434        offset = header_info['f00'] + report_info['info']['g' + f'{idx:02}']
435        subreports.append(parse_subreport(blockbytes[offset:]))
436    report['subreports'] = subreports
437    return report
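
The functions above compose into a simple read pipeline: the header locates the directory block, the directory locates every other block, and each block's bytes are then handed to the matching parser. The snippet below is a minimal sketch of that flow (the file path 'sample.0' is a hypothetical example, and the dispatch between parameter and data blocks is left as comments because interpreting the block-type codes is handled outside this module):

    from brukeropus.file.parse import (read_opus_file_bytes, parse_header,
                                       parse_directory, parse_params, parse_data)

    filebytes = read_opus_file_bytes('sample.0')  # hypothetical path; returns None for non-OPUS files
    if filebytes is not None:
        version, dir_start, max_blocks, num_blocks = parse_header(filebytes)
        # Each directory entry is three int32 values (12 bytes)
        directory = parse_directory(filebytes[dir_start:dir_start + max_blocks * 12])
        for block_type, size, start in directory:
            blockbytes = filebytes[start:start + size]
            print(block_type, size, start)
            # A parameter block decodes to a dict of (key, value) pairs:
            #     params = parse_params(blockbytes)
            # A data block decodes to a numpy array; `dpf` comes from the
            # matching data status (parameter) block:
            #     y = parse_data(blockbytes, dpf=1)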