Source code for niemafs.gcn

#! /usr/bin/env python
'''
Handle Nintendo GameCube file systems
'''

# NiemaFS imports
from niemafs.common import clean_string, FileSystem

# imports
from datetime import datetime
from io import BytesIO
from pathlib import Path
from struct import pack, unpack
from warnings import warn


[docs]
class GcmFS(FileSystem):
    '''Class to represent a `Nintendo GameCube GCM mini-DVD <https://www.gc-forever.com/yagcd/chap13.html#sec13>`_.'''
    def __init__(self, file_obj, path=None):
        # set things up
        if file_obj is None:
            raise ValueError("file_obj must be a file-like")
        super().__init__(path=path, file_obj=file_obj)
        self.boot_bin = None   # Disk Header (boot.bin)
        self.bi2_bin = None    # Disk Header Information (bi2.bin)
        self.appldr_bin = None # Apploader (appldr.bin)
        self.fst_bin = None    # File System Table (FST) (fst.bin)

        # load header to ensure file validity up-front
        self.get_boot_bin()
        self.get_bi2_bin()
        self.get_appldr_bin()
        self.get_fst_bin()


[docs]
    def parse_ascii_date(data):
        '''Parse a date in the "YYYY/MM/DD" format.

        Args:
            `data` (`bytes`): A date in the "YYYY/MM/DD" format.

        Returns:
            `datetime`: A Python `datetime` object.
        '''
        if isinstance(data, bytes):
            data = data.decode()
        return datetime.strptime(data, '%Y/%m/%d')



[docs]
    def get_boot_bin(self):
        '''Return the `Disk Header ("boot.bin") <https://www.gc-forever.com/yagcd/chap13.html#sec13.1>`_ of the GCM.

        Returns:
            `bytes`: The Disk Header ("boot.bin") of the GCM.
        '''
        if self.boot_bin is None:
            self.boot_bin = self.read_file(0x0000, 0x0440)
        return self.boot_bin



[docs]
    def get_bi2_bin(self):
        '''Return the `Disc Header Information ("bi2.bin") <https://www.gc-forever.com/yagcd/chap13.html#sec13.2>`_ of the GCM.

        Returns:
            `bytes`: The Disc Header Information ("bi2.bin") of the GCM.
        '''
        if self.bi2_bin is None:
            self.bi2_bin = self.read_file(0x0440, 0x2000)
        return self.bi2_bin



[docs]
    def get_appldr_bin(self):
        '''Return the `Apploader ("appldr.bin") <https://www.gc-forever.com/yagcd/chap13.html#sec13.3>`_ of the GCM.

        Returns:
            `bytes`: The Apploader ("appldr.bin") of the GCM.
        '''
        if self.appldr_bin is None:
            size = unpack('>I', self.read_file(0x2440 + 0x0014, 4))[0]
            self.appldr_bin = self.read_file(0x2440, size)
        return self.appldr_bin



[docs]
    def get_fst_bin(self):
        '''Return the `File System Table (FST, "fst.bin") <https://www.gc-forever.com/yagcd/chap13.html#sec13.4>`_ of the GCM.

        Returns:
            `bytes`: The File System Table (FST, "fst.bin") of the GCM.
        '''
        if self.fst_bin is None:
            parsed_boot_bin = self.parse_boot_bin()
            self.fst_bin = self.read_file(parsed_boot_bin['fst_offset'], parsed_boot_bin['fst_size'])
        return self.fst_bin



[docs]
    def parse_boot_bin(self):
        '''Return a parsed version of the `Disk Header ("boot.bin") <https://www.gc-forever.com/yagcd/chap13.html#sec13.1>`_ of the GCM.

        Returns:
            `dict`: A parsed version of the Disk Header ("boot.bin") of the GCM.
        '''
        # set things up
        data = self.get_boot_bin()
        out = dict()

        # parse raw Disk Header (boot.bin) data
        out['game_code'] =             data[0x0000 : 0x0004]                  # Game Code "XYYZ": X = Console ID, YY = Game Code, Z = Country Code
        out['maker_code'] =            data[0x0004 : 0x0006]                  # Maker Code
        out['disk_id'] =               data[0x0006]                           # Disk ID
        out['version'] =               data[0x0007]                           # Version
        out['audio_streaming'] =       data[0x0008]                           # Audio Streaming
        out['stream_buffer_size'] =    data[0x0009]                           # Stream Buffer Size
        out['offsets_0x000A_0x001B'] = data[0x000A : 0x001C]                  # Unused (should be 0s)
        out['dvd_magic_word'] =        data[0x001C : 0x0020]                  # DVD Magic Word (should be 0xc2339f3d)
        out['game_name'] =             data[0x0020 : 0x0400]                  # Game Name
        out['debug_monitor_offset'] =  unpack('>I', data[0x0400 : 0x0404])[0] # Offset of Debug Monitor (dh.bin)?
        out['debug_monitor_address'] = unpack('>I', data[0x0404 : 0x0408])[0] # Address(?) to load Debug Monitor (dh.bin)?
        out['offsets_0x0408_0x0419'] = data[0x0408 : 0x0420]                  # Unused (should be 0s)
        out['main_dol_offset'] =       unpack('>I', data[0x0420 : 0x0424])[0] # Offset of Main Executable Bootfile (main.dol)
        out['fst_offset'] =            unpack('>I', data[0x0424 : 0x0428])[0] # Offset of FST (fst.bin)
        out['fst_size'] =              unpack('>I', data[0x0428 : 0x042C])[0] # Size of FST (fst.bin)
        out['max_fst_size'] =          unpack('>I', data[0x042C : 0x0430])[0] # Max Size of FST (fst.bin) (usually same as previous, except in multi-disc games)
        out['user_position'] =         unpack('>I', data[0x0430 : 0x0434])[0] # User Position(?)
        out['user_length'] =           unpack('>I', data[0x0434 : 0x0438])[0] # User Length(?)
        out['offsets_0x0438_0x043B'] = data[0x0438 : 0x043C]                  # Unknown
        out['offsets_0x043C_0x043F'] = data[0x043C : 0x0440]                  # Unused (should be 0s)

        # clean strings
        for k in ['game_code', 'maker_code', 'game_name']:
            try:
                out[k] = clean_string(out[k])
            except:
                warn("Unable to parse Disk Header (boot.bin) '%s' as string: %s" % (k, out[k]))

        # return final parsed data
        return out



[docs]
    def parse_bi2_bin(self):
        '''Return a parsed version of the `Disc Header Information ("bi2.bin") <https://www.gc-forever.com/yagcd/chap13.html#sec13.2>`_ of the GCM.

        Returns:
            `dict`: A parsed version of the Disc Header Information ("bi2.bin") of the GCM.
        '''
        # set things up
        data = self.get_bi2_bin()
        out = dict()

        # parse raw Disk Header Information (bi2.bin) data
        out['debug_monitor_size'] =    unpack('>I', data[0x0000 : 0x0004])[0] # Debug-Monitor Size
        out['simulated_memory_size'] = unpack('>I', data[0x0004 : 0x0008])[0] # Simulated Memory Size
        out['argument_offset'] =       unpack('>I', data[0x0008 : 0x000C])[0] # Argument Offset
        out['debug_flag'] =            unpack('>I', data[0x000C : 0x0010])[0] # Debug Flag
        out['track_location'] =        unpack('>I', data[0x0010 : 0x0014])[0] # Track Location
        out['track_size'] =            unpack('>I', data[0x0014 : 0x0018])[0] # Track Size
        out['country_code'] =          unpack('>I', data[0x0018 : 0x001C])[0] # Country Code

        # return final parsed data
        return out



[docs]
    def parse_appldr_bin(self):
        '''Return a parsed version of the `Apploader ("appldr.bin") <https://www.gc-forever.com/yagcd/chap13.html#sec13.3>`_ of the GCM.

        Returns:
            `dict`: A parsed version of the Apploader ("appldr.bin") of the GCM.
        '''
        # set things up
        data = self.get_appldr_bin()
        out = dict()

        # parse raw Apploader (appldr.bin) data
        out['date'] =                  data[0x0000 : 0x000A]                  # Date (Version) of the Apploader
        out['offsets_0x000A_0x000F'] = data[0x000A : 0x0010]                  # Padding (should be 0s)
        out['apploader_entrypoint'] =  unpack('>I', data[0x0010 : 0x0014])[0] # Apploader Entrypoint
        out['apploader_size'] =        unpack('>I', data[0x0014 : 0x0018])[0] # Apploader Size
        out['trailer_size'] =          unpack('>I', data[0x0018 : 0x001C])[0] # Trailer Size
        out['offsets_0x001C_0x001F'] = data[0x001C : 0x0020]                  # Padding(?)
        out['apploader_code'] =        data[0x0020:]                          # Apploader Code (loaded to 0x81200000 in RAM)

        # parse dates
        for k in ['date']:
            try:
                out[k] = GcmFS.parse_ascii_date(out[k])
            except:
                warn("Unable to parse Apploader (appldr.bin) '%s' as date: %s" % (k, out[k]))

        # return final parsed data
        return out



[docs]
    def parse_fst(fst_ind, parent_path, fst, string_table_start):
        '''Recursively parse the `File System Table (FST) <https://www.gc-forever.com/yagcd/chap13.html#sec13.4.1>`_.

        Args:
            `fst_ind` (`int`): This entry's index in the FST.

            `parent_path` (`Path`): The local `Path` of parent of this file/directory, or `None` if this is the root directory.

            `fst` (`bytes`): The raw bytes of the FST.

            `string_table_start` (`int`): Offset in the FST where the string table begins.

        Returns:
            `dict`: The root of the parsed FST.
        '''
        # set things up
        out = {'children':list(), 'is_root':False}

        # parse raw FST entry data
        entry_start_offset = fst_ind * 12
        is_dir =        bool(fst[entry_start_offset])                                                   # Flags: 0 = file, 1 = directory
        st_index =      unpack('>I', b'\x00' + fst[entry_start_offset + 1 : entry_start_offset + 4])[0] # Filename as Offset into String Table
        out['offset'] = unpack('>I', fst[entry_start_offset + 4 : entry_start_offset + 8])[0]           # File Offset (for files) or Parent Offset (for directories)
        out['length'] = unpack('>I', fst[entry_start_offset + 8 : entry_start_offset + 12])[0]          # File Size (for files) or Number of Entries (for root) or Next Offset (for directories

        # determine path
        if parent_path is None: # root
            out['path'] = Path('.')
            out['is_root'] = True
        else: # non-root
            fn_fst_offset = string_table_start + st_index
            fn = fst[fn_fst_offset : fst.find(b'\x00', fn_fst_offset)]
            try:
                fn = fn.decode()
            except:
                warn("Failed to parse filename as string: %s" % fn)
                fn = str(fn)
            out['path'] = parent_path / fn

        # if directory, recursively parse children
        if is_dir:
            fst_ind += 1
            while fst_ind < out['length']:
                child = GcmFS.parse_fst(fst_ind, out['path'], fst, string_table_start)
                fst_ind = child['fst_ind_next']
                out['children'].append(child)
            out['fst_ind_next'] = fst_ind
        else:
            out['fst_ind_next'] = fst_ind + 1

        # return final parsed data
        return out


    def __iter__(self):
        fst = self.get_fst_bin()
        string_table_start = 0x0C * unpack('>I', fst[0x08 : 0x0C])[0]
        to_visit = [GcmFS.parse_fst(0, None, fst, string_table_start)] # start at root directory
        while len(to_visit) != 0:
            file_entry = to_visit.pop()
            if len(file_entry['children']) == 0:
                file_data = self.read_file(file_entry['offset'], file_entry['length'])
            else:
                file_data = None
                to_visit += file_entry['children'][::-1] # descending order into stack = ascending order when popped
            if not file_entry['is_root']:
                yield (file_entry['path'], None, file_data)



[docs]
class TgcFS(FileSystem):
    '''Class to represent a `Nintendo GameCube TGC image <https://hitmen.c02.at/files/yagcd/yagcd/chap14.html#sec14.8>`_.'''
    def __init__(self, file_obj, path=None):
        # set things up
        if file_obj is None:
            raise ValueError("file_obj must be a file-like")
        super().__init__(path=path, file_obj=file_obj)
        self.header = None # Header
        self.gcm = None    # Embedded GCM Data (bogus values fixed)


[docs]
    def get_header(self):
        '''Return the `Header <https://hitmen.c02.at/files/yagcd/yagcd/chap14.html#sec14.8.1>`_ of the TGC.

        Returns:
            `bytes`: The Header of the TGC.
        '''
        if self.header is None:
            self.header = self.read_file(0x00, 0x38)
        return self.header



[docs]
    def parse_header(self):
        '''Return a parsed version of the `Header <https://hitmen.c02.at/files/yagcd/yagcd/chap14.html#sec14.8.1>`_ of the TGC.

        Returns:
            `dict`: A parsed version of the Header of the TGC.
        '''
        # set things up
        data = self.get_header()
        out = dict()

        # parse raw Header data
        out['magic_word'] =             data[0x00 : 0x04]                  # TGC Magic Word (should be 0xae0f38a2)
        out['offsets_0x04_0x07'] =      data[0x04 : 0x08]                  # Unknown (usually all 0s)
        out['header_size'] =            unpack('>I', data[0x08 : 0x0C])[0] # Header Size (usually 0x8000)
        out['offsets_0x0C_0x0F'] =      data[0x0C : 0x10]                  # Unknown (usually 0x00100000)
        out['embedded_fst_offset'] =    unpack('>I', data[0x10 : 0x14])[0] # Offset to FST inside embedded GCM
        out['embedded_fst_size'] =      unpack('>I', data[0x14 : 0x18])[0] # Size of FST inside embedded GCM
        out['embedded_max_fst_size'] =  unpack('>I', data[0x18 : 0x1C])[0] # Max Size of FST inside embedded GCM
        out['embedded_boot_offset'] =   unpack('>I', data[0x1C : 0x20])[0] # Offset to Boot-DOL inside embedded GCM
        out['embedded_boot_size'] =     unpack('>I', data[0x20 : 0x24])[0] # Size of Boot-DOl inside embedded GCM
        out['file_area_offset'] =       unpack('>I', data[0x24 : 0x28])[0] # Offset to File Area inside embedded GCM
        out['file_area_size'] =         unpack('>I', data[0x28 : 0x2C])[0] # Size of File Area
        out['embedded_banner_offset'] = unpack('>I', data[0x2C : 0x30])[0] # Offset to Banner inside embedded GCM(?)
        out['embedded_banner_size'] =   unpack('>I', data[0x30 : 0x34])[0] # Size of Banner inside embedded GCM(?)
        out['fst_spoof_amount'] =       unpack('>I', data[0x34 : 0x38])[0] # FST Spoof Amount

        # return final parsed data
        return out



[docs]
    def get_gcm(self):
        '''Return the `Embedded GCM <https://hitmen.c02.at/files/yagcd/yagcd/chap14.html#sec14.8.2>`_ of the TGC.

        Args:
            `fix` (`bool`): `True` to update the offsets of the embedded GCM data with their correct values based on the Header, otherwise `False` to return the raw embedded GCM data.

        Returns:
            `bytes`: The Embedded GCM of the TGC.
        '''
        if self.gcm is None:
            # load raw GCM data
            header = self.parse_header()
            header_size = header['header_size']
            gcm_data = bytearray(self.read_file(header_size))

            # fix Boot-DOL and FST offsets
            fixed_boot_offset = header['embedded_boot_offset'] - header_size
            fixed_fst_offset = header['embedded_fst_offset'] - header_size
            gcm_data[0x0420 : 0x0424] = pack('>I', fixed_boot_offset)
            gcm_data[0x0424 : 0x0428] = pack('>I', fixed_fst_offset)

            # fix FST entries
            delta = header['file_area_offset'] - header['fst_spoof_amount'] - header_size
            num_entries = unpack('>I', gcm_data[fixed_fst_offset + 8 : fixed_fst_offset + 12])[0]
            for entry_offset in range(fixed_fst_offset, fixed_fst_offset + (12*num_entries), 12):
                if not bool(gcm_data[entry_offset]): # only update files
                    orig_offset = unpack('>I', gcm_data[entry_offset + 4 : entry_offset + 8])[0]
                    fixed_offset = orig_offset + delta
                    gcm_data[entry_offset + 4 : entry_offset + 8] = pack('>I', fixed_offset)

            # finalize
            self.gcm = bytes(gcm_data)
        return self.gcm


    def __iter__(self):
        return iter(GcmFS(BytesIO(self.get_gcm())))



[docs]
class GcRarcFS(FileSystem):
    '''Class to represent a `Nintendo GameCube RARC (.arc) archives <https://www.gc-forever.com/yagcd/chap15.html#sec15.3>`_.'''
    def __init__(self, file_obj, path=None):
        # set things up
        if file_obj is None:
            raise ValueError("file_obj must be a file-like")
        super().__init__(path=path, file_obj=file_obj)
        self.header = None      # Header
        self.data_header = None # Data Header


[docs]
    def get_header(self):
        '''Return the `Header <https://www.lumasworkshop.com/wiki/RARC_(File_Format)>`_ of the RARC.

        Returns:
            `bytes`: The Header of the RARC.
        '''
        if self.header is None:
            self.header = self.read_file(0x00, 0x20)
        return self.header



[docs]
    def parse_header(self):
        '''Return a parsed version of the `Header <https://www.lumasworkshop.com/wiki/RARC_(File_Format)#Header>`_ of the RARC.

        Returns:
            `dict`: A parsed version of the Header of the RARC.
        '''
        # set things up
        data = self.get_header()
        out = dict()

        # parse raw Header data
        out['magic_word'] =          data[0x00 : 0x04]                  # should be 'RARC'
        out['size'] =                unpack('>I', data[0x04 : 0x08])[0] # Size of Entire File
        out['data_header_offset'] =  unpack('>I', data[0x08 : 0x0C])[0] # Data Header Offset (always 0x20)
        out['data_start_offset'] =   unpack('>I', data[0x0C : 0x10])[0] # Data Start Offset (add 0x20 to this value)
        out['data_section_size'] =   unpack('>I', data[0x10 : 0x14])[0] # Size of File Data Section
        out['mram_size'] =           unpack('>I', data[0x14 : 0x18])[0] # Size of All MRAM Files in File Data Section
        out['aram_size'] =           unpack('>I', data[0x18 : 0x1C])[0] # Size of All ARAM Files in File Data Section
        out['dvd_size'] =            unpack('>I', data[0x1C : 0x20])[0] # Size of All DVD Files in File Data Section

        # clean strings
        for k in ['magic_word']:
            try:
                out[k] = clean_string(out[k])
            except:
                warn("Unable to parse Header '%s' as string: %s" % (k, out[k]))

        # check for validity
        if out['magic_word'] != 'RARC':
            warn("RARC magic word should be 'RARC', but it was: %s" % out['magic_word'])

        # return final parsed data
        return out



[docs]
    def get_data_header(self):
        '''Return the `Data Header <https://www.lumasworkshop.com/wiki/RARC_(File_Format)#Data_Header>`_ of the RARC.

        Returns:
            `bytes`: The Data Header of the RARC.
        '''
        if self.data_header is None:
            self.data_header = self.read_file(self.parse_header()['data_header_offset'], 0x20)
        return self.data_header



[docs]
    def parse_data_header(self):
        '''Return a parsed version of the `Data Header <https://www.lumasworkshop.com/wiki/RARC_(File_Format)#Data_Header>`_ of the RARC.

        Returns:
            `dict`: A parsed version of the Data Header of the RARC.
        '''
        # set things up
        data = self.get_data_header()
        out = dict()

        # parse raw Data Header data
        out['num_dirs'] =            unpack('>I', data[0x00 : 0x04])[0] # Number of Directory Nodes
        out['dir_offset'] =          unpack('>I', data[0x04 : 0x08])[0] # Offset to Directory Node Section (always 0x20) (add 0x20 to this)
        out['num_files'] =           unpack('>I', data[0x08 : 0x0C])[0] # Number of File Nodes
        out['file_offset'] =         unpack('>I', data[0x0C : 0x10])[0] # Offset to File Node Section (add 0x20 to this)
        out['string_table_size'] =   unpack('>I', data[0x10 : 0x14])[0] # Size of String Table
        out['string_table_offset'] = unpack('>I', data[0x14 : 0x18])[0] # Offset to String Table (add 0x20 to this)
        out['next_file_index'] =     unpack('>H', data[0x18 : 0x1A])[0] # Next Available File Index (number of File Nodes that are files?)
        out['keep_file_ID_sync'] =   bool(data[0x1A])                   # Keep File IDs Synced
        out['offsets_0x1B_0x1F'] =   data[0x1B : 0x20]                  # Padding (all 0s)

        # return final parsed data
        return out



[docs]
    def parse_dir_node(data):
        '''Return a parsed version of a `Directory Node <https://www.lumasworkshop.com/wiki/RARC_(File_Format)#Directory_Node_section>`_ of the RARC.

        Args:
            `data` (`bytes`): The raw Directory Node data

        Returns:
            `dict`: A parsed version of the Directory Node
        '''
        # set things up
        if len(data) != 16:
            raise ValueError("Directory Node data must be exactly 16 bytes: %s" % data)
        out = dict()

        # parse raw node data
        out['name_prefix'] =      data[0x00 : 0x04]                  # First 4 Characters of Directory Name (all caps)
        out['name_offset'] =      unpack('>I', data[0x04 : 0x08])[0] # Offset of Directory Name in String Table
        out['name_hash'] =        unpack('>H', data[0x08 : 0x0A])[0] # Hash of Directory Name
        out['num_files'] =        unpack('>H', data[0x0A : 0x0C])[0] # Number of Files in this Directory Node
        out['file_nodes_index'] = unpack('>I', data[0x0C : 0x10])[0] # Index of First File Node in this Directory Node

        # clean strings
        for k in ['name_prefix']:
            try:
                out[k] = clean_string(out[k])
            except:
                warn("Unable to parse Header '%s' as string: %s" % (k, out[k]))

        # return final parsed data
        return out



[docs]
    def parse_file_node(data):
        '''Return a parsed version of a `File Node <https://wiki.tockdom.com/wiki/RARC_(File_Format)#Directory>`_ of the RARC.

        Args:
            `data` (`bytes`): The raw File Node data

        Returns:
            `dict`: A parsed version of the File Node
        '''
        # set things up
        if len(data) != 0x14:
            raise ValueError("File Node data must be exactly 0x14 bytes: %s" % data)
        out = dict()

        # parse raw node data
        out['index'] =             unpack('>H', data[0x00 : 0x02])[0]           # Node Index (0xFFFF if this is a subdirectory)
        out['name_hash'] =         unpack('>H', data[0x02 : 0x04])[0]           # Hash of Node Name
        out['attributes'] =        data[0x04]                                   # Node Attributes
        out['name_offset'] =       unpack('>I', b'\x00' + data[0x05 : 0x08])[0] # Name Offset in String Table
        out['offset'] =            unpack('>I', data[0x08 : 0x0C])[0]           # If File: Offset in File Data Section; If Directory: Index in Directory Node Section
        out['size'] =              unpack('>I', data[0x0C : 0x10])[0]           # If File: Size of File's Data; If Directory: always 0x10
        out['offsets_0x10_0x13'] = data[0x10 : 0x14]                            # Unknown (all 0s?)

        # parse node attributes
        out['attributes'] = GcRarcFS.parse_node_attributes(out['attributes'])

        # return final parsed data
        return out



[docs]
    def parse_node_attributes(x):
        '''Return a parsed version of the `Attributes <https://www.lumasworkshop.com/wiki/RARC_(File_Format)#Node_Attributes>`_ of a File Node of the RARC.

        Args:
            `x` (`int`): The integer representation of the Attributes of a File Node.

        Returns:
            `dict`: A parsed version of the Attributes.
        '''
        return {
            'is_file':       bool(x & 0x01), # Node is a File
            'is_dir':        bool(x & 0x02), # Node is a Directory
            'is_compressed': bool(x & 0x04), # Node's File is Compressed
            'preload_mram':  bool(x & 0x10), # Preload File to Main RAM (MRAM)
            'preload_aram':  bool(x & 0x20), # Preload File to Auxiliary RAM (ARAM)
            'load_dvd':      bool(x & 0x40), # Load File from DVD when Needed
            'is_yaz0':       bool(x & 0x80), # Node is YAZ0-Compressed ('is_compressed' should be True as well)
        }


    def __iter__(self):
        # set things up
        header = self.parse_header()
        data_header = self.parse_data_header()
        dir_nodes_start = data_header['dir_offset'] + 0x20
        dir_nodes = [GcRarcFS.parse_dir_node(self.read_file(i,16)) for i in range(dir_nodes_start, dir_nodes_start + (16 * data_header['num_dirs']), 16)]
        file_nodes_start = data_header['file_offset'] + 0x20
        file_nodes = [GcRarcFS.parse_file_node(self.read_file(i,0x14)) for i in range(file_nodes_start, file_nodes_start + (0x14 * data_header['num_files']), 0x14)]
        string_table = self.read_file(data_header['string_table_offset'] + 0x20, data_header['string_table_size'])
        file_data_start = header['data_start_offset'] + 0x20

        # load node names
        for node in dir_nodes + file_nodes:
            node['name'] = clean_string(string_table[node['name_offset'] : string_table.find(b'\x00',node['name_offset'])])

        # iterate over all files
        dir_nodes[0]['parent_path'] = Path('.')
        for dir_node_ind, dir_node in enumerate(dir_nodes):
            dir_path = dir_node['parent_path'] / dir_node['name']
            yield (dir_path, None, None)
            for file_node_ind in range(dir_node['file_nodes_index'], dir_node['file_nodes_index'] + dir_node['num_files']):
                file_node = file_nodes[file_node_ind]
                if file_node['attributes']['is_dir']:
                    if file_node['name'] in {'.', '..'}:
                        continue # skip current ('.') and parent ('..') directories
                    dir_nodes[file_node['offset']]['parent_path'] = dir_path
                else:
                    yield (dir_path / file_node['name'], None, self.read_file(file_data_start + file_node['offset'], file_node['size']))