User:2 B/Debatably useful stuff

From Pikipedia, the Pikmin wiki
Jump to navigation Jump to search

Python script to clean up files from the Pikmin 3 text dump

For some reason, the last update to this page broke the script's formatting, and I have no idea why. Sorry!

"""Clean up a message file from the Pikmin 3 text dump.

Reads the file named by ``filename``, converts every labelled message into
wiki-style text (control tags become templates or bracketed notes) and prints
the messages sorted by label. Edit the settings below before running.
"""

# Change file here
filename = "Strings/Raw/Pikmin 3/EU ROM/EUEng/Chat.msbt"

# Change these for your purposes
textbox_separator = "\n<br>\n"
include_pauses = False
include_sound = False
include_animation = False
include_autoadvance = False
include_lookat = False
include_textsize = True
include_variables = False
include_ruby = True
# Font tags were permanently disabled in the original script (`if False`);
# this switch keeps that default while making the output reachable.
include_font = False

# Change text colors here
colors = {
    # emphasis
    0x01: "orange",
    0x02: "#AA0",
    0x03: "cyan",
    0x26: "goldenrod",
    # pikmin colors
    0x0b: "red",
    0x0e: "#AA0",
    0x11: "blue",
    0x14: "grey",
    0x17: "pink",
    # leader colors
    0x1a: "lightblue",
    0x1d: "pink",
    0x20: "lime",
    # ultra-spicy
    0x25: "orange",
}

# Lookup tables used by clean_text(); indices come straight from tag bytes.
CHARACTERS = ("Alph", "Brittany", "Charlie", "Louie", "Olimar", "S.S. Drake", "President")
EMOTES = ("neutral", "shocked", "happy", "sad", "mad")
BUTTONS = [
    '\\00', '\\01', '\\02', '\\03', '\\04', '\\05', '\\06', '\\07',
    '\\08', '\\09', 'photo flash', 'photo', '\\0c', '\\0d', '\\0e', '\\0f',
    '\\10', '\\11', '\\12', '\\13', 'roll right', '\\15', 'roll left', 'photo zoom',
    'use onion', 'move', 'throw', 'whistle', 'dismiss', 'change type', 'reset camera', 'lock-on',
    'charge', 'switch leader', 'spray', '\\23', '\\24', '\\25', '\\26', '\\27',
    '\\28', '\\29', '\\2a', '\\2b', 'pluck', 'cancel', '\\2e', 'shake',
]
ANIMATIONS = {0: "idle", 2: "listening", 3: "looking at KopPad"}

# Tag groups rendered as variables (or as "_" when include_variables is off).
VARIABLE_TAG_GROUPS = frozenset({
    0x01, 0x02, 0x03, 0x04, 0x05, 0x08, 0x09, 0x0d, 0x0e, 0x10, 0x11,
    0x12, 0x13, 0x14, 0x16, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e,
})


def decode(bytes, byte_order = "little"):
    """Decode UTF-16 data; "big" selects big-endian, anything else little-endian."""
    return bytes.decode("utf-16-be" if byte_order == "big" else "utf-16-le")


def clean_text(text, byte_order):
    """Convert one raw message into readable wiki text.

    The message is a sequence of 16-bit units. A unit equal to 0x000E starts a
    control tag laid out as: group (u16), type (u16), parameter size in bytes
    (u16), then the parameters. Everything else is UTF-16 text.

    Args:
        text: Raw message bytes.
        byte_order: "big" or "little"; applies to both the text and the
            integers inside tags.

    Returns:
        The cleaned message. Which tags are rendered is controlled by the
        module-level ``include_*`` settings, ``colors`` and
        ``textbox_separator``.

    Raises:
        UnicodeDecodeError: If text or an embedded name is not valid UTF-16.
        AssertionError: If a button tag's second parameter byte is not 0xCD.
        IndexError: If a tag refers to a character/emote outside the tables.
    """
    # Same rule as decode(): only "big" selects big-endian.
    marker = b"\x00\x0e" if byte_order == "big" else b"\x0e\x00"
    end = len(text)

    def u16(pos):
        """Read the unsigned 16-bit integer stored at byte offset pos."""
        return int.from_bytes(text[pos:pos + 2], byte_order)

    def prefixed(pos):
        """Read a field preceded by its u16 byte length; return (bytes, next offset)."""
        start = pos + 2
        stop = start + u16(pos)
        return text[start:stop], stop

    out = []
    i = 0
    color = False  # True while a {{color|...}} template is still open
    while i < end:
        if text[i:i + 2] != marker:
            # Decode the whole run of plain text at once: decoding unit by
            # unit fails on surrogate pairs (characters outside the BMP).
            run_start = i
            while i < end and text[i:i + 2] != marker:
                i += 2
            out.append(decode(text[run_start:i], byte_order))
            continue

        group = u16(i + 2)
        kind = u16(i + 4)
        extra_size = u16(i + 6)
        i += 8  # marker + group + type + size; i now points at the parameters
        tag = (group, kind)

        if tag == (0x00, 0x00):
            if include_ruby:
                kanji_count = u16(i)
                furigana_count = u16(i + 2)
                furigana = text[i + 4:i + 4 + furigana_count]
                kanji = text[i + 4 + furigana_count:i + 4 + furigana_count + kanji_count]
                out.append(
                    f"<ruby>{decode(kanji, byte_order)}"
                    f"<rt>{decode(furigana, byte_order)}</rt></ruby>"
                )
                # The base text was just emitted inside <ruby>; skip it so it
                # is not emitted again as plain text.
                i += kanji_count
        elif tag == (0x00, 0x01):
            if include_font:
                out.append(f"[font: {int.from_bytes(text[i:i + 2], byte_order, signed=True)}]")
        elif tag == (0x00, 0x02):
            if include_textsize:
                out.append(f"[text size: {u16(i)}]")
        elif tag == (0x00, 0x03):
            color_id = u16(i)
            if text[i:i + 2] == b'\xff\xff':
                # 0xFFFF ends the current color, if one is open.
                if color:
                    out.append("}}")
                    color = False
            elif color_id in colors:
                if color:
                    out.append("}}")
                out.append("{{color|2=" + colors[color_id] + "|")
                color = True
            else:
                out.append(f"[unknown color: {hex(color_id)}]")
        elif tag == (0x00, 0x04):
            out.append(textbox_separator)
        elif group in VARIABLE_TAG_GROUPS:
            if include_variables:
                name_raw, after_name = prefixed(i)
                var_name = decode(name_raw, byte_order)
                other = text[after_name:i + extra_size].hex()
                out.append(f'[variable #{group}-{kind}: "{var_name}", other data: {other}]')
            else:
                out.append("_")
        elif tag == (0x07, 0x01):
            # Sanity check kept from the original: the byte after the button
            # id is expected to be 0xCD.
            assert text[i + 1] == 0xcd
            button_id = text[i]
            if button_id < len(BUTTONS):
                button_name = BUTTONS[button_id]
            else:
                button_name = hex(button_id)
            out.append(f"[button: {button_name}]")
        elif tag == (0x07, 0x02):
            out.append("[GamePad]")
        elif tag == (0x0a, 0x00):
            # Five length-prefixed names, with 0x16 bytes of unidentified data
            # between the third and the fourth.
            names = []
            pos = i
            for index in range(5):
                raw, pos = prefixed(pos)
                # byte_order is passed explicitly: the original omitted it
                # here and so always decoded these names as little-endian.
                names.append(decode(raw, byte_order))
                if index == 2:
                    pos += 0x16
            out.append("[tag 0xA: " + ", ".join(f'"{name}"' for name in names) + "]")
        elif tag == (0x15, 0x00):
            if include_animation:
                # Explicit "big" keeps the first byte as the character index,
                # matching the Python 3.11+ default the original relied on
                # (before 3.11 the missing argument was a TypeError).
                detail = int.from_bytes(text[i:i + 2], "big")
                character = CHARACTERS[detail >> 8]
                animation = detail & 0x00FF
                out.append(f"[animation: {character} {ANIMATIONS.get(animation, animation)}]")
        elif tag == (0x15, 0x01):
            if include_lookat:
                character_name = CHARACTERS[text[i]]
                target = CHARACTERS[text[i + 1] - 1]
                out.append(f"[look at: {character_name} -> {target}]")
        elif tag == (0x17, 0x00):
            source_raw, pos = prefixed(i)
            name_raw, _ = prefixed(pos)
            out.append(
                f'[use text: "{decode(name_raw, byte_order)}" '
                f'from "{decode(source_raw, byte_order)}"]'
            )
        elif tag == (0x17, 0x01):
            if include_pauses:
                out.append(f"[pause: {u16(i)}]")
        elif tag == (0x17, 0x02):
            character_id = text[i]
            icon_id = text[i + 1]
            if icon_id == 0x19:  # No icon
                out.append(f"{CHARACTERS[character_id]} (no icon): ")
            else:
                if icon_id == 0x1a:  # Drake icon
                    icon_chara = 5
                    out.append("{{icon|S.S. Drake}} ")
                else:
                    if 0x1f <= icon_id <= 0x23:  # President icons
                        icon_chara = 6
                        emote = EMOTES[icon_id - 0x1f]
                    else:
                        icon_chara = icon_id % 5
                        emote = "neutral" if character_id == 5 else EMOTES[icon_id // 5]
                    out.append("{{icon|" + CHARACTERS[icon_chara] + "|v=" + emote + "}} ")
                if character_id != icon_chara:
                    out.append(f"(speaker mismatch, should be {CHARACTERS[character_id]}) ")
        elif tag == (0x17, 0x03):
            if include_autoadvance:
                out.append("[advance automatically]")
        elif tag == (0x17, 0x04):
            sound_raw, after_name = prefixed(i)
            sound_name = decode(sound_raw, byte_order)
            if include_sound:
                out.append(f'[sound: "{sound_name}", {text[after_name]}]')
        else:
            payload = text[i:i + extra_size].hex()
            out.append(f'[unknown tag: {hex(group)}, {hex(kind)}: "{payload}"]')

        i += extra_size

    return "".join(out)


def _read_sections(data, byte_order):
    """Split the file into sections keyed by their 4-character signature.

    Sections start after a 0x20-byte file header. Each one is a 4-byte
    signature, a 4-byte size, 8 skipped bytes and then ``size`` bytes of
    content; any 0xAB bytes that follow are skipped as padding.
    """
    sections = {}
    end = len(data)
    pos = 0x20
    while pos < end:
        signature = data[pos:pos + 4].decode()
        size = int.from_bytes(data[pos + 4:pos + 8], byte_order)
        pos += 16
        sections[signature] = data[pos:pos + size]
        pos += size
        while pos < end and data[pos] == 0xab:
            pos += 1
    return sections


def _read_labels(block, byte_order):
    """Return ``(label, message index)`` pairs from the label section.

    The section starts with a group count followed by one (label count,
    offset) pair per group. Each label is a 1-byte length, the name, and a
    4-byte message index. Labels whose name is not valid UTF-8 are skipped.
    """
    labels = []
    group_count = int.from_bytes(block[:4], byte_order)
    for group_id in range(group_count):
        entry = 4 + 8 * group_id
        label_count = int.from_bytes(block[entry:entry + 4], byte_order)
        i = int.from_bytes(block[entry + 4:entry + 8], byte_order)
        for _ in range(label_count):
            label_size = block[i] if i < len(block) else 0
            i += 1
            raw_label = block[i:i + label_size]
            index = int.from_bytes(block[i + label_size:i + label_size + 4], byte_order)
            # Always step over the name AND its index. The original advanced
            # only 4 bytes after a decode failure, which misaligned every
            # following label in the group.
            i += label_size + 4
            try:
                label = raw_label.decode()
            except UnicodeDecodeError:
                continue
            labels.append((label, index))
    return labels


def _read_messages(block, labels, byte_order):
    """Return ``{label: cleaned text}`` for every label with a valid index.

    The text section starts with a message count followed by one 4-byte
    offset per message. A message runs from its offset to the next larger
    offset, or to the end of the section.
    """
    message_count = int.from_bytes(block[:4], byte_order)
    message_offsets = [
        int.from_bytes(block[4 + 4 * message_id:8 + 4 * message_id], byte_order)
        for message_id in range(message_count)
    ]
    messages = {}
    for label, message_id in labels:
        # Valid indices are 0 .. message_count - 1 (the original used `>`,
        # letting message_count itself through as an empty message).
        if message_id >= message_count:
            continue
        start_offset = message_offsets[message_id]
        end_offset = min(
            (offset for offset in message_offsets if start_offset < offset < len(block)),
            default=len(block),
        )
        raw_message = block[start_offset:end_offset]
        # Drop the final character of the cleaned text (presumably the
        # message's terminating NUL -- confirm against the format).
        messages[label] = clean_text(raw_message, byte_order)[:-1]
    return messages


def main():
    """Read ``filename`` and print every labelled message, sorted by label."""
    with open(filename, 'rb') as f:
        data = f.read()
    # Bytes 8-9 of the header hold the byte-order mark.
    byte_order = "big" if data[8:10] == b'\xfe\xff' else "little"
    sections = _read_sections(data, byte_order)
    labels = _read_labels(sections["LBL1"], byte_order)
    messages = _read_messages(sections["TXT2"], labels, byte_order)
    for label in sorted(messages):
        print(f"=={label}==\n{messages[label]}\n")


if __name__ == "__main__":
    main()