#!/usr/bin/env python """ transmatte.py -- an IF excerpting tool Created by Andrew Plotkin Last updated: March 4, 2010 Web site: This script is in the public domain. Do anything. Usage: python transmatte.py [ options ] trans1.txt trans2.txt ... > page.html Given a set of IF transcript files, this generates an HTML page that lets the reader flip between them. The transcripts are merged, as much as possible. That is, common sequences of commands are displayed as static data; differences are displayed with links that let the reader jump between the transcript variations. If this makes no sense to you, go to the web site (see above) and try the examples. Options: -p, --prompt: Define the string that identifies command inputs. (Default is ">") -a, --annotation: Define the string that identifies annotation lines. (Default is "!") A certain amount of cleanup is done on the transcripts. Trailing whitespace is stripped from each line. Blank lines before and after a command line are ignored (but blank lines in the middle of a game response are kept). Lines beginning with "!" are special; they will not appear in the output. Instead, they are annotations which help customize the output. Annotation lines have the form "!FUNCTION: VALUE". (An annotation line can appear right before a command prompt line or after it. If you put one at the end of a game response, it'll be glomped up by the following command line.) !title: Defines the page title. This must be placed on or before the first command. !subtitle: Defines the page subtitle. This must be placed on or before the first command. !note: This line defines a game note for the current command. Game notes appear as-is in the right column of the generated page. !option: This line defines the label for a command. (In the choice links in the left column.) When two of your transcripts differ, use this annotation on the commands that are different. !otheroption: Sometimes you have one transcript that differs only in the absence of a command. So there's no command there to put an "!option" line on. Instead, put an "!otheroption" line in the *other* transcript, on the appropriate command. !priority: This defines the (numeric) priority for a command. In the choice links, the high-priority command appears first, and is the default choice. (That is, it will be visible when the page is first loaded.) """ import sys import re import optparse popt = optparse.OptionParser() popt.add_option('-p', '--prompt', action='store', dest='prompt') popt.add_option('-a', '--annotation', action='store', dest='annotation') popt.set_defaults( prompt = '>', annotation = '!', ) (opts, args) = popt.parse_args() if not args: sys.stderr.write('usage: transmatte.py file1 file2 file3...\n') sys.exit(1) class Spindle: """Spindle: A data structure which is like a tree, except that instead of branching at the bottom, it branches in the middle. That is, a spindle consists of a chain of nodes, except that at one point (optionally), the chain is interrupted by a fork into one or more child spindles. You go down the chain, hit the fork, branch out into one of the children, and then merge back into the chain afterwards. In regexp syntax, this would look like "^head(S1|S2|S3)tail$". Where S1, S2, S3 (etc) are themselves spindles. Note that either the head or the tail (or both) could be empty. If there's no fork point, we call it a "straight spindle", and it's simply a list of nodes (possibly the empty list). Constructors: Spindle(seq) -- Create a straight spindle. The argument can be any sequence, such as a list or tuple. A string argument will be broken up into a sequence of characters. Spindle(seqhead, children, seqtail) -- Create a spindle with the given sequence, followed by a fork, followed by the given tail sequence. The children argument should be a tuple/list of Spindles. (Note that while you can create a Spindle containing any type of nodes, the process() function defined later on assumes that the contents are Node objects.) """ index_counter = 0 def __init__(self, seq, children=None, seqtail=None): Spindle.index_counter += 1 self.index = Spindle.index_counter if (children is None): self.straight = True self.seq = tuple(seq) self.seqhead = self.seq self.seqtail = self.seq self.length = len(self.seq) self.children = None else: self.straight = False self.seqhead = tuple(seq) self.seqtail = tuple(seqtail) self.children = tuple(children) self.length = len(self.seqhead) + len(self.seqtail) def __repr__(self): if (self.straight): return ('' % (self.index, self.seq)) else: return ('' % (self.index, self.seqhead, len(self.children), self.seqtail)) def dump(self, depth=0): """dump() -> None Print a tree reprentation of the spindle and its children. For debugging. """ if (self.straight): val = ''.join(self.seq) if (not val): val = '()' print ' '*depth + val else: val = ''.join(self.seqhead) if (not val): val = '()' print ' '*depth + '\\' + val for spin in self.children: spin.dump(depth+1) val = ''.join(self.seqtail) if (not val): val = '()' print ' '*depth + '/' + val def firstnode(self): """firstnode() -> tuple of nodes Return the first node in the spindle. If the fork occurs right at the beginning, returns the first node of *all* of the children. If it can't find any nodes, this returns the empty tuple. """ if (self.straight): # This will be empty is seq is empty. return self.seq[0:1] else: if (self.seqhead): return self.seqhead[0:1] res = [] for spin in self.children: res.extend(spin.firstnode()) if res: return tuple(res) if (self.seqtail): return self.seqtail[0:1] return () def trim(self, headlen, taillen): """trim(headlen, taillen) -> (tuple, tuple) Chop headlen nodes off the head of the spindle, and taillen nodes off the tail. Return the two chopped-off sequences, as tuples. (Either or both arguments can be zero, in which case the corresponding return value will be empty.) This assumes that the spindle *has* at least headlen nodes before the fork, and taillen nodes after it. (Or, for a straight spindle, at least headlen+taillen nodes overall.) If it doesn't, this will do something which is probably wrong. """ headseq = self.seqhead[ 0 : headlen ] tailseq = self.seqtail[ len(self.seqtail)-taillen : ] if (self.straight): self.seq = self.seq[ headlen : len(self.seq)-taillen ] self.length = len(self.seq) else: self.seqhead = self.seqhead[ headlen : ] self.seqtail = self.seqtail[ 0 : len(self.seqtail)-taillen ] self.length = len(self.seqhead) + len(self.seqtail) return (headseq, tailseq) def getnote(self, key): """getnode(key) -> value Look at the first node in this spindle. If it contains the given key, return its value. If the spindle start with a fork, look up the given key in the first nodes of *all* the children. If nothing can be found, return None """ ls = self.firstnode() for node in ls: val = node.getnote(key) if (val): return val return None def merge(spin1, spin2, headlen, taillen): """merge(spin1, spin2, headlen, taillen) -> Spindle Given two spindles, which have headlen matching nodes at the beginning and taillen matching nodes at the end, merge them. This returns a new spindle, which begins with the common headlen nodes, ends with the common taillen nodes, and contains the appropriate fork in between. This takes for granted that the spindles actually do have those nodes in common. (Annotations will be merged between common nodes.) This mutates (or mutilates) spin1 and spin2. Do not try to use them after passing them to this function. This attempts to be clever about minimizing forks. For example, if you merge "abc(123|456)def" with "abc(789|0)def", you'll get a four-way spindle: "abc(123|456|789|0)def". """ (headseq1, tailseq1) = spin1.trim(headlen, taillen) (headseq2, tailseq2) = spin2.trim(headlen, taillen) headseq = [ nod1.update(nod2) for (nod1, nod2) in zip(headseq1, headseq2) ] tailseq = [ nod1.update(nod2) for (nod1, nod2) in zip(tailseq1, tailseq2) ] # spin1 and spin2 have now been trimmed of their common portions. # See if we can merge the remainder of one into the other. if (not (spin1.straight or spin2.straight) and spin1.length == 0 and spin2.length == 0): ls = spin1.children + spin2.children spin = Spindle(headseq, ls, tailseq) return spin if (not spin1.straight and spin1.length == 0): ls = spin1.children + (spin2,) spin = Spindle(headseq, ls, tailseq) return spin if (not spin2.straight and spin2.length == 0): ls = spin2.children + (spin1,) spin = Spindle(headseq, ls, tailseq) return spin # No efficient way to do it; create a new spindle that forks between # (what's left of) spin1 and spin2. spin = Spindle(headseq, [spin1, spin2], tailseq) return spin def compare(spin1, spin2): """compare(spin1, spin2) -> (int, int) Compare the two spindles, and count how many nodes they have in common at the beginning, and at the end. Return these two values. The comparison doesn't extend into children of the spindle. Only the head and tail sequences are considered. If you compare a straight spindle to itself, you'll get (len, 0). This is probably not what you want, so don't do that. (This is why the program rejects loading two identical transcripts.) """ # Note: if I cared about efficiency, I'd cache the results of this # call with (spin1,spin2) as the cache key. # In a straight spindle, seqhead and seqtail are both copies of the # entire spindle sequence. This is for the convenience of this # function. maxval = min(len(spin1.seqhead), len(spin2.seqhead)) ix = 0 while (ix < maxval): if (spin1.seqhead[ix] != spin2.seqhead[ix]): break ix += 1 sharehead = ix maxval = min(len(spin1.seqtail), len(spin2.seqtail)) ix = 0 while (ix < maxval): if (spin1.seqtail[-1-ix] != spin2.seqtail[-1-ix]): break ix += 1 sharetail = ix if (sharehead + sharetail > spin1.length): sharetail = spin1.length - sharehead if (sharehead + sharetail > spin2.length): sharetail = spin2.length - sharehead return (sharehead, sharetail) def number_spindle(spin, parent=None, num=0): """number_spindle(spin) -> None Go down a spindle (and all its children). In each one, store the choice number -- the position of the spindle in its parent's "children" array. (The root spindle's number defaults to 0.) """ spin.parent = parent spin.choicenum = num spin.extrachoiceset = None if (not spin.straight): ls = [] for child in spin.children: priority = 0 val = child.getnote('priority') if (val): try: priority = int(val) except: pass ls.append( (-priority, child.index, child) ) ls.sort() spin.children = tuple([ nod for (_, _, nod) in ls ]) ix = 0 for child in spin.children: number_spindle(child, spin, ix) ix += 1 def emplace_choices(spin): """emplace_choices(spin) -> None Go down a spindle (and all its children). In each one that forks, add a choicelist descriptor at the beginning of each of the forked children. (A choicelist descriptor is the information we'll need to generate a set of choice links. This consists of a reference to the parent node, and the child's number (so we know which link gets boldfaced.)) A complication: a spindle might be totally empty, so there's no child node to stick the descriptor on. In that case, we put it directly on the spindle. """ if (not spin.straight): for child in spin.children: tup = (spin, child.choicenum) ls = child.firstnode() if not ls: child.extrachoiceset = tup else: for node in ls: node.choicesets.append(tup) for child in spin.children: emplace_choices(child) def process(spindles): """process(spindles) -> Spindle Given a list of spindles, merge them all as much as possible. Return a single spindle whose branches cover all of the inputs. This is not particularly efficient. It compares every spindle to every other spindle, in order to find the pair which have the most in common. It merges those two, and then starts the whole process over. A lot of redundant comparisons, in other words. """ while True: results = [] for spin1 in spindles: for spin2 in spindles: if (spin1 == spin2): break (sharehead, sharetail) = compare(spin1, spin2) sharetotal = sharehead + sharetail if (sharetotal == 0): continue results.append( (sharetotal, sharehead, sharetail, spin1, spin2) ) if (not results): break results.sort(reverse=True) (sharetotal, sharehead, sharetail, spin1, spin2) = results[0] spindles.remove(spin1) spindles.remove(spin2) newspin = merge(spin1, spin2, sharehead, sharetail) spindles.append(newspin) if (len(spindles) == 1): # We merged down to a single spindle. Yay! res = spindles[0] else: # We were left with several spindles, with nothing in common. # Create a single spindle that forks over all of them. # (We use merge() for efficient merging.) spin1 = spindles.pop() spin2 = spindles.pop() res = merge(spin1, spin2, 0, 0) while spindles: spin2 = spindles.pop() res = merge(res, spin2, 0, 0) number_spindle(res) emplace_choices(res) return res class Node: """Node: Represents one move from a transcript, including the player's command, the game's response, and any annotations added to the transcript for that move. (For introductory text, which is not preceded by a command, the command should be None.) In order to make the comparison-and-merge algorithm behave right, two Nodes compare as equal if their commands and bodies are equal. The annotations are ignored for the purposes of comparison. """ def __init__(self, command, body, notels=[], choicesets=None): self.command = command self.body = body self.tup = (command, body) # equality key self.notes = {} for note in notels: if (':' in note): (key, val) = note.split(':', 1) key = key.strip() val = val.strip() else: (key, val) = (note, True) self.notes[key] = val # The following is not used until the process() phase. if choicesets is None: self.choicesets = [] else: self.choicesets = choicesets def __repr__(self): return '<' + repr(self.command) + '>' def __hash__(self): return hash(self.tup) def __eq__(self, node): return (self.tup == node.tup) def __ne__(self, node): return (self.tup != node.tup) def update(self, node): """update(node) -> Node Copy all the notes from the given node into this one. Returns self. """ self.notes.update(node.notes) return self def getnote(self, key): """getnode(key) -> value If this node's notes contain the given key, return its value. Otherwise, return None. """ return self.notes.get(key, None) def parse_transcript(filename): """parse_transcript(filename) -> tuple Read a transcript file. Parse it into a sequence of Nodes. A certain amount of cleanup is done on the transcript, to make it easier to compare. Trailing whitespace is stripped from each line. Blank lines before and after a command line are ignored (but blank lines in the middle of a game response are kept). """ result = [] fl = open(filename) leadingblanks = True command = None bodyls = [] notels = [] notels2 = [] while True: ln = fl.readline() if (not ln): break ln = ln.rstrip() if (leadingblanks and not ln): continue leadingblanks = False if (ln.startswith(opts.prompt)): if (command or bodyls): body = '\n'.join(bodyls) body = body.rstrip() node = Node(command, body, notels) notels = [] result.append(node) notels.extend(notels2) notels2 = [] bodyls = [] command = ln elif (ln.startswith(opts.annotation)): ln = ln[ len(opts.annotation) : ].strip() notels2.append(ln) else: if (bodyls or ln): bodyls.append(ln) if (ln): notels.extend(notels2) notels2 = [] if (command or bodyls): body = '\n'.join(bodyls) body = body.rstrip() node = Node(command, body, notels) result.append(node) fl.close() if leadingblanks or not result: raise Exception('transcript was entirely blank: %s' % (filename,)) return tuple(result) # TemplateHead and TemplateFoot are the boilerplate of the HTML output. TemplateHead = """ $TITLE$

$TITLE$

$SUBTITLE$


""" TemplateFoot = """ """ headwhite_regex = re.compile('^\\s+') def nbsp_head(val): """nbsp_head(val) -> str Convert whitespace at the *beginning* of a string into " ". (This is necessary to convert indented transcript lines into HTML.) """ match = headwhite_regex.match(val) if (not match): return val count = match.end() return (' ' * count) + val[ count : ] def htmlescape(val): """htmlescape(val) -> str Escape text for HTML output. Newlines are turned to
. Indented lines get " " to preserve the indentation. """ val = val.replace('&', '&') val = val.replace('>', '>') val = val.replace('<', '<') ls = val.split('\n') ls = [ nbsp_head(val) for val in ls ] return '
'.join(ls) def generate_sequence(seq): """generate_sequence(seq) -> None Generate the HTML tables for a sequence of nodes. If the sequence is empty, this prints nothing. """ for node in seq: print '' # The biggest, ugliest part of this function is printing the choice # links (the left column of the output). if (not node.choicesets): print '' else: print '' print '' # Print the game notes (right column). nodenote = node.getnote('note') if (not nodenote): print '' else: print '' print '
' firstone = True for (cset, thischoice) in node.choicesets: if firstone: firstone = False else: print '
' ix = 0 for spin in cset.children: label = spin.getnote('option') if not label: for diffspin in cset.children: if (diffspin != spin): label = diffspin.getnote('otheroption') if label: break if not label: label = 'Option %d of %d' % (ix+1, len(cset.children)) if (thischoice == ix): print '%s
' % (label,) else: cmd = "javascript:swap('spin_%d_%d', 'spin_%d_%d')" % (cset.index, thischoice, cset.index, ix) print '%s
' % (cmd, label) ix += 1 print '
' # Print the command and body (middle column). if (node.command): print '

' + htmlescape(node.command) + '

' print '

' + htmlescape(node.body) + '

' print '
' + htmlescape(nodenote) + '
' def generate_spindle(spin): """generate_spindle(spin) -> None Recursively generate the HTML divs and tables for a spindle and its children. """ if (spin.parent): parentindex = spin.parent.index else: parentindex = 0 # Only the first choice in any set is visible, to begin with. disp = '' if (spin.choicenum > 0): disp = ' style="display: none;"' print '
' % (parentindex, spin.choicenum, disp) if (spin.straight): generate_sequence(spin.seq) else: generate_sequence(spin.seqhead) for choice in spin.children: generate_spindle(choice) generate_sequence(spin.seqtail) if (spin.extrachoiceset): # This is the case where the spindle had no nodes at all. We # fake up an empty row to put the choiceset in. emptynode = Node(None, ' ', choicesets = [spin.extrachoiceset]) generate_sequence( (emptynode,) ) print '
' def generate(topspin): """generate(topspin) -> None Generate the complete dynamic HTML page for the transcripts represented by a spindle. The page is printed to stdout. """ title = topspin.getnote('title') if not title: title = 'TITLE' val = TemplateHead.replace('$TITLE$', title) subtitle = topspin.getnote('subtitle') if not subtitle: subtitle = 'A slightly interactive excerpt' val = val.replace('$SUBTITLE$', subtitle) print val generate_spindle(topspin) print TemplateFoot nodelists = [] nodelist_map = {} for filename in args: ls = parse_transcript(filename) if (nodelist_map.has_key(ls)): raise Exception('identical transcripts: %s, %s' % (nodelist_map[ls], filename)) nodelists.append(ls) nodelist_map[ls] = filename spindles = [ Spindle(trans) for trans in nodelists ] result = process(spindles) generate(result)