!) and instead set a CSS left-margin for the line. A small
# negative text-indent ensures that following lines will be more indented.
#
# On top of that, we want the document to be readable even *without* CSS.
# (It's just polite.) The preferred standard for code is a <pre> section,
# in which whitespace is significant.
#
# The best plan overall is for every line to be a *separate* <pre> tag,
# with margin-left and text-indent set as described. The "chopped-out"
# initial whitespace is still there but hidden by CSS. If you view the
# document without CSS, the margin is gone but the initial whitespace
# comes back -- which is what we want!
#
# The overview is:
#
# - Convert tabs to spaces. (Tabs will never show the right width in a
#   browser, and they'll mess up our indent math.)
# - HTML-escape any non-ASCII characters. (Avoid any possibility of a file
#   encoding error.)
# - Remove initial whitespace from lines, remembering how much there was.
# - If a line is completely blank, add a space so that it doesn't collapse
#   completely.
# - Wrap the line in <pre>...</pre>, setting margin info
#   in a CSS style="..." attribute.
# - Add a document wrapper of <div class="i7">...</div>. (The stylesheet
#   needs this.)
# - Add the HTML document wrapper, with header and footer, plus the
#   stylesheet.
#
# Before I had this script, I did:
# pygmentize -l inform7 -f html -O nowrap=True -o tmp.html story.ni
# Then I had to do the other steps by hand.
import sys
import optparse
import re
import html
from pygments.formatters import get_formatter_by_name, HtmlFormatter
from pygments.lexers import get_lexer_by_name
from pygments.lexer import Lexer
from pygments.token import Token
from pygments import highlight
# Command-line interface. Each entry pairs the option's flag spellings with
# the keyword arguments handed to OptionParser.add_option(); the order here
# is the order the options are registered in.
_OPTION_SPECS = (
    (('-o', '--output'),
     dict(action='store', dest='outfile',
          help='file to write (default: stdout)')),
    (('-t', '--title'),
     dict(action='store', dest='title',
          help='HTML doc title')),
    (('--highlight',),
     dict(action='store', dest='highlight',
          help='line or lines to highlight ("N" or "M-N" inclusive)')),
    (('--head', '--header'),
     dict(action='store', dest='headfile',
          help='file containing HTML snippet to put at top')),
    (('--foot', '--footer'),
     dict(action='store', dest='footfile',
          help='file containing HTML snippet to put at bottom')),
    (('--toc',),
     dict(action='store_true', dest='toc',
          help='create a table of contents listing section headers')),
    (('--old',),
     dict(action='store_true', dest='oldmode',
          help='use an older version of the HTML formatter')),
    (('--stock',),
     dict(action='store_true', dest='stockmode',
          help='use the stock version of the HTML lexer')),
)

popt = optparse.OptionParser()
for _flags, _settings in _OPTION_SPECS:
    popt.add_option(*_flags, **_settings)

# opts holds the parsed option values; args holds the positional arguments.
opts, args = popt.parse_args()
# Match runs of tabs and spaces.
spacerun_pat = re.compile('[ \t]+')

# Match non-ASCII characters.
nonascii_pat = re.compile('[^ -~]')


# Replacement for runs of tabs and spaces.
# This just converts tabs into spaces with an assumed width of 4.
# Remember that the stylesheet includes "white-space: pre-wrap;"
# so runs of space will not be collapsed.
def spacerun_func(match, tabwidth=4):
    """Return the all-spaces replacement for one run of tabs and spaces.

    Intended as the callback for spacerun_pat.sub().

    match -- a regex match whose group(0) is the run of whitespace.
    tabwidth -- number of spaces each tab expands to (default 4).

    Returns a string of spaces: one per character of the run, except that
    each tab counts as tabwidth characters.
    """
    val = match.group(0)
    if val == ' ':
        # Overwhelmingly the common case; hand it straight back.
        return val
    # Every tab contributes tabwidth columns instead of one.
    count = len(val) + val.count('\t') * (tabwidth - 1)
    return ' ' * count
# Replacement for non-ASCII characters.
# This uses "&#x1234;" style escapes.
# (When this is called, "<>&" have already been HTML-escaped, so we don't
# worry about them.)
def nonascii_func(match):
    """Return an ASCII-only replacement for one non-ASCII character.

    Intended as the callback for nonascii_pat.sub().

    match -- a regex match whose group(0) is a single character.

    Returns "&nbsp;" for a non-breaking space, a backslash followed by two
    hex digits for control characters below 32, and a hexadecimal numeric
    character reference ("&#xNN;") for everything else.
    """
    ch = match.group(0)
    if ch == '\xA0':
        return '&nbsp;'
    och = ord(ch)
    if och < 32:
        # Control characters are shown as a visible backslash-hex code
        # rather than as a character reference.
        return '\\%02X' % (och,)
    return '&#x%02X;' % (och,)
# Match a line's leading run of spaces, allowing any number of opening
# tags (tags containing no "/") to come before it. Group 1 is the spaces.
pat_initial_spaces = re.compile('^(?:<[^/>]*>)*([ ]+)')


def remove_initial_spaces(ln):
    """Measure the leading indentation of one formatted line.

    ln -- a line of formatter output, possibly starting with opening tags.

    Returns a (line, indent) pair, where indent is the number of leading
    spaces found after any opening tags, or 0 if there are none.
    """
    found = pat_initial_spaces.match(ln)
    if found is None:
        return ln, 0
    # Group 1 is "[ ]+", so a successful match always has at least one space.
    spaces = found.group(1)
    before = ln[:found.start(1)]
    after = ln[found.end(1):]
    # NOTE(review): both wrapper literals around the spaces are empty, so the
    # line comes back unchanged. The file's header comment says the leading
    # whitespace should be "hidden by CSS", which suggests wrapper markup
    # belongs here -- confirm what it should be.
    return before + '' + spaces + '' + after, len(spaces)
def uniquify(val, set):
    """Return a name based on val that is not yet in set, and record it.

    val -- the preferred name.
    set -- the collection of names already handed out; the returned name
        is added to it.

    If val is free it is returned as-is. Otherwise "_1", "_2", ... are
    appended in turn until an unused name is found.
    """
    candidate = val
    suffix = 0
    while candidate in set:
        suffix += 1
        candidate = '%s_%d' % (val, suffix,)
    set.add(candidate)
    return candidate
# Match a line that consists of a single heading span. ("gh" is the CSS
# class Pygments' HTML formatter gives to Generic.Heading tokens.)
# NOTE(review): the span markup in this pattern was reconstructed; confirm
# it matches what the formatter actually emits for heading lines.
pat_headerline = re.compile('^<span class="gh">(.*)</span>$')
# Match an HTML entity such as "&amp;" or "&#x201C;".
pat_htmlentity = re.compile('[&]([a-zA-Z0-9#]+);')
# Match a run of characters that are not ASCII letters or digits.
pat_nonalphanumrun = re.compile('[^a-zA-Z0-9]+')

# Anchor names handed out so far (used to keep them unique).
anchor_set = set()
# One (sectype, anchor, val) tuple per heading line seen, in document order.
# sectype is an index into ZarfI7Lexer.i7sectionlist, or None if the heading
# does not start with a section keyword.
anchor_list = []


def anchorify(ln):
    """Turn a heading line into a named anchor and record it for the TOC.

    ln -- one line of formatted HTML.

    If ln is not a heading line it is returned unchanged. Otherwise an
    anchor name is derived from the heading text (entities become "*",
    runs of non-alphanumerics become "_", result lowercased and made
    unique), an entry is appended to anchor_list, and the line is returned
    wrapped in an anchor element carrying that name as its id.
    """
    match = pat_headerline.match(ln)
    if not match:
        return ln
    val = match.group(1)

    # The first word decides the section level (volume, book, part, ...).
    valpart, _, _ = val.partition(' ')
    valpart = valpart.lower()
    sectype = None
    if valpart in ZarfI7Lexer.i7sectionnames:
        sectype = ZarfI7Lexer.i7sectionlist.index(valpart)

    anchor = pat_htmlentity.sub('*', val)
    anchor = pat_nonalphanumrun.sub('_', anchor)
    anchor = anchor.strip('_')
    if not anchor:
        # Heading had no letters or digits at all; fall back to a stub name.
        anchor = '_'
    anchor = anchor.lower()
    anchor = uniquify(anchor, anchor_set)
    anchor_list.append( (sectype, anchor, val) )
    # NOTE(review): the anchor markup was reconstructed (the previous format
    # string had a single %s for two arguments and raised TypeError);
    # confirm the element and attributes against the stylesheet.
    return '<a id="%s"><span class="gh">%s</span></a>' % (anchor, val,)
# Subclass of HtmlFormatter which converts tabs to spaces, escapes non-ASCII
# characters, figures out left-margin indentation, and highlights certain
# lines.
# (It used to add the document wrapper itself, but HtmlFormatter now
# handles this.)
class I7HtmlFormatter(HtmlFormatter):
    """HTML formatter that emits every source line as its own element.

    Each line gets an inline margin-left/text-indent style computed from
    its leading whitespace, and lines listed in hl_lines are wrapped in an
    extra highlighting element.
    """

    def wrap(self, source):
        """Replace the default line wrapping with _wrap_code()."""
        return self._wrap_code(source)

    def _wrap_code(self, source):
        """Yield (flag, text) pairs with each code line rewritten.

        source -- iterable of (flag, text) pairs; flag 1 marks a line of
            formatted code, which is the only kind that gets rewritten.
        """
        linenum = 0
        for ix, ln in source:
            if ix == 1:
                linenum += 1
                ln = ln.rstrip()
                # Convert tabs to spaces.
                ln = spacerun_pat.sub(spacerun_func, ln)
                # HTML-escape non-ASCII characters.
                ln = nonascii_pat.sub(nonascii_func, ln)
                # Change section-header spans to anchors.
                if opts.toc:
                    ln = anchorify(ln)
                # Trim initial whitespace.
                ln, indent = remove_initial_spaces(ln)
                if not ln:
                    # A completely blank line would collapse, so we add a
                    # space.
                    ln = ' '
                # Add the per-line wrapper. The negative text-indent makes
                # wrapped continuation lines sit further in than the first.
                # NOTE(review): the wrapper element was reconstructed as
                # <pre>, following the file's header comment -- confirm.
                margin = 'margin-left: %dch; text-indent: -2ch;' % (indent+2,)
                ln = '<pre style="%s">' % (margin,) + ln + '</pre>'
                # If this line should be highlighted, wrap the whole thing
                # (<pre> and all) in a highlighting <div>.
                # NOTE(review): the class name "hll" is a reconstruction
                # (it is the name Pygments uses for highlighted lines);
                # confirm against the stylesheet.
                if linenum in self.hl_lines:
                    ln = '<div class="hll">' + ln + '</div>'
                ln = ln + '\n'
            yield ix, ln

    # Suppress the default hl_lines highlighter wrapper, since we've already
    # handled that.
    def _highlight_lines(self, tokensource):
        """Return tokensource unchanged."""
        return tokensource
# Subclass of HtmlFormatter which converts tabs to spaces, escapes non-ASCII
# characters, and adds the document wrapper.
# This does *not* do all the clever left-margin/text-indent stuff. The result
# doesn't wrap as nicely, and it looks terrible without CSS. Simpler though.
class I7HtmlFormatterOld(HtmlFormatter):
    """Older, simpler HTML formatter: cleans each line, wraps the whole."""

    def wrap(self, source):
        """Replace the default line wrapping with _wrap_code()."""
        return self._wrap_code(source)

    def _wrap_code(self, source):
        """Yield (flag, text) pairs bracketed by an opening and closing tag.

        source -- iterable of (flag, text) pairs; flag 1 marks a line of
            formatted code. Wrapper lines are yielded with flag 0.
        """
        # NOTE(review): the wrapper markup was reconstructed from the
        # cssclass='i7' setting used by this script -- confirm. With a
        # Pygments version that adds its own cssclass <div>, this nests.
        yield 0, '<div class="i7">\n'
        for ix, ln in source:
            if ix == 1:
                ln = ln.rstrip()
                # Convert tabs to spaces, then escape non-ASCII characters.
                ln = spacerun_pat.sub(spacerun_func, ln)
                ln = nonascii_pat.sub(nonascii_func, ln)
                ln = ln + '\n'
            yield ix, ln
        yield 0, '</div>\n'
# Custom lexer for Inform 7.
# Pygments has a built-in I7 lexer, but it insists on syntax-coloring
# any I6 inclusions within the I7 code. I think that looks terrible.
# This one restricts itself to coloring I6 strings and comments, and it
# uses different classes from the I7 ones.
class ZarfI7Lexer(Lexer):
    name = 'ZarfI7Lexer'
    aliases = ['inform7', 'i7']
    filenames = ['*.ni', '*.i7x']

    # Section keywords, outermost first. A keyword's index in this list is
    # used elsewhere in the file as the section's nesting level.
    i7sectionlist = ['volume', 'book', 'part', 'chapter', 'section']
    # The same keywords as a set, for membership tests.
    i7sectionnames = set(i7sectionlist)

    # Characters that open or close an I7 string: the ASCII double quote
    # plus the left and right curly double quotes.
    doublequotes = '"\u201C\u201D'

    def get_tokens_unprocessed(self, text):
        # Scan text one character at a time, yielding
        # (start index, token type, substring) tuples.
        #
        # pos is the scan position; last is the start of the text that has
        # not been yielded yet. linestyle is the token type used for plain
        # text on the current line (Token.Text, or Token.Generic.Heading on
        # a header line). lastch is the previously scanned character, used
        # to spot the two-character "(-" and "-)" delimiters.
        linestyle = Token.Text
        pos = 0
        last = 0
        lentext = len(text)
        lastch = ''
        linestart = True
        linecount = 0
        while True:
            if linestart:
                linecount += 1
                # Check to see if this is a special (header) line. If so,
                # flush the output and set the style for the rest of the
                # line.
                if pos > last:
                    yield (last, linestyle, text[last:pos])
                    last = pos
                val = text.find(' ', pos)
                # Technically the game title or section name can be
                # indented, but we don't handle that.
                # We should recognize the "---- DOCUMENTATION ----" heading
                # here, and switch into a mode where all text is comments
                # except indented text. Currently we don't.
                if val >= pos and text[pos:val].lower() in self.i7sectionnames:
                    # Section header line.
                    linestyle = Token.Generic.Heading
                elif linecount == 1 and pos < lentext and text[pos] in self.doublequotes:
                    # Game title line.
                    linestyle = Token.Generic.Heading
                else:
                    # Regular line.
                    linestyle = Token.Text
            if pos >= lentext:
                # End of document.
                if pos > last:
                    yield (last, linestyle, text[last:pos])
                    last = pos
                break
            ch = text[pos]
            if ch == '-' and lastch == '(':
                # Begin an I6 inclusion section
                # Step back so the "(" is part of the inclusion; flush the
                # text before it.
                pos -= 1
                if pos > last:
                    yield (last, linestyle, text[last:pos])
                    last = pos
                last = pos
                # Skip over the "(-" opener.
                pos += 2
                lastch = ''
                while pos < lentext:
                    ch = text[pos]
                    if ch == ')' and lastch == '-':
                        # Found the "-)" closer.
                        break
                    if ch == '"':
                        # Begin an I6 string
                        if pos > last:
                            yield (last, Token.Other, text[last:pos])
                            last = pos
                        pos += 1
                        while pos < lentext:
                            if text[pos] == '"':
                                break
                            pos += 1
                        # Include the closing quote in the token.
                        pos += 1
                        yield (last, Token.String.Other, text[last:pos])
                        last = pos
                        lastch = ''
                        continue
                    if ch == '\'':
                        # Begin a single-quoted I6 string (dict word)
                        if pos > last:
                            yield (last, Token.Other, text[last:pos])
                            last = pos
                        pos += 1
                        while pos < lentext:
                            if text[pos] == '\'':
                                break
                            pos += 1
                        # Include the closing quote in the token.
                        pos += 1
                        yield (last, Token.String.Other, text[last:pos])
                        last = pos
                        lastch = ''
                        continue
                    if ch == '!':
                        # Begin an I6 comment line
                        if pos > last:
                            yield (last, Token.Other, text[last:pos])
                            last = pos
                        pos += 1
                        while pos < lentext:
                            if text[pos] == '\n':
                                break
                            pos += 1
                        # Include the newline in the comment token.
                        pos += 1
                        yield (last, Token.Comment.Single, text[last:pos])
                        last = pos
                        lastch = ''
                        continue
                    pos += 1
                    lastch = ch
                # Step past the ")" of the closer.
                pos += 1
                # I6 code content
                yield (last, Token.Other, text[last:pos])
                last = pos
                lastch = ''
                continue
            if ch == '[' and linestyle != Token.Generic.Heading:
                # Begin an I7 comment. These can be recursive, so we need to
                # count bracket depth.
                if pos > last:
                    yield (last, linestyle, text[last:pos])
                    last = pos
                pos += 1
                depth = 1
                while pos < lentext:
                    if text[pos] == '[':
                        depth += 1
                    if text[pos] == ']':
                        depth -= 1;
                        if depth == 0:
                            break
                    pos += 1
                # Include the final "]" in the comment token.
                pos += 1
                yield (last, Token.Comment.Multiline, text[last:pos])
                last = pos
                lastch = ''
                continue
            if ch in self.doublequotes and linestyle != Token.Generic.Heading:
                # Begin an I7 string.
                if pos > last:
                    yield (last, linestyle, text[last:pos])
                    last = pos
                pos += 1
                while pos < lentext:
                    if text[pos] in self.doublequotes:
                        break
                    if text[pos] == '[':
                        # Begin an I7 string interpolation.
                        if pos > last:
                            yield (last, Token.String.Double, text[last:pos])
                            last = pos
                        pos += 1
                        while pos < lentext:
                            if text[pos] == ']':
                                break
                            pos += 1
                        # Include the "]" in the interpolation token.
                        pos += 1
                        yield (last, Token.String.Interpol, text[last:pos])
                        last = pos
                        continue
                    pos += 1
                # Include the closing quote in the token.
                pos += 1
                # I7 string content
                yield (last, Token.String.Double, text[last:pos])
                last = pos
                lastch = ''
                continue
            # Ordinary character: advance, and note whether the next
            # character starts a new line. (The "continue" branches above
            # skip this, so linestart keeps its earlier value after them.)
            pos += 1
            lastch = ch
            linestart = (ch == '\n' or ch == '\r')
        return
# Choose the lexer: Pygments' stock Inform 7 lexer, or the custom one
# defined in this file.
if opts.stockmode:
    i7lexer = get_lexer_by_name('inform7')
else:
    i7lexer = ZarfI7Lexer()

# Structure of HTML document, including the stylesheet.
# NOTE(review): only the two placeholders are present in this template; the
# HTML skeleton and stylesheet that the line above mentions are not here.
# Restore them if they were lost.
template = '''
$TITLE$
$BODY$
'''

if len(args) != 1:
    print('usage: i7-to-html.py story.ni [ -t title ] [ -o out.html ]')
    sys.exit()

with open(args[0]) as fl:
    code = fl.read()

# The (stock) I7 formatter is slightly buggy about lines that contain only
# whitespace. We'll strip those down to empty lines.
ls = [ val.rstrip() for val in code.split('\n') ]
code = '\n'.join(ls) + '\n'

# Create a list of lines to highlight, if the --highlight option was given.
# Currently this has to be a string like "10" or "30-39".
highlightlist = None
if opts.highlight:
    histart, _, hiend = opts.highlight.partition('-')
    if hiend:
        highlightlist = list(range(int(histart), int(hiend)+1))
    else:
        highlightlist = [ int(histart) ]

# Check which formatter to use.
formatter = I7HtmlFormatterOld if opts.oldmode else I7HtmlFormatter

# We always use an empty lineseparator option. (The highlight option adds
# extra lineseparators into the output, and we don't want that. We add
# our own line breaks in the _wrap_code() method.)
# The cssclass option gives the class of the <div> wrapper around the whole
# code block.
formatopts = { 'lineseparator': '', 'cssclass': 'i7' }
if highlightlist is not None:
    formatopts['hl_lines'] = highlightlist
htmlformat = formatter(**formatopts)

dat = highlight(code, i7lexer, htmlformat)

result = template

# The title is HTML-escaped and made ASCII-only before being substituted.
# If no title was given, the $TITLE$ placeholders are left as they are.
titlestr = None
if opts.title:
    titlestr = nonascii_pat.sub(nonascii_func, html.escape(opts.title))
    result = result.replace('$TITLE$', titlestr)

if opts.toc:
    # Build the table of contents from the headings that anchorify()
    # recorded while formatting. Only headings that begin with a section
    # keyword are listed, indented relative to the outermost level present.
    # NOTE(review): the list markup and the separator below were
    # reconstructed (the previous literals were broken across lines and
    # the entry format had fewer placeholders than arguments) -- confirm
    # them against the stylesheet.
    toclines = ['<ul>']
    levels = [ sectype for (sectype, anchor, val) in anchor_list
               if sectype is not None ]
    if levels:
        mindepth = min(levels)
        for (sectype, anchor, val) in anchor_list:
            if sectype is None:
                continue
            depth = sectype - mindepth
            ln = '<li style="margin-left: %dch;"><a href="#%s">%s</a></li>' % (2*depth, anchor, val,)
            toclines.append(ln)
    toclines.append('</ul>')
    dat = '\n'.join(toclines) + '\n<hr>\n' + dat

if opts.headfile:
    with open(opts.headfile) as fl:
        headdat = fl.read()
    if titlestr is not None:
        headdat = headdat.replace('$TITLE$', titlestr)
    dat = headdat + dat

if opts.footfile:
    with open(opts.footfile) as fl:
        footdat = fl.read()
    if titlestr is not None:
        footdat = footdat.replace('$TITLE$', titlestr)
    # The footer goes *after* the body. (It used to be prepended, which put
    # it at the top of the page.)
    dat = dat + footdat

result = result.replace('$BODY$', dat)

if not opts.outfile:
    print(result)
else:
    with open(opts.outfile, 'w') as fl:
        fl.write(result)
    print('Generated ' + opts.outfile)