#!ruby
# =============================================================================
#
# Convert an OPML file to an ordered list, with the addition of
# hierarchical numbering, both in text and as named anchors,
# for embedding in HTML and/or WIKI
#
# *** Read the OPML file (as XML, in a very simple format)
# TODO: Look at OPML specification; currently only read <outline text="text">
#
# *** Write the <body> of the OPML as an HTML outline with named anchors
# and hierarchical legal outline numbers, suitable for embedding.
#
# TODO:
# Explicitly use "Purple Numbers" and/or Node IDs
# (http://www.eekim.com/software/purple/purple.html)
# Explicitly conform to XOXO
# (http://microformats.org/wiki/xoxo)
# Learn Ruby
#
# Written August, 2007, Edwin Wise, Simulated Reality Systems, LLC
# www.simreal.com
#
# DEPENDENCIES:
# REXML http://www.germane-software.com/software/rexml/
# GetoptLong http://www.ruby-doc.org/stdlib/libdoc/getoptlong/rdoc/index.html
# =============================================================================
# =============================================================================
# Help / Usage
# =============================================================================
# Print an optional error banner followed by the command-line help text.
#
# All output goes to standard output. This method only prints; it does not
# terminate the program, so callers decide whether to exit afterwards.
#
# @param errorMesg [Array<String>] zero or more error messages; when any are
#   given they are printed (one per line) inside an "ERROR: '...'" banner
#   ahead of the usage text.
# @return [nil]
def usage(*errorMesg)
  print "\n\nERROR: '" + errorMesg.join("\n") + "'\n" unless errorMesg.empty?
  print "\nUsage:\n"
  print "OPMLtoOL.rb --opml file.opml --dest folder --format style\n"
  print "where:\n"
  print " --opml Source OPML file.\n"
  print " --dest Destination folder name (optional).\n"
  print " --format Output format, one of: pbwiki, mediawiki, html.\n"
  print " --help Show this message.\n"
end
# =============================================================================
# genOutlineString
#
# Given an outline numbering array, turn it into a relevant string
# =============================================================================
# Render an outline-position array as a delimited string.
#
# Every index is followed by +delim+. The trailing delimiter is removed only
# when +delim+ is "_" (anchor ids such as "1_2_3"); any other delimiter keeps
# its trailing copy (for example "1.2.3.").
#
# @param outline [Array<Integer>] position at each nesting level, outermost first
# @param delim [String] separator appended after every index
# @return [String] the joined outline number
def genOutlineString(outline, delim)
  joined = outline.reduce("".dup) { |acc, index| acc << (index.to_s + delim) }
  joined.chop! if delim == "_"
  joined
end
# =============================================================================
# GenericFormatter base class
#
# Given an element, pull out its text and generate the appropriate list
# entry. If that element has sub-elements, recurses appropriately.
#
# Abstract wrapper only; subclass to give specific formats.
# =============================================================================
# Abstract outline formatter.
#
# Subclasses supply the markup fragments (list/item delimiters, indentation,
# anchor and link text); this class supplies the recursive walk that numbers
# each outline element and writes it out. Calling any markup hook on this
# base class raises a RuntimeError.
class GenericFormatter
  # @return [String] markup that opens a (sub)list
  def listStart
    raise "Invalid use of GenericFormatter.listStart"
  end

  # @return [String] markup that closes a (sub)list
  def listEnd
    raise "Invalid use of GenericFormatter.listEnd"
  end

  # @param depth [Integer] nesting depth of the line about to be written
  # @return [String] leading whitespace for that depth
  def indent(depth)
    raise "Invalid use of GenericFormatter.indent"
  end

  # @return [String] markup that opens a list item
  def itemStart
    raise "Invalid use of GenericFormatter.itemStart"
  end

  # @return [String] markup that closes a list item
  def itemEnd
    raise "Invalid use of GenericFormatter.itemEnd"
  end

  # @param hidAnchor [String] anchor id for the item (e.g. "hid_1_2")
  # @return [String] markup that declares the named anchor
  def anchor(hidAnchor)
    raise "Invalid use of GenericFormatter.anchor"
  end

  # @param hidAnchor [String] anchor id the link points at
  # @param hidName [String] visible outline number (e.g. "1.2.")
  # @return [String] markup linking the visible number to its own anchor
  def link(hidAnchor, hidName)
    raise "Invalid use of GenericFormatter.link"
  end

  # Text displayed for an outline element.
  #
  # @param element [#attributes] outline element
  # @return [String] value of the "text" attribute, or "" when the attribute
  #   is absent (so the caller's string concatenation cannot fail on nil)
  def title(element)
    element.attributes["text"].to_s
  end

  # Write one outline element and, recursively, its "outline" children.
  #
  # +outline+ is the running position counter, one entry per nesting level.
  # It must be non-empty on entry: the last entry is incremented in place to
  # become this element's number among its siblings. A 0 is pushed while the
  # children are written and popped afterwards, so on return the array has
  # the same length it had on entry.
  #
  # @param dstFile [#print] destination stream
  # @param element [#attributes, #has_elements?, #elements] element to write
  # @param outline [Array<Integer>] position counters, mutated in place
  # @return [void]
  def writeElement(dstFile, element, outline)
    depth = outline.size
    outline.push(outline.pop + 1)

    # Current item: "hid_1_2" is the anchor id, "1.2." the visible number.
    hidAnchor = "hid_" + genOutlineString(outline, "_")
    hidName = genOutlineString(outline, ".")
    dstFile.print indent(depth) + itemStart + anchor(hidAnchor) +
                  link(hidAnchor, hidName) + title(element) + itemEnd

    return unless element.has_elements?

    # Children. Note the sub-list is printed after this item's itemEnd, i.e.
    # following the item rather than nested inside it.
    dstFile.print indent(depth) + listStart
    outline.push(0)
    element.elements.each("outline") do |subelement|
      writeElement(dstFile, subelement, outline)
    end
    dstFile.print indent(depth) + listEnd
    outline.pop
  end
end
# =============================================================================
# HTML Formatter
# =============================================================================
# Formatter that emits a plain HTML unordered list, one tag per line.
#
# Each item carries a <span id="..."> anchor wrapping a self-referencing link
# whose text is the outline number.
class HTMLFormatter < GenericFormatter
  # @return [String] opening list tag followed by a newline
  def listStart
    "<ul>\n"
  end

  # @return [String] closing list tag followed by a newline
  def listEnd
    "</ul>\n"
  end

  # @param depth [Integer] nesting depth
  # @return [String] one space per nesting level
  def indent(depth)
    " " * depth
  end

  # @return [String] opening item tag
  def itemStart
    "<li>"
  end

  # @return [String] closing item tag followed by a newline
  def itemEnd
    "</li>\n"
  end

  # Opens the span that carries the anchor id; the span is closed by #link.
  #
  # @param hidAnchor [String] anchor id
  # @return [String] opening <span> tag
  def anchor(hidAnchor)
    '<span id="' + hidAnchor + '">'
  end

  # @param hidAnchor [String] anchor id to link to
  # @param hidName [String] visible outline number
  # @return [String] the link plus the </span> that closes #anchor's span
  def link(hidAnchor, hidName)
    '<a href="#' + hidAnchor + '">' + hidName + "</a></span>"
  end
end
# =============================================================================
# MediaWiki Formatter
# =============================================================================
# HTML list output whose item links use MediaWiki's [[#anchor|label]] syntax
# instead of an <a> tag. Everything else is inherited from HTMLFormatter.
class MediaWikiFormatter < HTMLFormatter
  # @param hidAnchor [String] anchor id to link to
  # @param hidName [String] visible outline number
  # @return [String] wiki-style internal link followed by the closing </span>
  def link(hidAnchor, hidName)
    format("[[#%s|%s]]</span>", hidAnchor, hidName)
  end
end
# =============================================================================
# PBWiki Formatter
# =============================================================================
# HTML list output for PBWiki: the same tags as HTMLFormatter, but with no
# indentation and no newline after any tag.
class PBWikiFormatter < HTMLFormatter
  # @return [String] opening list tag, no trailing newline
  def listStart
    "<ul>"
  end

  # @return [String] closing list tag, no trailing newline
  def listEnd
    "</ul>"
  end

  # @return [String] closing item tag, no trailing newline
  def itemEnd
    "</li>"
  end

  # @param depth [Integer] nesting depth (ignored)
  # @return [String] always the empty string
  def indent(depth)
    ""
  end
end
# =============================================================================
# Main Loop
# =============================================================================
begin
  require "rexml/document"
  require "getoptlong"

  # Report a fatal problem via the usage text and stop with a failure status.
  abort_with_usage = lambda do |message|
    usage(message)
    exit 1
  end

  opts = GetoptLong.new(
    ["--opml", GetoptLong::REQUIRED_ARGUMENT],
    ["--dest", GetoptLong::OPTIONAL_ARGUMENT],
    ["--format", GetoptLong::REQUIRED_ARGUMENT],
    ["--help", GetoptLong::NO_ARGUMENT]
  )

  srcOPMLFilename = ''
  destFolderName = ''
  formatStr = "mediawiki"

  # GetoptLong raises while iterating, so the rescue has to wrap the loop.
  begin
    opts.each do |opt, arg|
      case opt
      when "--opml" then srcOPMLFilename = arg
      when "--dest" then destFolderName = arg
      when "--format" then formatStr = arg
      when "--help"
        usage
        exit 0
      else
        abort_with_usage.call("Unrecognized option #{opt}...\n")
      end
    end
  rescue GetoptLong::Error
    # Covers invalid options, missing arguments, and the other parse errors.
    usage
    exit 1
  end

  formatter = case formatStr
              when "mediawiki" then MediaWikiFormatter.new
              when "pbwiki" then PBWikiFormatter.new
              when "html" then HTMLFormatter.new
              end

  # Validate inputs; each failure prints the usage text and exits.
  abort_with_usage.call("You must specify a valid --format") if formatter.nil?
  abort_with_usage.call("You must specify --opml") if srcOPMLFilename.empty?
  unless File.exist?(srcOPMLFilename)
    abort_with_usage.call("OPML File '" + srcOPMLFilename + "' doesn't exist")
  end
  unless File.readable?(srcOPMLFilename)
    abort_with_usage.call("OPML File '" + srcOPMLFilename + "' isn't readable")
  end

  # Default the destination folder to the one holding the source file.
  destFolderName = File.dirname(srcOPMLFilename) if destFolderName.empty?

  # Output is always "<source basename>.html", whatever the chosen format.
  destOLFilename = File.join(destFolderName,
                             File.basename(srcOPMLFilename, ".opml") + ".html")

  # Parse inside the block so the source handle is closed once the document
  # has been built.
  opml = File.open(srcOPMLFilename, File::RDONLY) do |srcFile|
    REXML::Document.new(srcFile)
  end

  # One counter per nesting level; the top level starts at 0 and is
  # incremented by writeElement for each element written.
  outline = [0]
  File.open(destOLFilename, File::WRONLY | File::CREAT | File::TRUNC) do |dstFile|
    dstFile.print formatter.listStart
    opml.elements.each("opml/body/outline") do |element|
      formatter.writeElement(dstFile, element, outline)
    end
    dstFile.print formatter.listEnd
  end
end
# (Wiki page footer captured along with the script; not part of the program.)
# Comments (0)
# You don't have permission to comment on this page.