#!ruby
# =============================================================================
#
# Convert an OPML file to an ordered list, with the addition of
# hierarchical numbering, both in text and as named anchors,
# for embedding in HTML and/or WIKI
#
# *** Read the OPML file (as XML, in a very simple format)
#     TODO: Look at OPML specification; currently only read <outline text="text">
#
# *** Write the <body> of the OPML as an HTML outline with named anchors
#     and hierarchical legal outline numbers, suitable for embedding.
#
# TODO:
#   Explicitly use "Purple Numbers" and/or Node IDs
#     (http://www.eekim.com/software/purple/purple.html)
#   Explicitly conform to XOXO
#     (http://microformats.org/wiki/xoxo)
#   Learn Ruby
#
# Written August, 2007, Edwin Wise, Simulated Reality Systems, LLC
# www.simreal.com
#
# DEPENDENCIES:
#   REXML       http://www.germane-software.com/software/rexml/
#   GetoptLong  http://www.ruby-doc.org/stdlib/libdoc/getoptlong/rdoc/index.html
# =============================================================================

require "rexml/document"
require "getoptlong"
include REXML

# =============================================================================
# Help / Usage
#
# Prints the command-line help to standard output. Any arguments are treated
# as error messages and are printed (one per line) ahead of the help text.
# This method only prints; deciding whether to terminate is up to the caller.
# =============================================================================
def usage(*errorMesg)
  print "\n\nERROR: '" + errorMesg.join("\n") + "'\n" unless errorMesg.empty?
  print "\nUsage:\n"
  print "OPMLtoOL.rb --opml file.opml --dest folder --format style\n"
  print "where:\n"
  print " --opml Source OPML file.\n"
  print " --dest Destination folder name (optional).\n"
  print " --format Output format, one of: pbwiki, mediawiki, html.\n"
end

# =============================================================================
# genOutlineString
#
# Given an outline numbering array, turn it into a relevant string.
#
# Every index is followed by +delim+. When +delim+ is "_" (the form used for
# anchor ids) the trailing delimiter is removed, giving "1_2_3"; for any other
# delimiter it is kept, giving the legal-outline form "1.2.3.".
# =============================================================================
def genOutlineString(outline, delim)
  string = outline.map { |index| "#{index}#{delim}" }.join
  string.chop! if delim == "_"
  string
end

# =============================================================================
# GenericFormatter base class
#
# Given an element, pull out its text and generate the appropriate list
# entry. If that element has sub-elements, recurses appropriately.
#
# Abstract wrapper only; subclass to give specific formats.
# =============================================================================
class GenericFormatter
  # Markup that opens a (sub)list.
  def listStart
    raise "Invalid use of GenericFormatter.listStart"
  end

  # Markup that closes a (sub)list.
  def listEnd
    raise "Invalid use of GenericFormatter.listEnd"
  end

  # Leading whitespace for a line at the given nesting depth.
  def indent(depth)
    raise "Invalid use of GenericFormatter.indent"
  end

  # Markup that opens a list item.
  def itemStart
    raise "Invalid use of GenericFormatter.itemStart"
  end

  # Markup that closes a list item.
  def itemEnd
    raise "Invalid use of GenericFormatter.itemEnd"
  end

  # Markup that opens the named anchor for an item.
  def anchor(hidAnchor)
    raise "Invalid use of GenericFormatter.anchor"
  end

  # Markup for the visible outline number, linking to the item's own anchor.
  def link(hidAnchor, hidName)
    raise "Invalid use of GenericFormatter.link"
  end

  # Text of the item, taken from the element's "text" attribute. Returns an
  # empty string when the attribute is absent so callers can always
  # concatenate the result.
  # NOTE(review): the text is emitted verbatim (no HTML escaping); presumably
  # intentional so markup stored in the OPML passes through - confirm.
  def title(element)
    element.attributes["text"].to_s
  end

  # Writes +element+ as one list item to +dstFile+, then recurses into its
  # child <outline> elements.
  #
  # +outline+ is the running numbering stack: its last entry is the count of
  # siblings already written at the current level. It is mutated in place
  # (incremented for this item, pushed/popped around the children).
  def writeElement(dstFile, element, outline)
    depth = outline.size
    outline[-1] += 1

    #
    # Current Item
    #
    hidAnchor = "hid_" + genOutlineString(outline, "_")
    hidName = genOutlineString(outline, ".")
    dstFile.print "#{indent(depth)}#{itemStart}#{anchor(hidAnchor)}" \
                  "#{link(hidAnchor, hidName)}#{title(element)}#{itemEnd}"

    #
    # Optional Element Recursion
    #
    # NOTE(review): the item is closed before its sub-list is opened, so the
    # nested list is emitted as a sibling of the item rather than inside it.
    # Kept as-is to preserve the existing output.
    return unless element.has_elements?

    dstFile.print indent(depth) + listStart
    outline.push(0)
    element.elements.each("outline") do |subelement|
      writeElement(dstFile, subelement, outline)
    end
    dstFile.print indent(depth) + listEnd
    outline.pop
  end
end

# =============================================================================
# HTML Formatter
# =============================================================================
class HTMLFormatter < GenericFormatter
  def listStart
    "<ul>\n"
  end

  def listEnd
    "</ul>\n"
  end

  def indent(depth)
    " " * depth
  end

  def itemStart
    "<li>"
  end

  def itemEnd
    "</li>\n"
  end

  # Opens the <span> carrying the anchor id; #link closes it.
  def anchor(hidAnchor)
    '<span id="' + hidAnchor + '">'
  end

  def link(hidAnchor, hidName)
    '<a href="#' + hidAnchor + '">' + hidName + "</a></span>"
  end
end

# =============================================================================
# MediaWiki Formatter
#
# Same as HTML, except the self-link uses wiki link syntax.
# =============================================================================
class MediaWikiFormatter < HTMLFormatter
  def link(hidAnchor, hidName)
    "[[#" + hidAnchor + "|" + hidName + "]]</span>"
  end
end

# =============================================================================
# PBWiki Formatter
#
# Same as HTML, but with no indentation and no line breaks in the output.
# =============================================================================
class PBWikiFormatter < HTMLFormatter
  def indent(depth)
    ""
  end

  def listStart
    "<ul>"
  end

  def listEnd
    "</ul>"
  end

  def itemEnd
    "</li>"
  end
end

# Maps each accepted --format value to the formatter class implementing it.
FORMATTER_CLASSES = {
  "mediawiki" => MediaWikiFormatter,
  "pbwiki" => PBWikiFormatter,
  "html" => HTMLFormatter
}.freeze

# =============================================================================
# Command-line handling
# =============================================================================

# Parses ARGV and returns a hash with the keys :opml, :dest, :format and
# :help. Raises a GetoptLong::Error subclass on a malformed command line.
def parseCommandLine
  settings = { opml: "", dest: "", format: "mediawiki", help: false }
  opts = GetoptLong.new(
    ["--opml", GetoptLong::REQUIRED_ARGUMENT],
    ["--dest", GetoptLong::OPTIONAL_ARGUMENT],
    ["--format", GetoptLong::REQUIRED_ARGUMENT],
    ["--help", GetoptLong::NO_ARGUMENT]
  )
  opts.each do |opt, arg|
    case opt
    when "--opml" then settings[:opml] = arg
    when "--dest" then settings[:dest] = arg
    when "--format" then settings[:format] = arg
    when "--help" then settings[:help] = true
    end
  end
  settings
end

# Returns a message describing the first problem with the requested format or
# source file, or nil when both are usable.
def findInputProblem(formatStr, srcOPMLFilename)
  return "You must specify a valid --format" unless FORMATTER_CLASSES.key?(formatStr)
  return "You must specify --opml" if srcOPMLFilename.empty?
  return "OPML File '" + srcOPMLFilename + "' doesn't exist" unless File.exist?(srcOPMLFilename)
  return "OPML File '" + srcOPMLFilename + "' isn't readable" unless File.readable?(srcOPMLFilename)

  nil
end

# Reads the OPML document at +srcOPMLFilename+ and writes every top-level
# opml/body/outline element (with its descendants) to +destOLFilename+ using
# +formatter+. The source is parsed completely before the destination is
# opened, so a parse failure does not truncate an existing output file.
def convertOPML(srcOPMLFilename, destOLFilename, formatter)
  opml = File.open(srcOPMLFilename, File::RDONLY) { |srcFile| Document.new(srcFile) }
  outline = [0]
  File.open(destOLFilename, File::WRONLY | File::CREAT | File::TRUNC) do |dstFile|
    dstFile.print formatter.listStart
    opml.elements.each("opml/body/outline") do |element|
      formatter.writeElement(dstFile, element, outline)
    end
    dstFile.print formatter.listEnd
  end
end

# =============================================================================
# Main Loop
#
# Returns the process exit status: 0 on success (or --help), 1 on any error.
# =============================================================================
def runOPMLtoOL
  begin
    settings = parseCommandLine
  rescue GetoptLong::Error
    # GetoptLong has already reported the specific problem on stderr.
    usage
    return 1
  end

  if settings[:help]
    usage
    return 0
  end

  srcOPMLFilename = settings[:opml]
  problem = findInputProblem(settings[:format], srcOPMLFilename)
  if problem
    usage(problem)
    return 1
  end

  # Default to writing next to the source file.
  destFolderName = settings[:dest]
  destFolderName = File.dirname(srcOPMLFilename) if destFolderName.empty?
  unless File.directory?(destFolderName)
    usage("Destination folder '" + destFolderName + "' doesn't exist")
    return 1
  end

  destOLFilename = File.join(destFolderName, File.basename(srcOPMLFilename, ".opml") + ".html")

  begin
    convertOPML(srcOPMLFilename, destOLFilename, FORMATTER_CLASSES[settings[:format]].new)
  rescue ParseException
    usage("OPML File '" + srcOPMLFilename + "' isn't well-formed XML")
    return 1
  end
  0
end

exitStatus = runOPMLtoOL
exit(exitStatus) unless exitStatus.zero?