| 
  • If you are a citizen of a European Union member nation, you may not use this service unless you are at least 16 years old.

View
 

OPMLtoOL

Page history last edited by PBworks 17 years, 2 months ago


#!ruby
# =============================================================================
#
# Convert an OPML file to an ordered list, with the addition of
# hierarchical numbering, both in text and as named anchors,
# for embedding in HTML and/or WIKI
#
# *** Read the OPML file (as XML, in a very simple format)
#     TODO: Look at OPML specification; currently only read <outline text="text">
#
# *** Write the <body> of the OPML as an HTML outline with named anchors
#     and hierarchical legal outline numbers, suitable for embedding.
#
# TODO:
#     Explicitly use "Purple Numbers" and/or Node IDs 
#           (http://www.eekim.com/software/purple/purple.html)
#     Explicitly conform to XOXO
#           (http://microformats.org/wiki/xoxo)
#     Learn Ruby
#
# Written August, 2007, Edwin Wise, Simulated Reality Systems, LLC
# www.simreal.com
#
# DEPENDENCIES:   
#  REXML       http://www.germane-software.com/software/rexml/
#  GetoptLong  http://www.ruby-doc.org/stdlib/libdoc/getoptlong/rdoc/index.html
# =============================================================================

# =============================================================================
# Help / Usage
#
#  Prints the command-line usage summary to standard output.
#
#  errorMesg - zero or more error messages.  When any are given they are
#              printed first, one per line, wrapped in an "ERROR: '...'"
#              banner.
#
#  This method only prints; it does not terminate the program.
# =============================================================================
def usage(*errorMesg)
   print "\n\nERROR: '" + errorMesg.join("\n") + "'\n" unless errorMesg.empty?
   print "\nUsage:\n"
   print "OPMLtoOL.rb --opml file.opml --dest folder --format style\n"
   print "where:\n"
   print "   --opml        Source OPML file.\n"
   print "   --dest        Destination folder name (optional).\n"
   print "   --format      Output format, one of: pbwiki, mediawiki, html.\n"
end

# =============================================================================
#  genOutlineString
#
#  Turns an outline numbering array into a string by appending delim after
#  every entry, e.g. [1, 2] with "." gives "1.2.".
#
#  When delim is "_" the final character is removed again, so [1, 2] with "_"
#  gives "1_2" (no trailing delimiter).
# =============================================================================
def genOutlineString(outline, delim)
   joined = outline.inject("") { |soFar, index| soFar + index.to_s + delim }
   delim == "_" ? joined.chop : joined
end

# =============================================================================
#  GenericFormatter base class
#
#  Given an element, pull out its text and generate the appropriate list
#  entry.  If that element has sub-elements, recurses appropriately.
#
#  Abstract wrapper only; subclass to give specific formats.  Every markup
#  hook below raises until a subclass overrides it; only title and
#  writeElement are implemented here.
# =============================================================================
class GenericFormatter
   def initialize
   end

   # Markup that opens a list.  Must be overridden.
   def listStart
      raise "Invalid use of GenericFormatter.listStart"
   end

   # Markup that closes a list.  Must be overridden.
   def listEnd
      raise "Invalid use of GenericFormatter.listEnd"
   end

   # Leading whitespace for a line at the given nesting depth.
   # Must be overridden.
   def indent(depth)
      raise "Invalid use of GenericFormatter.indent"
   end

   # Markup that opens a list item.  Must be overridden.
   def itemStart
      raise "Invalid use of GenericFormatter.itemStart"
   end

   # Markup that closes a list item.  Must be overridden.
   def itemEnd
      raise "Invalid use of GenericFormatter.itemEnd"
   end

   # Markup for the named anchor of an item; hidAnchor is the anchor id.
   # Must be overridden.
   def anchor(hidAnchor)
      raise "Invalid use of GenericFormatter.anchor"
   end

   # Markup for the visible outline number (hidName) linking to hidAnchor.
   # Must be overridden.
   def link(hidAnchor, hidName)
      raise "Invalid use of GenericFormatter.link"
   end

   # Returns the element's "text" attribute as a String.  An element without
   # that attribute yields "" so that writeElement can still concatenate it
   # instead of failing on nil.
   def title(element)
      element.attributes["text"].to_s
   end

   # Writes one list entry for element to dstFile and then, if the element has
   # child elements, a nested list of its "outline" children (recursively).
   #
   # dstFile - object that responds to print.
   # element - object that responds to attributes, has_elements? and elements.
   # outline - array of per-level counters; its last entry is the number of
   #           siblings already written at this level.  It is modified in
   #           place: the last entry is incremented, and a temporary counter
   #           is pushed and popped around the children.
   def writeElement(dstFile, element, outline)
      depth = outline.size
      # Advance this level's counter to the number of the current item.
      outline.push(outline.pop + 1)
      #
      # Current Item
      #
      hidAnchor = "hid_" + genOutlineString(outline, "_")
      hidName = genOutlineString(outline, ".")

      dstFile.print indent(depth) + itemStart + anchor(hidAnchor) +
                    link(hidAnchor, hidName) + title(element) + itemEnd
      #
      # Optional Element Recursion
      #
      if element.has_elements?
         dstFile.print indent(depth) + listStart
         outline.push(0)   # fresh counter for the child level
         element.elements.each("outline") { |subelement|
            writeElement(dstFile, subelement, outline)
         }
         dstFile.print indent(depth) + listEnd
         outline.pop       # discard the child-level counter
      end
   end
end


# =============================================================================
#  HTML Formatter
#
#  Emits the outline as nested <ul>/<li> markup, one tag per line, indented
#  three spaces per nesting level.  Each item starts with a <span> carrying
#  the anchor id; the span wraps the link showing the outline number.
# =============================================================================
class HTMLFormatter < GenericFormatter
   def initialize
   end

   # Opening list tag, followed by a newline.
   def listStart
      "<ul>\n"
   end

   # Closing list tag, followed by a newline.
   def listEnd
      "</ul>\n"
   end

   # Three spaces per level of depth.
   def indent(depth)
      "   "*depth
   end

   # Opening list-item tag.
   def itemStart
      "<li>"
   end

   # Closing list-item tag, followed by a newline.
   def itemEnd
      "</li>\n"
   end

   # Opens the <span> whose id is the anchor; link closes it.
   def anchor(hidAnchor)
      '<span id="' + hidAnchor + '">'
   end

   # Link to the item's own anchor, labelled with the outline number, followed
   # by the </span> that closes the tag opened by anchor.
   def link(hidAnchor, hidName)
      '<a href="#' + hidAnchor + '">' + hidName + "</a></span>"
   end
end


# =============================================================================
#  MediaWiki Formatter
#
#  Same as the HTML formatter except that the outline number is written as a
#  wiki-style [[#anchor|label]] link instead of an <a> tag.
# =============================================================================
class MediaWikiFormatter < HTMLFormatter
   # Wiki link to hidAnchor labelled hidName, followed by the closing </span>.
   def link(hidAnchor, hidName)
      target = "[[#" + hidAnchor
      label = "|" + hidName
      target + label + "]]</span>"
   end
end


# =============================================================================
#  PBWiki Formatter
#
#  HTML formatter variant that produces the whole list without indentation
#  and without the trailing characters the HTML formatter puts after its
#  list and item tags.
# =============================================================================
class PBWikiFormatter < HTMLFormatter
   # Opening list tag only.
   def listStart
      "<ul>"
   end

   # Closing list tag only.
   def listEnd
      "</ul>"
   end

   # Closing list-item tag only.
   def itemEnd
      "</li>"
   end

   # No indentation at any depth.
   def indent(depth)
      ""
   end
end

# =============================================================================
#  Main Loop
#
#  Parses the command line, picks the formatter named by --format (default
#  "mediawiki"), reads the OPML file named by --opml and writes the outline
#  to <dest>/<opml basename>.html, where <dest> is --dest or, when that is
#  not given, the folder of the OPML file.
#
#  Any invalid option or failed check prints the usage text and exits with
#  status 1; --help prints the usage text and exits with status 0.
# =============================================================================
begin
   require "rexml/document"
   require "getoptlong"

   include REXML

   # Report a fatal command-line problem: show usage with the message, then
   # stop, so later steps never run with missing or invalid settings.
   abortWithUsage = lambda { |message|
      usage(message)
      exit 1
   }

   opts = GetoptLong.new(
      ["--opml", GetoptLong::REQUIRED_ARGUMENT],
      ["--dest", GetoptLong::OPTIONAL_ARGUMENT],
      ["--format", GetoptLong::REQUIRED_ARGUMENT],
      ["--help", GetoptLong::NO_ARGUMENT]
   )
   #
   #
   srcOPMLFilename = ''
   destFolderName = ''
   formatStr = "mediawiki"

   # Option errors are raised while iterating, so the iteration itself has to
   # sit inside the rescue.
   begin
      opts.each { |opt, arg|
         case opt
            when "--opml"   then srcOPMLFilename = arg
            when "--dest"   then destFolderName = arg
            when "--format" then formatStr = arg
            when "--help"
               usage
               exit 0
            else abortWithUsage.call("Unrecognized option #{opt}...\n")
         end
      }
   rescue GetoptLong::Error
      usage
      exit 1
   end

   formatterClass = {
      "mediawiki" => MediaWikiFormatter,
      "pbwiki"    => PBWikiFormatter,
      "html"      => HTMLFormatter
   }[formatStr]

   abortWithUsage.call("You must specify a valid --format") if formatterClass.nil?
   abortWithUsage.call("You must specify --opml") if srcOPMLFilename.empty?
   unless File.exist?(srcOPMLFilename)
      abortWithUsage.call("OPML File '" + srcOPMLFilename + "' doesn't exist")
   end
   unless File.readable?(srcOPMLFilename)
      abortWithUsage.call("OPML File '" + srcOPMLFilename + "' isn't readable")
   end

   formatter = formatterClass.new

   destFolderName = File.dirname(srcOPMLFilename) if destFolderName.empty?
   #
   #
   destOLFilename = File.join(destFolderName,
                              File.basename(srcOPMLFilename, ".opml") + ".html")
   #
   # Block form closes the source file as soon as the document is built.
   #
   opml = File.open(srcOPMLFilename, File::RDONLY) { |srcFile| Document.new(srcFile) }
   outline = []

   File.open(destOLFilename, File::WRONLY|File::CREAT|File::TRUNC) { |dstFile|
      dstFile.print formatter.listStart
      outline.push(0)   # counter for the top-level items
      opml.elements.each("opml/body/outline") { |element|
         formatter.writeElement(dstFile, element, outline)
      }
      dstFile.print formatter.listEnd
      outline.pop
   }
end

Comments (0)

You don't have permission to comment on this page.