#!/usr/bin/python

"""Convert an XML document to a set of SQL 'INSERT INTO' statements

Note:

  See http://gnosis.cx/publish/programming/xml_matters_12.txt
  for a detailed discussion of this module.

Functions:

  walkNodes(py_obj)

"""
__shell_usage__ = """
Shell Usage: [python] xml2sql.py [XML] [< XML_from_STDIN]
"""
__version__ = "$Revision: 0.10 $"
__author__=["David Mertz (mertz@gnosis.cx)",]
__thanks_to__=[]
__copyright__="""
    This file is released to the public domain.  I (dqm) would
    appreciate it if you choose to keep derived works under terms
    that promote freedom, but obviously am giving up any rights
    to compel such.
"""

__history__="""
    0.10   Initial version.

"""
# Constants
MYSQL     = 1
DB2       = 2
ORACLE    = 3
MSSQL     = 4
POSTGRES  = 5
MAX_PCDATA = "10000"
MAX_XML    = "10000"

# Imports
import string, sys, re, random
from time import time
from types import *
from xml_objectify import XML_Objectify, DOM
from xml_objectify import keep_containers, MAYBE

# Module setup
insertLines = []
random.seed(time())

def walkNodes(py_obj, parent_info=('',''), seq=0):
    insDct = {}     # initialize the INSERT statement dictionary
                    # table name must be unmangled from [xml_objectify] name
    insDct['table_name'] = py_obj.__class__.__name__.replace('_XO_','')
    insDct['table_name'] = insDct['table_name'].replace('-','__')
                    # column name of parent massaged w/ 'foreign_key_' prefix
    insDct['parent_name'] = 'foreign_key_' + parent_info[0]
                    # primary key of parent table
    insDct['parent_key'] = parent_info[1]
                    # primary key is stringified random BIGINT
    insDct['primary_key'] = str(long(random.random() * 10**18L))
                    # start out with empty strings of other columns/values
    insDct['other_cols'] = ""
    insDct['other_vals'] = ""
    insDct['seq'] = str(seq)

    self_info = (insDct['table_name'],insDct['primary_key'])

    for colname in dir(py_obj):
        safename = colname.replace('-','__')
        if colname == "__parent__":
           continue             # ExpatFactory uses bookeeping attribute

        member = getattr(py_obj,colname)
        if type(member) == InstanceType:
            walkNodes(member, self_info)
        elif type(member) == ListType:
            for memitem in member:
                seq += 1
                walkNodes(memitem, self_info, seq)
        else:
            member = member.replace('"',r'\"')
            member = member.replace('\n',r'\n')
            member = member.replace('\r',r'\r')
            member = member.replace('\t',r'\t')
            insDct['other_cols'] += ','+safename
            insDct['other_vals'] += ',"'+member+'"'

    if parent_info[0]:      # non-root node
        stmt  = "INSERT INTO %(table_name)s "                    % insDct
        stmt +=          "(primary_key,seq,%(parent_name)s"      % insDct
        stmt +=                            "%(other_cols)s) "    % insDct
        stmt += "VALUES (%(primary_key)s,%(seq)s,%(parent_key)s" % insDct
        stmt +=                            "%(other_vals)s)"     % insDct
        insertLines.append(stmt)
    else:                   # the root node
        stmt  = "INSERT INTO %(table_name)s "                    % insDct
        stmt +=     "(primary_key) VALUES (%(primary_key)s)"     % insDct
        insertLines.append(stmt)


#-- Command line version of tool
if __name__ == '__main__':
    if len(sys.argv) > 1:
        if sys.argv[1] in ('-h','/h','-?','/?','?','--help'):
            print __shell_usage__
        else:
            keep_containers(MAYBE)      # Keep XML iff character markup
            py_obj = XML_Objectify(sys.argv[1],DOM).make_instance()
            walkNodes(py_obj)
            for insertLine in insertLines:
                print insertLine.encode('UTF-8')+';'
    else:
        keep_containers(MAYBE)      # Keep XML iff character markup
        py_obj = XML_Objectify(sys.stdin,DOM).make_instance()
        walkNodes(py_obj)
        for insertLine in insertLines:
            print insertLine.encode('UTF-8')+';'


