#! /usr/bin/env python
#      -*- OpenSAF  -*-
#
# (C) Copyright 2009 The OpenSAF Foundation
# (C) Copyright Ericsson AB 2015, 2016, 2017. All rights reserved.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. This file and program are licensed
# under the GNU Lesser General Public License Version 2.1, February 1999.
# The complete license can be accessed from the following location:
# http://opensource.org/licenses/lgpl-license.php
# See the Copying file included with the OpenSAF distribution for full
# licensing terms.
#
# Author(s): Ericsson AB
#
# pylint: disable=invalid-name,unused-argument
""" immxml-merge tool """
from __future__ import print_function
import getopt
import os
import re
import shutil
import sys
import tempfile
import xml.dom.minidom
from datetime import datetime

from baseimm import BaseOptions, BaseImmDocument, trace, retrieve_file_names, \
    print_info_stderr, abort_script, verify_input_file_read_access

# True when the interpreter running this script is python3
PY3 = sys.version_info[0] == 3


def bytes_to_str(bytes_obj, encoding=None):
    """ Convert bytes object to string in python3

    Args:
        bytes_obj: Value to convert. Anything that is not a python3 bytes
            object is returned untouched.
        encoding (str): Codec used to decode the bytes. utf-8 is used when
            the encoding is None or empty.

    Returns:
        The decoded text for a python3 bytes object, otherwise bytes_obj
        itself.
    """
    # In python2 'bytes' is just another name for 'str', so such objects are
    # already native strings and are returned as they are.
    if isinstance(bytes_obj, bytes) and not isinstance(bytes_obj, str):
        # bytes.decode(None) raises TypeError, so fall back to utf-8
        return bytes_obj.decode(encoding or "utf-8")

    return bytes_obj


class Options(BaseOptions):
    """ Settings of the immxml-merge tool (class level, shared by all code) """
    # Output handling: -o/--output stores the file name and clears 'stdout'
    outputFilename = None
    stdout = True
    # -s/--sort
    sort = False
    # --schema
    schemaFilename = None
    # Cleared by main() when /usr/bin/xmllint does not exist
    isXmlLintFound = True

    # --grep-class / --grep-v-class (the latter also sets negateGrep_class)
    grep_class = None
    negateGrep_class = False
    # --grep-dn / --grep-v-dn (the latter also sets negateGrep_dn)
    grep_dn = None
    negateGrep_dn = False
    # Set by main() together with any of the grep options
    disableParentDnValidation = False

    # --keep-comment-elements, --ignore-variants, --ignore-missing-class
    keepCommentElements = False
    ignoreVariants = False
    ignoreMissingClass = False

    @staticmethod
    def print_option_settings():
        """ Return the base option settings text followed by the settings
        that are specific to immxml-merge """
        base_text = BaseOptions.print_option_settings()
        template = (' keepCommentElements: %s\n'
                    ' ignoreVariants: %s\n'
                    ' grep_class: %s\n'
                    ' grep_dn: %s\n'
                    ' negateGrep_dn: %s\n'
                    ' negateGrep_class: %s\n'
                    ' sort:%s\n'
                    ' outputFilename: %s\n'
                    ' schemaFilename: %s\n')
        values = (Options.keepCommentElements,
                  Options.ignoreVariants,
                  Options.grep_class,
                  Options.grep_dn,
                  Options.negateGrep_dn,
                  Options.negateGrep_class,
                  Options.sort,
                  Options.outputFilename,
                  Options.schemaFilename)
        return base_text + template % values


class MergedImmDocument(BaseImmDocument):
    """ This class contains methods to process the merging of IMM XML files """
    def __init__(self):
        # Result document and the source document it was seeded from
        self.document = xml.dom.minidom.Document()
        self.imm_content_element = None
        self.firstSourceDoc = None
        self.isResultDocInitialized = False

        # Compiled --grep-class / --grep-dn patterns (see initialize())
        self.regexpObj = None
        self.dn_regexpObj = None

        # Accepted classes: lookup by name plus list in insertion order
        self.classDict = {}
        self.classList = []

        # Accepted objects: lookup by dn plus list in insertion order
        self.objectDnNameDict = {}
        self.objectList = []

        # Counters of all class/object elements seen in the input files
        self.classes_parsed = self.objects_parsed = 0

    def initialize(self):
        """ Compile the class and dn grep patterns given on command line """
        class_pattern = Options.grep_class
        if class_pattern:
            trace("Matching classes with pattern: %s", class_pattern)
            self.regexpObj = re.compile(class_pattern)

        dn_pattern = Options.grep_dn
        if dn_pattern:
            trace("grep_dn matching dn with pattern: %s", dn_pattern)
            self.dn_regexpObj = re.compile(dn_pattern)

    def init_result_document(self, doc):
        """ Seed the result document from the first source document

        The top level nodes of 'doc' are shallow cloned into the result
        document, up to and including the IMM contents element. The script is
        aborted when 'doc' has no such element. Does nothing when the result
        document is already initialized.
        """
        if self.isResultDocInitialized:
            return

        trace("Copy elements from first source document file")
        for source_node in doc.childNodes:
            trace("nodeType: *%d*", source_node.nodeType)
            trace("nodeValue: *%s*", source_node.nodeValue)
            trace("nodeName: *%s*", source_node.nodeName)
            trace("parent: *%s*", source_node.parentNode.nodeName)
            # Shallow copy: children are added later by save_result()
            copied_node = source_node.cloneNode(False)
            self.document.appendChild(copied_node)
            if source_node.nodeName != self.imm_content_element_name:
                continue
            # Nothing after the IMM contents element is copied
            self.imm_content_element = copied_node
            break

        if self.imm_content_element is None:
            abort_script("Did not find <imm:IMM-contents element in first "
                         "source file")

        self.firstSourceDoc = doc
        self.isResultDocInitialized = True
        trace("Done copying elements from first source document.")

    def add_class(self, class_element, other_nodes):
        """ Store a class element unless it is filtered out or a duplicate

        Args:
            class_element: The <class> element read from an input file
            other_nodes: Nodes that preceded the class element in the file
        """
        class_name = class_element.getAttribute("name")
        self.classes_parsed += 1
        trace("className:%s nodeType:%d nodeName:%s", class_name,
              class_element.nodeType, class_element.nodeName)

        pattern = self.regexpObj
        if pattern is not None:
            trace("Check if class %s match pattern %s", class_name,
                  Options.grep_class)
            is_match = pattern.search(class_name) is not None
            if not is_match and not Options.negateGrep_class:
                trace("Class %s does not match pattern %s", class_name,
                      Options.grep_class)
                return
            if is_match and Options.negateGrep_class:
                trace("Class %s does not match negated pattern %s", class_name,
                      Options.grep_class)
                return

        trace("match ok: %s", class_name)

        if class_name not in self.classDict:
            # First definition of this class: remember it for lookup and keep
            # it (with its preceding nodes) in the ordered list
            self.classDict[class_name] = class_element
            self.classList.append((class_name, (other_nodes, class_element)))
            return

        # The class is already known; the clash check aborts the script
        # unless duplicates are accepted
        if self.check_class_clash_acceptable(class_element, class_name):
            trace("Ignore class definition duplicate %s. "
                  "(Class is compatible)", class_name)

    def add_object(self, object_element, other_nodes):
        """ Add objects matching pattern

        The object is stored in the ordered object list unless its class is
        filtered out, its dn does not pass the --grep-dn/--grep-v-dn filter
        or an object with the same dn is already stored.

        Args:
            object_element: The <object> element read from an input file
            other_nodes: Nodes that preceded the object element in the file
        """
        class_name = object_element.getAttribute("class")
        trace("DUMPING Object with className: %s\n %s", class_name,
              object_element.toxml())
        self.objects_parsed += 1

        # Find "dn" child node
        object_dn_element = None
        for child_node in object_element.childNodes:
            if child_node.nodeName == "dn":
                object_dn_element = child_node
                break

        # minidom getAttribute() returns an empty string (never None) when
        # the attribute is missing, hence the truth test on class_name
        if object_dn_element is None or not class_name \
                or object_dn_element.firstChild is None:
            abort_script("Failed to find class name or the dn child node in "
                         "object: %s", object_element.toxml())

        # The dn text is held by a text node below the <dn> element, not by
        # the nodeValue of the element itself
        object_dn_name = object_dn_element.firstChild.nodeValue
        trace("objectDn key: %s", object_dn_name)
        trace("objectDnElement: %s", object_dn_element.toxml())

        # className must exist in dictionary map
        # (unless --ignore-missing-class is specified)
        if class_name not in self.classDict:
            if Options.ignoreMissingClass:
                trace("Continue processing object with missing class "
                      "(--ignore-missing-class)")
            elif Options.grep_class is not None:
                # The class was most likely dropped by the class filter
                trace("Ignoring object with class not matching pattern "
                      "className: %s", class_name)
                return
            else:
                abort_script("Failed to find class referred in: %s",
                             object_element.toxml())

        if self.dn_regexpObj is not None:
            trace("Grep_dn check if object dn %s matches pattern %s",
                  object_dn_name, Options.grep_dn)
            if self.dn_regexpObj.search(object_dn_name) is None:
                if not Options.negateGrep_dn:
                    trace("Object dn %s does not match pattern %s",
                          object_dn_name, Options.grep_dn)
                    return
            elif Options.negateGrep_dn:
                trace("Object dn %s does not match negated pattern %s",
                      object_dn_name, Options.grep_dn)
                return

        trace("match ok: %s", object_dn_name)

        if object_dn_name in self.objectDnNameDict:
            # TODO add code to check if this is valid clash
            # The clash check aborts the script unless duplicates are
            # accepted; the first stored object is always the one kept
            if self.check_object_clash_acceptable(object_dn_name):
                trace("Ignore duplicate object: %s with dn: %s", class_name,
                      object_dn_name)
            return

        self.objectDnNameDict[object_dn_name] = object_dn_element

        # Add the complete object to ordered list. The key only matters when
        # sorting is requested, otherwise the dn is used as a placeholder.
        object_list_key = object_dn_name
        if Options.sort:
            object_list_key = ''.join(
                [self.get_dn_sort_prefix(object_dn_name), class_name,
                 object_dn_name])
            trace("Object sort order key: %s", object_list_key)

        self.objectList.append((object_list_key,
                                (other_nodes, object_element)))

    @staticmethod
    def get_dn_sort_prefix(object_dn_name):
        """ Build the depth based sort prefix of a dn

        The prefix holds one "A" per dn level, joined by ",A", and is padded
        with two spaces per unused level so that all prefixes get the same
        length. An Exception is raised when the dn has 10 levels or more.
        """
        max_depth = 10

        # Count the commas that are not preceded by a backslash.
        # NOTE(review): a comma at index 0 or 1 is never counted - confirm
        # that this is intended.
        unescaped_commas = 0
        for pos, char in enumerate(object_dn_name):
            if char != ',':
                continue
            if pos > 1 and object_dn_name[pos - 1] != '\\':
                unescaped_commas += 1

        depth = unescaped_commas + 1
        if depth >= max_depth:
            raise Exception("Object reference depth is higher than expected "
                            "(maximum %d)" % max_depth)

        padding_levels = max_depth - depth - 1
        key = "A" + ",A" * unescaped_commas + "  " * padding_levels
        trace("get_dn_sort_prefix: %s", key)
        return key

    @staticmethod
    def check_class_clash_acceptable(class_element, class_name):
        """ Decide whether a duplicated class definition may be ignored

        Returns True when --ignore-variants is given, otherwise the script
        is aborted. The class element itself is not inspected.
        """
        # TODO deep verify class element clash is acceptable or not ?????
        if Options.ignoreVariants:
            return True

        abort_script("Failed to merge input files class with name: %s "
                     "exists in multiple input files "
                     "(use --ignore-variants to override)", class_name)
        return True

    @staticmethod
    def check_object_clash_acceptable(object_dn):
        """ Decide whether a duplicated object dn may be ignored

        Returns True when --ignore-variants is given, otherwise the script
        is aborted.
        """
        if Options.ignoreVariants:
            return True

        abort_script("Failed to merge input files object with dn: %s "
                     "exists in multiple input files "
                     "(use --ignore-variants to override)", object_dn)
        return True

    def process_input_file(self, file_name):
        """ Parse one input file and collect its classes and objects

        The file is validated against the schema when xmllint and a schema
        are available, otherwise it is only checked to be parsable. The first
        processed file also seeds the result document. Always returns 0.
        """
        trace("")
        trace("process_input_file for file: %s", file_name)

        use_schema = Options.isXmlLintFound \
            and Options.schemaFilename is not None
        if not use_schema:
            self.verify_input_xml_document_file_is_parsable(file_name)
        elif self.validate_xml_file_with_schema(
                file_name, Options.schemaFilename) != 0:
            abort_script("Failed to validate input file: %s", file_name)

        doc = xml.dom.minidom.parse(file_name)

        if not self.isResultDocInitialized:
            self.init_result_document(doc)

        # Elements that are stored, keyed by node name
        handlers = {"class": self.add_class, "object": self.add_object}

        # Only the children of the IMM contents element are of interest
        for top_node in doc.childNodes:
            pending_nodes = []
            if top_node.nodeName != self.imm_content_element_name:
                continue

            for child in top_node.childNodes:
                trace("imm:contents loop.....nodeName:%s nodeValue:%s",
                      child.nodeName, child.nodeValue)
                handler = handlers.get(child.nodeName)
                if handler is None:
                    # Probably a text or comment node: keep it so that it can
                    # be associated with the next class or object
                    pending_nodes.append(child)
                    continue
                handler(child, pending_nodes)
                pending_nodes = []

        return 0

    def save_result(self):
        """ Save merge result to file or print to stdout

        The document is first written unformatted to '<output file>.tmp'.
        That file is then formatted by xmllint into the output file or, when
        xmllint is not available, moved to the output file name as it is.
        When no output file was requested, a private temporary file is used
        as output file and handed to print_to_stdout_and_remove_tmp_file()
        at the end.
        """
        trace("")

        # Both attributes are set by init_result_document(), i.e. they are
        # missing only when no input document was processed at all
        if self.firstSourceDoc is None or self.imm_content_element is None:
            abort_script("No input document has been processed, "
                         "nothing to save")

        # Use a temp file when output file is not specified.
        # mkstemp creates a uniquely named file, which avoids the clashes
        # and symlink problems of a fixed file name in /tmp
        if Options.stdout:
            tmp_fd, Options.outputFilename = tempfile.mkstemp(
                prefix="merge_result_", suffix=".xml")
            os.close(tmp_fd)

        if Options.sort:
            trace("Sorting the classes & objects in resulting xml document")
            self.classList.sort()
            self.objectList.sort()

        trace("Number of classes in resulting xml document: %d",
              len(self.classList))

        print_info_stderr("encoding in first source xml document: %s",
                          self.firstSourceDoc.encoding)
        tmp_output_file_name = Options.outputFilename + ".tmp"

        if self.firstSourceDoc.encoding is not None \
                and self.firstSourceDoc.encoding.lower() == "utf-8":
            encoding = "utf-8"
            heading = "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
        else:
            encoding = None
            heading = "<?xml version=\"1.0\"?>"

        def node_to_text(node):
            """ Serialize a node to a native string """
            return bytes_to_str(node.toxml(encoding), encoding)

        # A python3 text file uses the locale encoding by default, which does
        # not have to be utf-8. The heading either declares utf-8 or declares
        # nothing, in which case utf-8 is what an xml parser will assume.
        open_kwargs = {"encoding": "utf-8"} if PY3 else {}
        with open(tmp_output_file_name, "w", **open_kwargs) as file_object:
            file_object.write(heading)
            # The contents element was cloned without children and is thus
            # serialized as an empty tag: turn it into a start tag
            root_text = node_to_text(self.imm_content_element)
            file_object.write(root_text.replace("/>", ">") + "\n")
            for element_list in (self.classList, self.objectList):
                for _, (other_nodes, element) in element_list:
                    if Options.keepCommentElements:
                        for other_node in other_nodes:
                            file_object.write(node_to_text(other_node) + "\n")
                    file_object.write(node_to_text(element) + "\n")
            file_object.write("</imm:IMM-contents>")

        trace("Stored resulting xml document in tmp file: %s",
              tmp_output_file_name)

        if Options.isXmlLintFound:
            if self.format_xml_file_with_xmllint(tmp_output_file_name,
                                                 Options.outputFilename) != 0:
                abort_script("Failed to format xml file: %s",
                             tmp_output_file_name)
            trace("Delete the tmp file: %s", tmp_output_file_name)
            os.remove(tmp_output_file_name)
        else:
            # At least we should move the file to its real name
            trace("shutil.move(%s, %s)", tmp_output_file_name,
                  Options.outputFilename)
            shutil.move(tmp_output_file_name, Options.outputFilename)

        trace("Number of classes parsed: %d stored: %d", self.classes_parsed,
              len(self.classList))
        trace("Number of objects parsed: %d stored: %d", self.objects_parsed,
              len(self.objectList))

        diff_classes = self.classes_parsed - len(self.classList)
        diff_objects = self.objects_parsed - len(self.objectList)
        print_info_stderr("Note! Merge ignored %d classes "
                          "(parsed:%d stored:%d)", diff_classes,
                          self.classes_parsed, len(self.classList))
        print_info_stderr("Note! Merge ignored %d objects "
                          "(parsed:%d stored:%d)", diff_objects,
                          self.objects_parsed, len(self.objectList))

        trace("Stored formatted xml document in file: %s",
              Options.outputFilename)

        if Options.isXmlLintFound and Options.schemaFilename is not None:
            # The merge result is kept even when it is not valid, but the
            # failure is at least reported
            if self.validate_xml_file_with_schema(
                    Options.outputFilename, Options.schemaFilename) != 0:
                print_info_stderr("Warning! Schema validation failed for "
                                  "merge result: %s", Options.outputFilename)
        if Options.stdout:
            self.print_to_stdout_and_remove_tmp_file(Options.outputFilename)

# End of MergedImmDocument class


def print_usage():
    """ Print usage of immxml-merge tool

    The option names listed here must match the ones handed to getopt in
    main(), e.g. --keep-comment-elements.
    """
    print("usage: immxml-merge [options] filename[s]")
    print("""
    -o, --output            specified output file
                            (If option is omitted stdout is used)

    --grep-class PATTERN    pattern match the class names in source file(s)
                            (This option also sets disableParentDnValidation)
                            At end of processing a summary of processed/ignored
                            classes/objects is listed.

    --grep-v-class PATTERN  negated pattern matching
                            Similar to "grep -v PATTERN"

    --grep-dn PATTERN       pattern match the object dn names in source file(s)
                            (This option also sets disableParentDnValidation)
                            At end of processing a summary of processed/ignored
                            classes/objects is listed.

    --grep-v-dn PATTERN     negated pattern matching
                            Similar to "grep -v PATTERN"

    --ignore-variants       when merge tool finds several definitions of same
                            class or object the default behaviour is to abort
                            the merge processing and print out which
                            class/object is duplicated. With this option it is
                            possible to continue merge, keeping the first
                            instance of a class or object definition and ignore
                            the other. At end of processing a summary of
                            processed/ignored classes/objects is listed.

    --ignore-missing-class  do not require class definitions referred by an
                            object to exist in source file(s)
                            (Is required by default)

    --keep-comment-elements keep embedded comment elements
                            (experimental support: associates comment elements
                            with subsequent class or object element which may
                            be correct association....)

    --schema                validate input files and output(file) with the
                            supplied xsd schema file

    -s, --sort              sort the merged class and object names

    -t, --trace             print trace information to stderr

    -v, --version           print version information and exit

    -h, --help              display this help and exit

    See http://devel.opensaf.org/ for information and updates.
    """)


def print_version():
    """ Write the immxml-merge version line to stdout """
    version_line = "immxml-merge version 0.5.1"
    print(version_line)


def main(argv):
    """ Parse the command line, merge the input files and save the result

    Args:
        argv (list): Command line arguments without the program name

    Returns:
        0 when the merge is done. Usage errors end the script with exit
        code 2, --help and --version with exit code 0.
    """
    short_options = "thvso:"
    long_options = ["trace", "keep-comment-elements", "ignore-variants",
                    "ignore-missing-class", "help", "version", "sort",
                    "grep-class=", "grep-v-class=", "grep-dn=", "grep-v-dn=",
                    "output=", "schema="]
    try:
        opts, args = getopt.getopt(argv, short_options, long_options)
    except getopt.GetoptError as err:
        # Print help information and exit:
        print_info_stderr("%s", str(err))
        print_usage()
        sys.exit(2)

    found_grep_dn = False
    found_grep_class = False

    # Each option matches exactly one of the branches below
    for opt, value in opts:
        if opt in ("-t", "--trace"):
            BaseOptions.traceOn = True
        elif opt == "--keep-comment-elements":
            Options.keepCommentElements = True
        elif opt == "--ignore-variants":
            Options.ignoreVariants = True
        elif opt == "--ignore-missing-class":
            Options.ignoreMissingClass = True
        elif opt in ("--grep-class", "--grep-v-class"):
            if found_grep_class:
                abort_script("Only one --grep-v-class or --grep-class option "
                             "may be specified")
            found_grep_class = True
            if opt == "--grep-v-class":
                Options.negateGrep_class = True
            Options.grep_class = value
            Options.disableParentDnValidation = True
        elif opt in ("--grep-dn", "--grep-v-dn"):
            if found_grep_dn:
                abort_script("Only one --grep-v-dn or --grep-dn option may be "
                             "specified")
            found_grep_dn = True
            if opt == "--grep-v-dn":
                Options.negateGrep_dn = True
            Options.grep_dn = value
            Options.disableParentDnValidation = True
        elif opt in ("-o", "--output"):
            Options.stdout = False
            Options.outputFilename = value
        elif opt == "--schema":
            Options.schemaFilename = value
        elif opt in ("-s", "--sort"):
            Options.sort = True
        elif opt in ("-v", "--version"):
            print_version()
            sys.exit(0)
        elif opt in ("-h", "--help"):
            print_usage()
            sys.exit(0)

    # Cannot trace these until -t, Options.traceOn is effective (or not)
    trace("opts: %s", opts)
    trace("args: %s", args)
    trace("sys.path: %s", sys.path)

    # At least one input file is required
    if not args:
        print_usage()
        sys.exit(2)

    trace("Option object:\n %s", Options.print_option_settings())

    if not os.path.exists('/usr/bin/xmllint'):
        if Options.schemaFilename is not None:
            abort_script("Cannot find the required linux command "
                         "/usr/bin/xmllint. --schema option requires xmllint, "
                         "Exiting!")
        else:
            # It is possible to continue without xmllint
            # (limited w.r.t formatting)
            print_info_stderr("")
            print_info_stderr("Cannot find the linux command /usr/bin/xmllint "
                              "which is required for formatting!")
            print_info_stderr("Script continues but the result file may for "
                              "example lack linefeed characters.")
            print_info_stderr("")
            Options.isXmlLintFound = False

    file_list = retrieve_file_names(args)
    trace("Starting to process files\n")

    # The document object collects classes and objects of all input files
    merged_doc = MergedImmDocument()
    merged_doc.initialize()

    for file_name in file_list:
        verify_input_file_read_access(file_name)
        merged_doc.process_input_file(file_name)
        trace("Done with file: %s", file_name)

    # Store the resulting document with collected class and object elements
    merged_doc.save_result()
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
    print_info_stderr("{0} Successfully merged input files!".format(
        timestamp))

    return 0


# Script entry point: hand the arguments after the program name to main()
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
