#! /usr/bin/env python

import os
import re
import fnmatch
import copy
import xml.etree.ElementTree as ET

import pprint
from xml.dom import minidom

# Directory that contains this script (symlinks resolved).
tools_path = os.path.dirname(os.path.realpath(__file__))
# Parent directory of tools_path; collectFiles() walks this tree.
base_path = os.path.realpath(os.path.join(tools_path, ".."))
# <base_path>/public/langs: doWork() reads and writes the .ts files here, and
# genTSData() records source locations relative to it.
langs_path = os.path.realpath(os.path.join(base_path, 'public', 'langs'))

def collectFiles(ext_list, ignored=None, root_path=None):
    """Collect the files below a directory tree that have a wanted extension.

    Args:
        ext_list: extensions to keep, each with its leading dot (they are
            compared against ``os.path.splitext(name)[1]``).
        ignored: optional dict with the optional keys ``'dir'`` (directories
            whose whole subtree is skipped) and ``'file'`` (fnmatch patterns
            tested against the bare file name). The dict is never modified.
        root_path: directory to walk. Defaults to the module-level
            ``base_path``.

    Returns:
        A sorted list of the matching file paths.
    """
    if root_path is None:
        root_path = base_path
    # Ignored directories are compared in realpath form, so walk the realpath
    # of the root as well; otherwise a symlinked root would never match.
    root_path = os.path.realpath(root_path)

    # Work on private copies: a shared ``{}`` default or the caller's dict
    # must not pick up keys or rewritten paths from this call.
    ignored = ignored or {}
    ignored_dirs = [os.path.realpath(path) for path in ignored.get('dir', [])]
    ignored_files = list(ignored.get('file', []))

    def isIgnoredDir(path):
        # Match on a path-component boundary so that ignoring ".../user_codes"
        # does not also drop a sibling such as ".../user_codes_old".
        for ignore in ignored_dirs:
            if path == ignore or path.startswith(ignore.rstrip(os.sep) + os.sep):
                return True
        return False

    result = []
    for root, dirs, files in os.walk(root_path):
        if isIgnoredDir(root):
            dirs[:] = []
            continue
        # Prune in place so os.walk never descends into ignored subtrees.
        dirs[:] = [d for d in dirs if not isIgnoredDir(os.path.join(root, d))]

        for f in files:
            if any(fnmatch.fnmatch(f, pattern) for pattern in ignored_files):
                continue
            if os.path.splitext(f)[1] in ext_list:
                result.append(os.path.join(root, f))
    result.sort()
    return result

def parseL18nStrings(path, regex):
    """Scan one file for translatable strings.

    Every line of the file at ``path`` is searched with ``regex``, which must
    define a named group ``text``. Returns a dict mapping each captured text
    to the list of 1-based line numbers where it was found (one entry per
    match, so a text matched twice on one line lists that line twice).
    """
    occurrences = {}
    with open(path) as source:
        for number, content in enumerate(source, 1):
            for match in regex.finditer(content):
                occurrences.setdefault(match.group('text'), []).append(number)
    return occurrences

def genTSData(data, lang='zh_CN', base_dir=None):
    """Build a Qt-Linguist style <TS> element tree from extracted strings.

    Args:
        data: dict mapping a source file path to a dict of
            ``{text: [line numbers]}`` (the shape parseL18nStrings returns).
        lang: value of the root element's ``language`` attribute.
        base_dir: directory the ``<location filename=...>`` values are made
            relative to. Defaults to the module-level ``langs_path``.

    Returns:
        The root ``TS`` element. It holds one ``<context>`` per file, named
        after the file's basename. Each text becomes a ``<message>`` with its
        ``<location>`` entries, a ``<source>`` and an unfinished
        ``<translation>``.
    """
    if base_dir is None:
        base_dir = langs_path

    tsNode = ET.Element('TS', {'version': '2.0', 'language': lang})
    # Sort the paths so the generated file does not depend on dict ordering.
    for path in sorted(data):
        texts = data[path]
        relpath = os.path.relpath(path, base_dir)
        contextNode = ET.SubElement(tsNode, 'context')
        nameNode = ET.SubElement(contextNode, 'name')
        nameNode.text = os.path.basename(path)

        # Order messages by their line-number lists. ``key=`` gives the same
        # ordering as the old ``cmp=`` form, which Python 3 no longer accepts.
        for text in sorted(texts, key=texts.get):
            msgNode = ET.SubElement(contextNode, 'message')
            for loc in texts[text]:
                ET.SubElement(msgNode, 'location', {'filename': relpath, 'line': str(loc)})
            sourceNode = ET.SubElement(msgNode, 'source')
            sourceNode.text = text
            ET.SubElement(msgNode, 'translation', {'type': 'unfinished'})
    return tsNode

def _nodeToText(node):
    """Serialize an element to a native str for diagnostic messages."""
    raw = ET.tostring(node, 'utf-8')
    # ET.tostring returns bytes; where bytes is not str (Python 3) it has to
    # be decoded before it can be concatenated to a message.
    return raw if isinstance(raw, str) else raw.decode('utf-8')

def mergeContextNode(old_context_node, context_node):
    """Carry existing translations over into a freshly generated context.

    For every ``<message>`` in ``old_context_node`` whose ``<source>`` text
    equals that of a message in ``context_node``, the new message's
    ``<translation>`` receives the old translation's text and attributes.
    When several new messages share a source text, only the first is updated.

    Messages without a ``<source>`` child are reported on stdout and skipped.
    Old messages without a ``<translation>`` leave the new one untouched. A
    new message lacking a ``<translation>`` gets one created.

    Args:
        old_context_node: ``<context>`` element from the existing .ts file.
        context_node: ``<context>`` element to update in place.
    """
    # Index the new messages once instead of rescanning them for every old
    # message; setdefault keeps the first message per source text.
    new_msgs_by_source = {}
    for msg_node in context_node.iter('message'):
        src_node = msg_node.find('source')
        if src_node is None:
            print("can not find 'source' node under new: " + _nodeToText(msg_node))
            continue
        new_msgs_by_source.setdefault(src_node.text, msg_node)

    for old_msg_node in old_context_node.iter('message'):
        old_src_node = old_msg_node.find('source')
        if old_src_node is None:
            print("can not find 'source' node under old: " + _nodeToText(old_msg_node))
            continue

        msg_node = new_msgs_by_source.get(old_src_node.text)
        if msg_node is None:
            continue

        old_tr_node = old_msg_node.find('translation')
        if old_tr_node is None:
            # Nothing to carry over; keep the generated placeholder.
            continue
        tr_node = msg_node.find('translation')
        if tr_node is None:
            tr_node = ET.SubElement(msg_node, 'translation')

        tr_node.text = old_tr_node.text
        # Copy the attributes rather than sharing one dict between two trees.
        old_attrs = dict(old_tr_node.attrib)
        tr_node.attrib.clear()
        tr_node.attrib.update(old_attrs)

def mergeTSData(ts_node, ts_path):
    """Merge the translations already stored in ``ts_path`` into ``ts_node``.

    A deep copy of ``ts_node`` is updated (``ts_node`` itself is left
    untouched). Each ``<context>`` of the existing file is matched by its
    ``<name>`` text against the contexts of the copy and merged with
    mergeContextNode(). The result is then written back with writeTSFile().

    Args:
        ts_node: freshly generated ``TS`` root element.
        ts_path: path of the existing .ts file.

    Returns:
        True when the merged tree was written. False when the existing file
        is not well-formed XML; nothing is written then, so the caller can
        fall back to writing ``ts_node`` as-is.
    """
    try:
        old_root = ET.parse(ts_path).getroot()
    except ET.ParseError as err:
        print("can not parse existing ts file '%s': %s" % (ts_path, err))
        return False

    ts_node_copy = copy.deepcopy(ts_node)

    # Index the new contexts by name once; setdefault keeps the first context
    # per name.
    contexts_by_name = {}
    for context_node in ts_node_copy.iter('context'):
        name_node = context_node.find('name')
        if name_node is None:
            continue
        contexts_by_name.setdefault(name_node.text, context_node)

    for old_context_node in old_root.findall('context'):
        old_name_node = old_context_node.find('name')
        if old_name_node is None:
            continue
        context_node = contexts_by_name.get(old_name_node.text)
        if context_node is not None:
            mergeContextNode(old_context_node, context_node)

    writeTSFile(ts_path, ts_node_copy)
    return True

def writeTSFile(ts_path, ts_node):
    """Write ``ts_node`` to ``ts_path`` as pretty-printed UTF-8 XML.

    If ``ts_path`` already exists it is first renamed to ``ts_path + ".bak"``,
    replacing any previous backup.

    Args:
        ts_path: destination file path.
        ts_node: root element to serialize.
    """
    # Serialize before touching the disk, so a failure here leaves the
    # existing file where it is.
    pretty_xml = minidom.parseString(ET.tostring(ts_node, 'utf-8')).toprettyxml(indent="  ", encoding='utf-8')

    if os.path.exists(ts_path):
        backup_path = ts_path + ".bak"
        # os.rename does not overwrite an existing target on Windows.
        if os.path.exists(backup_path):
            os.remove(backup_path)
        os.rename(ts_path, backup_path)

    # toprettyxml(encoding=...) returns encoded bytes, so the file has to be
    # opened in binary mode.
    with open(ts_path, 'wb') as f:
        f.write(pretty_xml)

def doWork(file_type, ts_list):
    """Extract the L(...) strings for one file type and refresh its .ts files.

    Args:
        file_type: "backend" (scans .php files) or "frontend" (scans .coffee,
            .js and .html files, minus the ignored directories and file
            patterns below). Any other value prints a message and returns.
        ts_list: iterable of ``(language code, .ts file name)`` pairs; the
            file names are resolved under ``langs_path``.

    For each pair, an existing .ts file is merged via mergeTSData(); a missing
    file, or a merge that reports failure, gets the generated tree written
    directly with writeTSFile().
    """
    if file_type not in ("backend", "frontend"):
        print("invalid file_type: " + file_type)
        return

    if file_type == "backend":
        ext_list = ['.php']
        ignored = {}
    else:
        ext_list = ['.coffee', '.js', '.html']
        skipped_dirs = [
            os.path.join(base_path, 'public/user_codes'),
            os.path.join(base_path, 'public/user_data'),
        ]
        skipped_patterns = ['*.min.js', '*.coffee']
        ignored = {'dir': skipped_dirs, 'file': skipped_patterns}

    files = collectFiles(ext_list, ignored)

    # NOTE(review): the pattern has no word boundary before "L", so a call on
    # an identifier that merely ends in L (e.g. URL('x')) matches too --
    # confirm whether that is intended.
    regex = re.compile(r"""L\(\s*(?P<quote>['"])(?P<text>.+?)(?P=quote)\s*\)""")
    data = {}
    for path in files:
        texts = parseL18nStrings(path, regex)
        if texts:
            data[path] = texts

    if not data:
        return

    ts_node = genTSData(data)
    for lang, ts in ts_list:
        ts_node.set("language", lang)
        ts_path = os.path.join(langs_path, ts)
        # A successful merge has already written the file.
        if os.path.exists(ts_path) and mergeTSData(ts_node, ts_path):
            continue
        writeTSFile(ts_path, ts_node)

if __name__ == '__main__':
    # Regenerate one .ts file per language for the backend sources, then for
    # the frontend sources.
    lang_codes = ('zh_CN', 'ko', 'nl', 'es', 'iw', 'fr')

    for file_type in ('backend', 'frontend'):
        ts_list = []
        for code in lang_codes:
            ts_name = 'cpt-web_'+file_type+'_'+code+'.ts'
            ts_list.append((code, ts_name))
        doWork(file_type, ts_list)



