import os  
import sys
import time
import xml.dom.minidom
import re
import gdbm as dbmodule
import gzip
import optparse

################################################################
# globals

# When true, read_pipe () and system () do not echo the commands they
# run; get_cli_options () sets it to the inverse of --verbose.
silent=False
# Author e-mail address -> display name, filled from the --authors file
# and consulted by DarcsPatch.extract_author ().
mail_to_name_dict = {}
# Patch number -> patch for exported commits that no later commit has
# used as its parent or merged yet; export_pending () ties up whatever
# is left at the end.
pending_patches = {}
# Not referenced anywhere in the visible part of this file.
used_tags = {}

################################################################
# utils

class PullConflict (Exception):
    """Raised by DarcsConversionRepo.go_from_to when a pull conflicts or fails."""

class CommandFailed (Exception):
    """Raised by read_pipe and system when a shell command reports failure."""

def progress (s):
    """Write the message S, followed by a newline, to standard error."""
    line = s + '\n'
    sys.stderr.write (line)
    
def get_cli_options ():
    """Parse the command line and return the tuple (options, args).

    Side effects:
      - sets the module-level 'silent' flag to the inverse of --verbose;
      - every --authors FILE merges that file's EMAIL=NAME lines into
        the module-level mail_to_name_dict;
      - prints the help text and exits with status 2 when no positional
        argument (the darcs repository) is given.

    When --destination is not given, options.target_git_repo is derived
    from the base name of the first positional argument by removing
    '.darcs' and appending '.git'.
    """
    p = optparse.OptionParser ()

    p.usage='''darcs2git [OPTIONS] DARCS-REPO'''
    p.description='''Convert darcs repo to git.

This tool is a one shot conversion utility for Darcs repositories.  It
requires Git version that has git-fast-import.  It does not support
incremental updating.

This tool will import the patches in chronological order, and only creates
merges when a resolved conflict is detected.

TODO:

- correct time zone handling
- 

'''

    def update_map (option, opt, value, parser):
        # optparse callback for --authors: VALUE is the path of a text
        # file holding one EMAIL=NAME entry per line.
        f = open (value)
        try:
            for l in f:
                l = l.strip ()
                if not l:
                    # tolerate blank lines, e.g. a trailing newline
                    continue
                # Split on the first '=' only, so a name may contain '='.
                fields = l.split ('=', 1)
                if len (fields) != 2:
                    # optparse turns this into a usage error and exits.
                    raise optparse.OptionValueError (
                        '%s: expected EMAIL=NAME, got %r' % (opt, l))
                mail_to_name_dict[fields[0].strip ()] = fields[1].strip ()
        finally:
            f.close ()

    p.add_option ('-a', '--authors', action='callback',
                  callback=update_map,
                  type='string',
                  nargs=1,
                  help='read a text file, containing EMAIL=NAME lines')

    p.add_option ('-d', '--destination', action='store',
                  type='string',
                  default='',
                  dest='target_git_repo',
                  help='where to put the resulting Git repo.')

    p.add_option ('--verbose', action='store_true',
                  dest='verbose',
                  default=False,
                  help='show commands as they are invoked')

    options, args = p.parse_args ()
    if not args:
        p.print_help ()
        sys.exit (2)

    global silent
    silent = not options.verbose

    if not options.target_git_repo:
        # Default destination: foo.darcs -> foo.git, in the current directory.
        repo_path = os.path.abspath (args[0])
        base = os.path.basename (repo_path).replace ('.darcs', '')
        options.target_git_repo = base + '.git'

    return (options, args)

def read_pipe (cmd, ignore_errors=False):
    """Run CMD through the shell and return what it wrote to standard output.

    The command is echoed via progress () unless the module-level
    'silent' flag is set.  Raises CommandFailed when closing the pipe
    reports a non-zero status, unless IGNORE_ERRORS is true.
    """
    if not silent:
        progress ('pipe %s' % cmd)

    stream = os.popen (cmd)
    output = stream.read ()
    status = stream.close ()

    if ignore_errors or not status:
        return output
    raise CommandFailed ("Pipe failed: %s" % cmd)

def system (c, ignore_error=0):
    """Run the shell command C with os.system.

    The command is echoed via progress () unless the module-level
    'silent' flag is set.  Raises CommandFailed on a non-zero status,
    unless IGNORE_ERROR is true.
    """
    if not silent:
        progress ( c)

    status = os.system (c)
    if status and not ignore_error:
        raise CommandFailed ("Command failed: %s" % c)

def darcs_date_to_git (x):
    """Convert a darcs 'date' attribute (YYYYMMDDHHMMSS) to epoch seconds.

    Returns the seconds since the Unix epoch as a decimal string, the
    form used for the timestamp in the committer/tagger lines.  Raises
    ValueError when X does not match the expected format.

    darcs records this timestamp in UTC, so it is converted with
    calendar.timegm; time.mktime would interpret it in the local zone
    of the machine running the conversion and shift every commit.
    """
    import calendar

    t = time.strptime (x, '%Y%m%d%H%M%S')
    return '%d' % calendar.timegm (t)

def darcs_timezone (x) :
    """Return the UTC offset string to record for the darcs 'local_date' X.

    X is expected to look like 'Sat Jun 19 15:45:53 CEST 2004'.  It is
    only sanity-checked; the returned offset is still the hard-coded
    placeholder "+0100" (see the todo below).  Raises ValueError when X
    is not a recognizable date.
    """
    fields = x.split ()
    if len (fields) >= 6:
        # time.strptime only understands %Z for UTC, GMT and the zone
        # names of the machine running the script, so a patch recorded
        # in any other zone used to abort the conversion.  Validate the
        # date without the zone name: weekday, month, day and time come
        # first, the year comes last.
        time.strptime (' '.join (fields[:4] + fields[-1:]),
                       '%a %b %d %H:%M:%S %Y')
    else:
        time.strptime (x, '%a %b %d %H:%M:%S %Z %Y')

    # todo
    return "+0100"

################################################################
# darcs

class DarcsConversionRepo:
    """Scratch darcs repository used to materialize the tree of each patch.

    DIR is the directory of the scratch repository; it is removed and
    recreated on every go_from_to ().  PATCHES is the list of patch
    objects of the source repository, indexed by patch number.
    """

    def __init__ (self, dir, patches):
        self.dir = dir
        self.patches = patches

    def clean (self):
        """Delete the scratch repository directory."""
        system ('rm -rf %s' % self.dir)

    def pull (self, patch):
        """Pull PATCH, matched by hash, from its source repo into the scratch repo.

        Raises CommandFailed (from system ()) when darcs exits non-zero.
        """
        args = {'dir': self.dir,
                'id': patch.attributes['hash'],
                'source_repo': patch.dir}
        system ('cd %(dir)s && darcs pull --quiet --all --match "hash %(id)s" %(source_repo)s ' % args)

    def go_from_to (self, from_patch, to_patch):
        """Move the repo to FROM_PATCH, then go to TO_PATCH. Raise
        PullConflict if conflict is detected

        A false FROM_PATCH (None) starts from an empty repository.

        This uses the fishy technique of writing the inventory and
        constructing the pristine tree with 'darcs repair'

        It might be quicker and/or more correct to wind/rewind the
        repo with pull and unpull.  """

        repo_dir = os.path.abspath (self.dir)
        args = {'dir': repo_dir}
        system ('rm -rf %(dir)s && mkdir %(dir)s && darcs init --repo  %(dir)s'
                % args)

        if from_patch:
            # Hard-link every patch up to and including FROM_PATCH into
            # the fresh repo and list it in the inventory.
            inventory = open (repo_dir + '/_darcs/inventory', 'w')
            try:
                for p in self.patches[:from_patch.number+1]:
                    patch_file = p.filename ()
                    os.link (patch_file,
                             repo_dir + '/_darcs/patches/'
                             + os.path.basename (patch_file))
                    inventory.write (p.header ())
            finally:
                # Close the inventory even when linking or header
                # extraction fails, so the handle is never leaked.
                inventory.close ()

            progress ('Go to patch %d' % from_patch.number)
            # 'darcs repair' rebuilds the pristine tree from the
            # inventory; the working tree is then copied from it.
            system ('cd %(dir)s && darcs repair --quiet' % args)
            system ('rsync -a  %(dir)s/_darcs/pristine/ %(dir)s/' % args)

        try:
            self.pull (to_patch)
            output = read_pipe ('cd %(dir)s && echo y|darcs resolve' % args)
        except CommandFailed:
            # A failing pull or resolve is treated as a conflict too.
            raise PullConflict ()

        if 'No conflicts to resolve' not in output:
            raise PullConflict ()

    def has_patch (self, p):
        """Return whether the patch file of P exists in the scratch repo."""
        patch_file = self.dir + '/_darcs/patches/' + p.attributes['hash']
        return os.path.exists (patch_file)

    def pristine_tree (self):
        """Return the path of the scratch repo's pristine tree."""
        return self.dir + '/_darcs/pristine'
    
class DarcsPatch:
    """One <patch> element of 'darcs changes --xml' plus its patch file.

    XML is the DOM element of the patch and DIR the darcs repository it
    lives in.  The constructor copies the element's XML attributes into
    self.attributes and derives author_name, author_mail, message and
    date.  self.number is -1 until the caller assigns the patch's
    position in the history.
    """

    def __init__ (self, xml, dir):
        self.xml = xml
        self.dir = dir
        self.number = -1
        self.attributes = {}
        for (nm, value) in xml.attributes.items():
            self.attributes[nm] = value

        # fixme: ugh attributes vs. methods.
        self.extract_author ()
        self.extract_message ()
        self.extract_time ()

    def filename (self):
        """Return the path of this patch's file inside the source repo."""
        return self.dir + '/_darcs/patches/' + self.attributes['hash']

    def contents (self):
        """Return the decompressed contents of the patch file."""
        f = gzip.open (self.filename ())
        try:
            return f.read ()
        finally:
            # do not leak one gzip handle per patch
            f.close ()

    def header (self):
        """Return the patch-info header of the patch file.

        That is the name line, the author/date line and, when the patch
        has one, the long comment up to and including the closing ']'
        line -- the text go_from_to () writes into the inventory.
        """
        lines = self.contents ().split ('\n')

        name = lines[0]
        committer = lines[1] + '\n'
        # The header may end on the committer line, optionally followed
        # by ' {' opening the patch body; normalize both to a bare ']'.
        committer = re.sub ('] {\n$', ']\n', committer)
        committer = re.sub ('] *\n$', ']\n', committer)
        comment = ''
        if not committer.endswith (']\n'):
            for l in lines[2:]:
                # startswith instead of l[0]: an empty line inside the
                # comment must not raise IndexError.
                if l.startswith (']'):
                    comment += ']\n'
                    break
                comment += l + '\n'

        header = name  + '\n' + committer
        if comment:
            header += comment

        return header

    def extract_author (self):
        """Set author_name and author_mail from the 'author' attribute.

        'Name <mail>' is split into its parts.  Otherwise the whole
        value is the mail address and the name comes from
        mail_to_name_dict, falling back to the part before the '@'.
        """
        mail = self.attributes['author']
        name = ''
        m = re.search ("^(.*) <(.*)>$", mail)

        if m:
            name = m.group (1)
            mail = m.group (2)
        else:
            try:
                name = mail_to_name_dict[mail]
            except KeyError:
                name = mail.split ('@')[0]

        self.author_name = name
        self.author_mail = mail

    def extract_time (self):
        """Set self.date to '<epoch seconds> <utc offset>'."""
        self.date = darcs_date_to_git (self.attributes['date']) + ' ' + darcs_timezone (self.attributes['local_date'])

    def name (self):
        """Return the text of the <name> child, or '(no comment)' if absent/empty."""
        patch_name = '(no comment)'
        try:
            name_elt = self.xml.getElementsByTagName ('name')[0]
            patch_name = name_elt.childNodes[0].data
        except IndexError:
            pass
        return patch_name

    def extract_message (self):
        """Set self.message to the patch name, a blank line and the comment.

        The name is prefixed with 'UNDO: ' for inverted patches.
        """
        patch_name = self.name ()
        comment_elts = self.xml.getElementsByTagName ('comment')
        comment = ''
        # An empty <comment/> element has no child nodes at all.
        if comment_elts and comment_elts[0].childNodes:
            comment = comment_elts[0].childNodes[0].data

        # Tolerate a missing 'inverted' attribute instead of KeyError.
        if self.attributes.get ('inverted') == 'True':
            patch_name = 'UNDO: ' + patch_name

        self.message = '%s\n\n%s' % (patch_name, comment)

    def tag_name (self):
        """Return the Git tag name for a 'TAG ...' patch, or '' otherwise.

        Surrounding whitespace is dropped; remaining whitespace and
        colons become underscores.
        """
        patch_name = self.name ()
        if patch_name.startswith ("TAG "):
            # Strip before substituting: afterwards there is no
            # whitespace left for strip () to remove.
            tag = patch_name[4:].strip ()
            return re.sub (r'[\s:]', '_', tag)
        return ''

def get_darcs_patches (darcs_repo):
    """Return the patches of DARCS_REPO as a list of DarcsPatch objects.

    The list follows the order of 'darcs changes --xml --reverse' and
    each patch's 'number' attribute is its index in the list.
    """
    progress ('reading patches.')

    changes_xml = read_pipe ('darcs changes --xml --reverse --repo ' + darcs_repo)
    root = xml.dom.minidom.parseString (changes_xml).documentElement

    patches = []
    for (index, elt) in enumerate (root.getElementsByTagName ('patch')):
        patch = DarcsPatch (elt, darcs_repo)
        patch.number = index
        patches.append (patch)

    return patches

################################################################
# GIT export

def export_tree (tree, gfi):
    """Write every file below TREE to GFI as fast-import file commands.

    Emits 'deleteall', then for each file an 'M 644 inline <path>'
    command followed by its 'data' block, and finally a blank line.
    Paths are made relative to TREE.
    """
    tree = os.path.normpath (tree)
    prefix = tree + '/'
    gfi.write ('deleteall\n')
    for (root, dirs, files) in os.walk (tree):
        for f in files:
            rf = os.path.normpath (os.path.join (root, f))
            fh = open (rf)
            try:
                s = fh.read ()
            finally:
                fh.close ()

            # Remove only the leading tree prefix.  str.replace would
            # also delete later occurrences of the same text and so
            # corrupt paths such as <tree>/b/<tree>/c.
            if rf.startswith (prefix):
                rf = rf[len (prefix):]

            gfi.write ('M 644 inline %s\n' % rf)
            gfi.write ('data %d\n%s\n' % (len (s), s))
    gfi.write ('\n')

    
def export_commit (repo, patch, last_patch, gfi):
    """Write the fast-import commit for PATCH to GFI.

    The commit goes to the temporary branch refs/heads/darcstmp<N> and
    gets mark :<N+1>, N being the patch number.  LAST_PATCH, when not
    None, becomes the parent ('from').  Every pending patch that is
    present in REPO is recorded as an extra 'merge' parent.  The tree is
    exported from REPO's pristine tree.

    pending_patches is updated: the parent and the merged patches are
    dropped, PATCH itself is added.
    """
    gfi.write ('commit refs/heads/darcstmp%d\n' % patch.number)
    gfi.write ('mark :%d\n' % (patch.number + 1))
    gfi.write ('committer %s <%s> %s\n' % (patch.author_name,
                                           patch.author_mail,
                                           patch.date))
    gfi.write ('data %d\n%s\n' % (len (patch.message), patch.message))
    if last_patch:
        gfi.write ('from :%d\n' % (last_patch.number + 1))
        # The parent is no longer a dangling head.
        pending_patches.pop (last_patch.number, None)

    # Iterate over a snapshot: entries are deleted inside the loop.
    for (n, p) in list (pending_patches.items ()):
        if repo.has_patch (p):
            gfi.write ('merge :%d\n' % (n + 1))
            del pending_patches[n]

    pending_patches[patch.number] = patch
    export_tree (repo.pristine_tree (), gfi)

def export_pending (gfi):
    """Point refs/heads/master at the result of the conversion.

    Nothing pending: nothing is written.  One pending patch: master is
    reset to its commit.  Several: each one gets a refs/heads/master<N>
    branch, and master becomes a 'tie together' commit whose parent is
    the lowest-numbered pending patch and which merges the others; it
    reuses that first patch's author and date.
    """
    if not pending_patches:
        # e.g. a repository without patches
        return

    # Sort by patch number so the output does not depend on dict order.
    numbers = sorted (pending_patches)
    heads = [pending_patches[n] for n in numbers]

    if len (heads) == 1:
        gfi.write ('reset refs/heads/master\n')
        gfi.write ('from :%d\n\n' % (heads[0].number + 1))
        return

    for n in numbers:
        gfi.write ('reset refs/heads/master%d\n' % n)
        gfi.write ('from :%d\n\n' % (n + 1))

    first = heads[0]
    gfi.write ('commit refs/heads/master\n')
    gfi.write ('committer %s <%s> %s\n' % (first.author_name,
                                           first.author_mail,
                                           first.date))
    msg = 'tie together'
    gfi.write ('data %d\n%s\n' % (len (msg), msg))
    gfi.write ('from :%d\n' % (first.number + 1))
    for p in heads[1:]:
        gfi.write ('merge :%d\n' % (p.number + 1))
    gfi.write ('\n')
        
    
def export_tag (patch, gfi):
    """Write a fast-import 'tag' command for PATCH to GFI.

    The tag is named patch.tag_name (), refers to the mark of PATCH's
    commit (patch number + 1) and carries the patch's author, date and
    message.
    """
    gfi.write ('tag %s\n' % patch.tag_name ())

    mark = patch.number + 1
    gfi.write ('from :%d\n' % mark)

    tagger = (patch.author_name, patch.author_mail, patch.date)
    gfi.write ('tagger %s <%s> %s\n' % tagger)

    body = patch.message
    gfi.write ('data %d\n%s\n' % (len (body), body))
    
################################################################
# main.
def main ():
    """Convert the darcs repository named on the command line to Git.

    Creates a bare Git repository at the destination, streams one
    commit per darcs patch into 'git fast-import', points master at the
    result and removes the temporary branches and the scratch darcs
    repository.

    Raises CommandFailed when a shell command or the importer fails,
    and Exception ('urg') when a patch cannot be pulled without
    conflict on top of any earlier patch.
    """
    (options, args) = get_cli_options ()

    darcs_repo = os.path.abspath (args[0])
    git_repo = os.path.abspath (options.target_git_repo)

    system ('mkdir %(git_repo)s && cd %(git_repo)s && git --bare init' % locals ())
    os.environ['GIT_DIR'] = git_repo

    # Invoked as 'git fast-import': the dashed 'git-fast-import' form
    # is not on PATH in current Git installations.
    gfi = os.popen ('git fast-import', 'w')

    patches = get_darcs_patches (darcs_repo)

    conv_repo = DarcsConversionRepo ("darcs2git.tmpdarcs", patches)
    for p in patches:
        # First try the immediately preceding patch as parent.
        parent = p.number - 1

        last = None
        while 1:
            if parent >= 0:
                last = patches[parent]

            try:
                conv_repo.go_from_to (last, p)
                break
            except PullConflict:
                ## simplistic, may not be enough.
                progress ('conflict, going one back')
                parent -= 1

                if parent < 0:
                    raise Exception('urg')

        progress ('Export %d -> %d (total %d)' % (parent,
                                                 p.number, len (patches)))
        export_commit (conv_repo, p, last, gfi)
        if p.tag_name ():
            export_tag (p, gfi)
    export_pending (gfi)
    # close () returns the importer's exit status; do not go on to
    # delete refs and the scratch repo after a failed import.
    if gfi.close ():
        raise CommandFailed ('git fast-import failed')

    system ('rm %(git_repo)s/refs/heads/darcstmp*' % locals ())
    conv_repo.clean ()


# Run the conversion only when executed as a script, not on import.
if __name__ == '__main__':
    main ()

