
import fnmatch
from zope.interface import classProvides
from zope.interface import implements
from collective.transmogrifier.interfaces import ISectionBlueprint
from collective.transmogrifier.interfaces import ISection
from collective.transmogrifier.utils import Matcher,Condition
from urllib import unquote
import urlparse
import re
import logging
from transmogrify.pathsorter.treeserializer import TreeSerializer

"""
Backlinks Title
===============

This blueprint takes the _backlinks of each item generated by the webcrawler
and, if no title has been given to the item, attempts to guess one
from the names of the links that pointed to this document.
You can set the 'ignore' option to list link names (one regular expression
per line) that must never be used as a title.

If it can't guess a title from the backlinks, it defaults to using the file name
after cleaning it up somewhat.
"""

class BacklinksTitle(object):
    """Transmogrifier section that fills in missing item titles.

    For every item that has a ``_path``, passes the ``condition`` option and
    has no ``title`` yet, a title is chosen as follows:

    * if the item has a ``_defaultpage``, it later receives the title (and
      description) of the item whose ``_path`` is that default page;
    * otherwise the most frequent link name found in ``_backlinks`` wins,
      skipping names that match one of the ``ignore`` patterns;
    * otherwise the title is derived from the last segment of ``_path``.

    Options read by this section:

    ``ignore``
        Newline separated regular expressions; link names matching any of
        them are never used as a title. Defaults to ``next`` and
        ``previous``.
    ``condition``
        Expression evaluated per item; items for which it is false are
        passed through untouched. Defaults to ``python:True``.
    """
    classProvides(ISectionBlueprint)
    implements(ISection)

    def __init__(self, transmogrifier, name, options, previous):
        """Read the section options and set up the upstream tree serializer."""
        self.previous = previous
        raw_ignore = options.get('ignore', 'next\nprevious\n')
        # Drop blank lines: an empty pattern matches every string with
        # re.search and would silently disable titles from backlinks.
        self.toignore = [pat.strip() for pat in raw_ignore.splitlines()
                         if pat.strip()]
        self.treeserializer = TreeSerializer(transmogrifier, name, options,
                                             previous)
        self.condition = Condition(options.get('condition', 'python:True'),
                                   transmogrifier, name, options)
        self.logger = logging.getLogger(name)

    def __iter__(self):
        """Yield every upstream item, adding a ``title`` where one is missing.

        All items are buffered first, because an item with a
        ``_defaultpage`` takes its title from the default page item, which
        may only show up later in the stream.
        """
        items = []
        # Maps the path of a default page to the item that declared it.
        defaultpages = {}
        for item in self.treeserializer:
            items.append(item)
            path = item.get('_path', None)
            if path is None:
                continue

            if not self.condition(item):
                self.logger.debug("skipping %s (condition)", path)
                continue

            title = item.get('title')
            if title:
                self.logger.debug("using existing title=%s %s", title, path)
                continue

            defaultpage = item.get('_defaultpage')
            if defaultpage:
                # Remember the item; it is titled from its default page in
                # the second pass below.
                indexpath = urlparse.urljoin(path + '/', defaultpage)
                defaultpages[indexpath] = item
                continue

            guessed = self._title_from_backlinks(item.get('_backlinks'))
            if guessed is None:
                self.titlefromid(item)
            else:
                item['title'] = guessed
                self.logger.info("title=%s for %s (from backlinks)",
                                 guessed, path)

        # Second pass: copy title/description of each default page onto the
        # item that declared it. This must complete before anything is
        # yielded so consumers never see a half-updated item.
        for item in items:
            folder = defaultpages.get(item.get('_path'))
            if folder is None:
                continue
            if 'title' in item:
                folder['title'] = item['title']
            if 'description' in item:
                folder['description'] = item['description']

        for item in items:
            yield item

    def _title_from_backlinks(self, backlinks):
        """Return the most frequent usable link name, or None if there is none.

        ``backlinks`` is iterated as ``(url, name)`` pairs. Empty names and
        names matching an ignore pattern do not take part in the vote. Ties
        are broken in favour of the name that sorts last, so the result is
        deterministic.
        """
        votes = {}
        for _url, name in backlinks or ():
            if not name or not name.strip() or self.ignore(name):
                continue
            votes[name] = votes.get(name, 0) + 1
        if not votes:
            return None
        _count, winner = max((count, name) for name, count in votes.items())
        return winner

    def ignore(self, name):
        """Return True if ``name`` matches any of the ignore patterns."""
        for pat in self.toignore:
            if re.search(pat, name):
                return True
        return False

    def titlefromid(self, item):
        """Set ``item['title']`` from the last segment of ``item['_path']``.

        The segment is URL-unquoted and everything from the first ``.`` on
        is dropped. Nothing happens when the item has no path or the path
        contains no non-empty segment (for example ``/``).
        """
        path = item.get('_path')
        if not path:
            return
        segments = [p for p in path.split('/') if p]
        if not segments:
            # Path made only of slashes: there is no id to derive a title from.
            return
        title = unquote(segments[-1])
        title = title.split('.')[0]
        item['title'] = title
        self.logger.info("title=%s for %s (from id)", item['title'], path)
