"""Module for RNAforesterSearcher classes."""

import os, math, re
import subprocess
from subprocess import CalledProcessError

import dynamit.motifSearcher
import dynamit.utils

class RNAforesterSearcher(dynamit.motifSearcher.MotifSearcher):
	"""Class implementing a RNAforester RNA secondary
	structure motif search component, running this tool
	on the provided input sequences and providing its
	processed motifs and instances as results.
	"""
	def __init__(self):
		"""Initialize all class attributes with their default values.

		The base-class initializer runs first; the searcher name is then
		set, and the executable path and extra parameters start out empty
		until setConfiguration() assigns them.
		"""
		# name the class explicitly: super(self.__class__, self) recurses
		# forever as soon as this class is subclassed, because
		# self.__class__ is then the subclass rather than this class.
		super(RNAforesterSearcher, self).__init__()
		self.searcherName = "RNAforester"
		self.path = ""
		self.params = ""

	def setConfiguration(self, path, params):
		"""Store the searcher settings read from the configuration file.

		Args:
			path: location of the RNAforester executable; runSearch joins
						it with the executable name "RNAforester".
			params: extra command-line parameters, as a single string,
							appended to the RNAforester invocation.

		Returns:
			Always 0: the values are stored as given, without validation,
			so this implementation has no error path.
		"""
		self.path, self.params = path, params
		return 0

	def runSearch(self, sequencesFilename):
		"""Performs motif search by executing the RNAforester
		secondary structure motif search tool and processing
		its results to provide motifs and related instances.

		The input is first rewritten as a single-line FASTA file, which is
		fed to RNAforester on its standard input. The tool's combined
		stdout/stderr is saved next to that file with a ".RNAforester"
		suffix and then parsed by _processRNAforesterResults.

		Args:
			sequencesFilename: input sequences filename for this run.

		Returns:
			Returns the value produced by _processRNAforesterResults (a list
			of strings representing identified motif matches, or 1 if the
			parsing failed); returns 1, after printing an error message, if
			the sequences could not be prepared, RNAforester failed or the
			files involved could not be read or written.
		"""
		# convert sequences file to single line sequences FASTA
		singleLineFilename = os.path.splitext(sequencesFilename)[0] + \
			"_singleLine.fasta"
		if dynamit.utils.makeSingleLineFASTA(sequencesFilename,
				singleLineFilename) != 0:
			print("[ERROR] Unable to create single line sequences file.")
			return 1

		resultsFilename = singleLineFilename + ".RNAforester"

		# compose the complete command-line for launching RNAforester.
		completePath = os.path.join(self.path, "RNAforester") + \
			" -p -m " + self.params

		# prepare sequences dictionary to be later passed
		# to _processRNAforesterResults
		sequences = dict((seqRecord.description, str(seqRecord.seq))
			for seqRecord in dynamit.utils.getSequencesRecords(singleLineFilename))

		try:
			# launch RNAforester and wait for its execution to complete
			# (stderr is merged into the captured output so that it can be
			# shown if an error happens). The context managers guarantee
			# both files are closed on every path; the previous explicit
			# close() calls in a finally clause failed with an unbound
			# name whenever one of the open() calls itself raised.
			with open(singleLineFilename) as inSeqsHandle:
				callOutput = subprocess.check_output(completePath, shell=True,
					stderr=subprocess.STDOUT, stdin=inSeqsHandle)
			# write results only once the run succeeded, so that a failed
			# run does not leave an empty results file behind.
			with open(resultsFilename, "wb") as outResultsHandle:
				outResultsHandle.write(callOutput)
		except CalledProcessError as e:
			# inform about the error that happened, and abort searcher execution.
			errorOutput = e.output
			if not isinstance(errorOutput, str):
				# captured output is bytes on Python 3; this is a no-op on
				# Python 2, where it already is a str.
				errorOutput = errorOutput.decode("utf-8", "replace")
			print("[ERROR] RNAforester execution terminated with an error:" + errorOutput)
			return 1
		except (IOError, OSError) as e:
			print("[ERROR] Unable to read sequences or write RNAforester results: %s" % str(e))
			return 1

		# check if RNAforester results exist
		if not os.path.isfile(resultsFilename):
			print("[ERROR] Could not find RNAforester results file.")
			return 1

		# extract results
		print("  [RNAforesterSearcher] Search completed.")
		self.searchResults = self._processRNAforesterResults(sequences,
			resultsFilename)

		# remove cluster.dot and test.out files (generated by RNAforester)
		# but not needed in this context.
		for leftoverFilename in ("cluster.dot", "test.out"):
			if os.path.isfile(leftoverFilename):
				os.remove(leftoverFilename)

		print("  [RNAforesterSearcher] Execution completed.")
		return self.searchResults


	def _processRNAforesterResults(self, sequences, resultsFilename):
		""" Process results contained in RNAforester output files to
		produce a table for subsequent DynaMIT phases.

		The file is scanned for "RNA Structure Cluster Nr" headers. For
		each cluster the score found on the following line is stored as
		its base-2 logarithm and the position of the first instance line
		is remembered; the scan then goes on until the cluster's
		"Consensus sequence/structure:" section, reads the consensus
		sequence and structure, and finally jumps back to the remembered
		position to emit one result row per instance line.

		Args:
			sequences: a dictionary of sequences (id is key, sequence is value).
			resultsFilename: the RNAforester results filename.
		Returns:
			Returns a list of strings, one per motif match; each string holds
			these tab-separated fields: motif consensus sequence, the literal
			"structure", searcher name, sequence id, match start, match end,
			log2 score and consensus structure. Returns 1 (after printing
			the error) if the file cannot be read or parsed.
		"""
		print("  [RNAforesterSearcher] Processing results: < %s >" %
			os.path.basename(resultsFilename))
		try:
			with open(resultsFilename) as f:
				lines = f.readlines()

			processedResults = []
			isInResults = False
			isInCluster = False
			resultsStart = 0
			motifConsensus = ""
			motifScore = 0
			motifStructConsensus = ""

			i = 0
			while i < len(lines):
				line = lines[i].rstrip('\n')

				if isInResults:
					# we have collected the motif consensus and are
					# now processing its instances
					if not line.strip() or line.startswith("    "):
						# a blank or indented line ends the instances of this
						# cluster. The line has already lost its newline, so
						# emptiness must be tested here: comparing it with
						# '\n' can never match, and a blank line would then be
						# parsed as an instance and abort the whole processing.
						isInResults = False
						isInCluster = False
					else:
						# maps current instance to its position on the sequence,
						# then add it to results
						info = re.split(r'\s+', line)
						# get the match full sequence ID.
						fullSeqID = dynamit.utils.getFullSequenceID(sequences, info[0], 0)
						fullSeqID = info[0] if fullSeqID == -1 else fullSeqID
						# remove gaps to find the motif instance position and, if
						# original sequences contained "T"s replace Us in RNAforester.
						noGapsSeq = info[1].replace("-", "")
						if sequences[fullSeqID].find("T") >= 0:
							noGapsSeq = noGapsSeq.replace("U", "T")
						motifStart = sequences[fullSeqID].find(noGapsSeq) + 1
						# if the motif instance was not found on the sequence, we
						# likely selected the wrong full sequence ID, so try again
						# systematically on all sequence until found.
						if motifStart == 0:
							# items() rather than the Python 2-only iteritems().
							for seqId, seq in sequences.items():
								if seqId.startswith(info[0]):
									motifStart = seq.find(noGapsSeq) + 1
									if motifStart > 0:
										fullSeqID = seqId
										break
						# add the instance to the searcher results.
						# NOTE(review): the end coordinate adds the gapped alignment
						# length (len(info[1])), not the length of the ungapped
						# instance - confirm this is intended.
						processedResults.append(motifConsensus + \
							"\tstructure\t" + self.searcherName + \
							"\t" + fullSeqID + "\t" + str(motifStart) + \
							"\t" + str(motifStart + len(info[1])) + \
							"\t" + motifScore + "\t" + motifStructConsensus)

				if line.startswith("RNA Structure Cluster Nr"):
					# we have reached the portion of the file containing motifs instances,
					# so skip additional lines, store the motif score and the starting
					# lines of motifs instances and go on, we will go back to instances
					# after collecting the motif consensus
					motifScore = lines[i+1].rstrip('\n').split(':')[1].lstrip(' ')
					motifScore = str(math.log(float(motifScore), 2))
					i += 4
					resultsStart = i
					isInCluster = True
					continue

				if line.startswith("Consensus sequence/structure:") and isInCluster:
					# we have reached the portion of the file containing the
					# motif consensus, so get its structure/sequence representation
					# (the consensus sequence is read 11 lines below this header,
					# the consensus structure on the line after it).
					i += 11
					motifConsensus = lines[i].rstrip('\n').lstrip(' ').replace("U", "T")
					motifStructConsensus = lines[i+1].rstrip('\n').lstrip(' ')
					# now that we have the consensus, move back to motif
					# instances to record these
					i = resultsStart
					isInResults = True
					continue

				i += 1

			return processedResults
		except (IOError, IndexError, KeyError, RuntimeError, ValueError) as e:
			print("  [RNAforesterSearcher] Unexpected error: %s" % str(e))
			return 1
