'''
Mike Markowski, mike.ab3ap@gmail
May 2021

This class creates an adjacency matrix and reduces it to a minimal directed
graph.

A file dnaDigraph.gv in the dot language is created for input to Graphviz;
that file can be further manipulated if desired.
'''

import numpy as np
import os
import person
import sys

class Digraph:
	'''Build, reduce and plot a directed graph of shared DNA matches.

	The graph is kept as a square adjacency matrix (a list of rows).  Only
	the part above the diagonal is used as edges: a 1 at [r][c] with c > r
	is an edge from vertex r to vertex c, so the vertex order is already a
	topological order.
	'''

	def build(self, dirFiles, mainPerson, cmLo=-1, cmHi=-1, target=''):
		'''Build an adjacency matrix for a specified Ancestry customer,
		reduce it, write a Graphviz dot file named dnaDigraph.gv and call
		Graphviz to create the plot in a file named dnaDigraph.png.

		Inputs:
		  dirFiles - passed unchanged to Person.fileReader(),
		    Person.findMatches() and printDigraph(); presumably the
		    directory holding the DNA match files (confirm in person.py).
		  mainPerson (String) - name of person to make a DNA graph for.  A
		    filename of DNA matches must exist with this name.
		  cmLo (float) - ignore DNA matches who share less than this amount of
		    cM's.
		  cmHi (float) - ignore DNA matches who share more than this amount of
		    cM's.  The default of -1 is replaced by 7000.
		  target (String) - optional name of a person to highlight in the
		    plot, see printDigraph().

		Output:
		  ([[...],[...],...]) - reduced adjacency matrix where 1's indicate
		  common matches between people named by row and column, and 0's
		  indicate that there is no match.  None is returned, and no file is
		  written, when the main person has no matches.
		'''

		# Retrieve DNA matches of root person.
		if cmHi == -1:
			cmHi = 7000 # Match everyone if not specified.
		root = person.Person(mainPerson, 7000, sortByName=False)
		everyone = root.fileReader(
			dirFiles, mainPerson, cmLo, cmHi, masterList=True)
		if len(everyone) == 0:
			return # Nothing to do.

		# Each person contributes row(s) to the raw adjacency matrix.
		am = []
		for p in everyone:
			am += p.findMatches(dirFiles, everyone, p, cmLo)
		amR = self.reduce(am) # Remove redundant edges.
		self.printDigraph(dirFiles, 'dnaDigraph.gv', everyone, amR, target)
		return amR

	def path(self, am, s, dest):
		'''Find length of longest path between two vertices in a directed
		acyclic graph.  Implementation, with same variable names, is from:

		https://en.wikipedia.org/wiki/Topological_sorting#Application_to_shortest_path_finding

		Inputs:
		  am (float[][]) - adjacency matrix, topologically ordered.  Only
		    entries above the diagonal are read.
		  s (int) - source vertex (person).
		  dest (int) - destination vertex (person).

		Output:
		  Number of edges on the longest path from s to dest, or -1 if dest
		  cannot be reached from s.
		'''

		# Longest path is same as shortest path if graph inverse.
		V = np.array(am) * -1 # Negate graph weights.
		n = V[0].size # Number of vertices (people) in graph.

		#
		# Shortest path discovery, linear time.
		#

		d = np.full(n, np.inf) # Shortest path distances from s.
		d[s] = 0 # Quick trip from self to self!
		# Loop over the vertices u as ordered in V, starting from s:
		for u in range(s, n):
			if d[u] == np.inf:
				continue # u is unreachable from s, nothing to relax.
			for v in range(u+1, n):
				w = V[u][v] # Weight moving from u to v.
				if w == 0:
					continue # No edge from u to v.
				# Relax the edge.
				if d[v] > d[u] + w: # Found a shorter path.
					d[v] = d[u] + w # Update length.

		# Longest path length is negated shortest path.
		return -1 if d[dest] == float("inf") else -d[dest]

	def pathRecursive(self, am, nFrom, nTo):
		'''XXX Do not use this! XXX

		Recursive search for the longest path from nFrom to nTo.  It is much
		slower than path() and is left here only because it is possibly
		easier to follow than the more efficient code based on a Wikipedia
		page.

		Note the different unit: this returns the number of vertices on the
		longest path (1 when nFrom == nTo) and 0 when there is no path, while
		path() returns the number of edges and -1 when there is no path.
		'''

		if nFrom == nTo:
			return 1 # Success!

		# A row without edges right of the diagonal simply falls through the
		# loop and yields 0, so no separate "empty row" test is needed.
		lenMax = 0
		for col in range(len(am[nFrom])-1, nFrom, -1):
			if am[nFrom][col] == 0:
				continue
			lenThis = self.pathRecursive(am, col, nTo)
			if lenThis == 0: # No path.
				continue
			# Count nFrom once for this branch; keep the best branch only.
			lenMax = max(lenMax, 1 + lenThis)
		return lenMax

	def printDigraph(self, dirFiles, gvname, everyone, m, target):
		'''Generate Graphviz dot file.  While there is no return value, two
		things are created: gvname and a png file with the plot.

		Inputs:
		  dirFiles - passed to Person.fileReader() when target is given.
		  gvname (String) - name of output Graphviz dot file.  The png file
		    gets the same name with the extension replaced by .png.
		  everyone (list[Persons]) - list all primary DNA matches who also
		    have common matches with other primary matches.
		  m ([[...],[...],...]) - adjacency matrix.
		  target (String) - if not empty, the box of the person with this
		    name is drawn blue and boxes of people for whom findMember()
		    on the target's matches does not return -1 are drawn orange.

		Output: none
		'''
		nNodes = len(m)

		# Look the target up before touching the output file so a failure
		# here does not leave a half written dot file behind.
		if target != '':
			t = person.Person(target, 1)
			tMatches = t.fileReader(dirFiles, target)

		plain = '%d [shape=box, label="%s"]\n'
		with open(gvname, 'w') as f:
			f.write('digraph dna {\n')
			f.write('splines=true\n')
			f.write('ratio=expand\n')
			f.write('sep=0.1\n')
			f.write('graph [overlap=false]\n')

			for r in range(nNodes): # Each row is a primary match.
				name = str(everyone[r])
				if target == '':
					f.write(plain % (r, name))
				elif target == everyone[r].name():
					f.write(
						'%d [shape=box, label="%s" color="blue" penwidth=3]\n'
						% (r, name))
				elif t.findMember(tMatches, everyone[r]) == -1:
					f.write(plain % (r, name))
				else:
					f.write(
						'%d [shape=box, label="%s" color="orange" penwidth=3]\n'
						% (r, name))

			# One line per vertex that has outgoing edges.
			for r in range(nNodes-1):
				dests = [c for c in range(r+1, nNodes) if m[r][c] == 1]
				if dests:
					f.write('  %d -> {%s}\n'
						% (r, ''.join('%d ' % c for c in dests)))
			f.write('}\n')

		# splitext copes with names that have no extension at all.
		pngname = os.path.splitext(gvname)[0] + '.png'
		cmd = ('dot -T png -o %s %s' % (pngname, gvname))
		status = os.system(cmd)
		print('%s created.' % gvname)
		if status == 0:
			print('%s created.' % pngname)
		else:
			print('%s NOT created, Graphviz exit status %d.'
				% (pngname, status))
		print('Regenerate png with: %s' % cmd)

	def reduce(self, amRaw):
		'''After the initial adjacency matrix is made where connection is
		explicit and sometimes redundant, this routine will reduce complexity
		of the matrix by removing an edge whenever a longer path between the
		same two vertices exists.

		Inputs:
		  amRaw ([[...],[...],...]) - square adjacency matrix.  It is not
		    modified.

		Output:
		  ([[...],[...],...]) - reduced adjacency matrix, a new object.

		'''
		# Copy row by row: list.copy() alone would share the rows with the
		# caller and the edits below would leak into amRaw.
		if isinstance(amRaw, np.ndarray):
			am = amRaw.copy()
		else:
			am = [list(row) for row in amRaw]
		n = len(am)

		# Only edges from lower to higher index are kept.
		for i in range(n):
			for j in range(i):
				am[i][j] = 0

		# Pass 1, matrix reduction.  For each destination col, walk the
		# sources bottom up.  A source that also has an edge to an already
		# accepted source of col reaches col in two steps, so its direct
		# edge is dropped.
		for col in range(n-1, -1, -1):
			paths = []
			for row in range(col-1, -1, -1):
				if am[row][col] != 1:
					continue
				if any(am[row][k] == 1 for k in paths):
					am[row][col] = 0
				else:
					paths.append(row)

		# Pass 2, matrix reduction.  Drop the edge row -> right whenever
		# another neighbour of row reaches right over more than one edge.
		for row in range(n):
			# Neighbours of row, i.e. 1's right of the diagonal.
			ones = [c for c in range(row+1, len(am[row]))
				if am[row][c] == 1]
			if len(ones) <= 1:
				continue # No alternative paths to consider.

			for iRight in range(len(ones)-1, -1, -1):
				for iLeft in range(iRight-1, -1, -1):
					if self.path(am, ones[iLeft], ones[iRight]) > 1:
						am[row][ones[iRight]] = 0
						break
		return am

