#!/usr/local/anaconda3/bin/python

'''
Mike Markowski, mike.ab3ap@gmail
May 2021

This program uses input files of DNA matches to stitch together connections
using an adjacency matrix.  The matrix is then used to generate a 'dot'
language file so that Graphviz plots the results.

All DNA match files have the format:

person
name1|cm1
name2|cm2
...

where the file name exactly matches the first row.  Each match in the file
that also has a match must have the same name as well.  For the above
example, match files must be named 'name1' and 'name2'.  It is ok if a given
file entry has no match file of its own.  That person will be ignored.

When making Graphviz circo graphs, during matrix reduction rows can be
re-ordered.  Row titles are therefore required.  A notional adjacency matrix
might look like this:

[['00', 1, 1, 1, 1, 1, 1, 0, 0],
['01', 1, 1, 1, 1, 1, 1, 0, 0],
['02', 1, 1, 1, 0, 1, 1, 0, 0],
['03', 1, 1, 0, 1, 0, 0, 0, 0],
['04', 1, 1, 1, 0, 1, 1, 0, 0],
['05', 1, 1, 1, 0, 1, 1, 0, 0],
['06', 0, 0, 0, 0, 0, 0, 1, 1],
['07', 0, 0, 0, 0, 0, 0, 1, 1]]

where (unlabeled) column names, left to right, match row names ('00', '01',
etc.), top to bottom.  If a Graphviz digraph is generated, rows are not
re-ordered and the matrix above would instead look like

[[1, 1, 1, 1, 1, 1, 0, 0],
[1, 1, 1, 1, 1, 1, 0, 0],
[1, 1, 1, 0, 1, 1, 0, 0],
[1, 1, 0, 1, 0, 0, 0, 0],
[1, 1, 1, 0, 1, 1, 0, 0],
[1, 1, 1, 0, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 0, 0, 1, 1]]

Code throughout is slightly different to handle these small differences.
In both cases, a dot language file is created for input to Graphviz,
and that file can be further manipulated if desired.

Any and all improvements are most welcome.  Please share and let's improve
the program!
'''

import circo
import digraph
import os
import person
import sys

class Dna:
	'''Command line front end.  Parses the arguments, then either prints the
	match list of one person or asks the circo and/or digraph modules to
	build their graphs.  Two matrix printing helpers for debugging are also
	kept here.
	'''

	def parseArgs(self, argv):
		'''React to command line arguments appropriately as shown in usage().
		The -m argument results in printing matches for that person and
		exiting.  Otherwise, the adjacency matrix and plot are created.

		Inputs:
		  argv (list[String]) - full argument vector; argv[0] is the
		    program name, the first non-option argument is the primary
		    person.

		Output: none.  Exits through usage() (status 1) on any command line
		error, and with status 0 after printing matches for -m.
		'''

		prog = argv[0]

		mkCirco = mkDigraph = False
		cmHi = 7000
		cmLo = -1
		showMatches = False
		member = ''
		target = ''
		dirFiles = '.'

		i = 1
		while i < len(argv):
			arg = argv[i]
			if arg == '-c': # Generate Graphviz circo graph.
				mkCirco = True
			elif arg == '-d': # Generate Graphviz directed graph.
				mkDigraph = True
			elif arg == '-f': # Location of DNA match files.
				i += 1
				dirFiles = self._optionValue(argv, i, prog)
			elif arg == '-h': # Ignore people whose cM > cmHi.
				i += 1
				cmHi = self._floatValue(argv, i, prog)
			elif arg == '-l': # Ignore people whose cM < cmLo.
				i += 1
				cmLo = self._floatValue(argv, i, prog)
			elif arg == '-m': # Print matches and exit.
				showMatches = True
				i += 1
				member = self._optionValue(argv, i, prog)
			elif arg == '-t': # Target person of interest.
				i += 1
				target = self._optionValue(argv, i, prog)
			elif arg.startswith('-'):
				# startswith() is safe for an empty string argument,
				# unlike indexing its first character.
				self.usage(prog, arg) # Does not return.
			else:
				break # First non-option argument: the primary person.
			i += 1

		if showMatches:
			p = person.Person('dummy|0', sortByName=False)
			matches = p.fileReader(
				dirFiles, member, cmLo, cmHi, masterList=True)
			for m in matches:
				print(m)
			sys.exit(0)

		if i >= len(argv):
			self.usage(prog) # No more args to work with; does not return.
		mainPerson = argv[i]

		if mkCirco:
			c = circo.Circo()
			c.build(dirFiles, mainPerson, cmLo, cmHi, target)
		if mkDigraph:
			d = digraph.Digraph()
			d.build(dirFiles, mainPerson, cmLo, cmHi, target)
		if not mkCirco and not mkDigraph:
			print('Choose -c or -d.')
			self.usage(prog)

	def _optionValue(self, argv, i, prog):
		'''Return argv[i], the value belonging to the option at argv[i-1].
		If the command line ends before the value, report it and exit
		through usage() rather than failing with an IndexError.
		'''
		if i >= len(argv):
			print('Missing value for option %s' % argv[i-1])
			self.usage(prog) # Does not return.
		return argv[i]

	def _floatValue(self, argv, i, prog):
		'''Return argv[i] converted to float for the option at argv[i-1].
		A missing or non-numeric value is reported, naming the option that
		was actually given, and the program exits through usage().
		'''
		text = self._optionValue(argv, i, prog)
		try:
			return float(text)
		except ValueError:
			print('Bad integer to %s: %s' % (argv[i-1], text))
			self.usage(prog) # Does not return.

	def printMatrix(self, title, m):
		'''Mainly for debugging or as example of intermediate states of
		adjacency matrix, this subroutine presents the matrix in a fairly
		compact form.  For readability, zeros are replaced with dashes.

		Rows whose text contains a quote (i.e., rows starting with a row
		title) are compacted; rows without one are printed as Python
		formats them.

		Inputs:
		  title (String) - title to be printed above matrix.
		  m ([[...],[...],...]) - adjacency matrix.

		Output: none
		'''
		# Applied in this order to the numeric part of a titled row.
		substitutions = (
			(',', ''), # Remove commas.
			('0 ', '- '), # Replace zeros with dashes.
			(' 0', ' -'),
			('0 ', '0'), # Remove spaces for compactness.
			(' 0', '0'),
			('1 ', '1'),
			(' 1', '1'),
			('- ', '-'),
			(' -', '-'),
		)

		print(title)
		for i, row in enumerate(m):
			s = str(row)
			quote = s.rfind('\'') # Closing quote of the last name, if any.
			if quote > -1:
				names = s[:quote+1]
				s = s[quote+1:]
				for old, new in substitutions:
					s = s.replace(old, new)
				s = names + ' ' + s # Space between name and numbers.
			print('%3d: %s' % (i, s))

	def printPeople(self, title, everyone, m):
		'''Generally for debugging, this subroutine prints the adjacency
		matrix with people's names rather than numbered entries.

		NOTE(review): this class defines no matchName method, so as written
		this call raises AttributeError unless a subclass or the caller
		supplies one.  Confirm where matchName is meant to come from.

		Inputs:
		  title (String) - title to print at start.
		  everyone (list[Persons]) - list of DNA matches.
		  m ([[...],[...],...]) - adjacency matrix.

		Output: none
		'''

		print(title)
		for r in range(len(m)): # Each row is a primary match.
			primary = self.matchName(everyone, m, r)
			print(primary)
			# Column 0 is skipped; presumably it holds the row title.
			for c in range(1, len(m[r])): # Secondary, common matches.
				if m[r][c] == 0:
					continue
				print('%s' % self.matchName(everyone, m, c-1, 2))
				print('  -----')

	def usage(self, prog, badArg=None):
		'''When mistakes are made, remind user how to run program.

		Inputs:
		  prog (String) - program name shown in the usage line.
		  badArg (String) - unrecognized option to report first, if any.

		Output: none.  Always terminates the program with exit status 1,
		so callers need no exit call of their own afterwards.
		'''
		if badArg is not None:
			print('Unrecognized option %s' % badArg)
		print(
			'Usage: %s\n'
			'          [-c]            circular, fully connected graph\n'
			'          [-d]            directed graph\n'
			'          [-f files]      directory holding Ancestry files\n'
			'          [-h hiThresh]   high cM threshold\n'
			'          [-l loThresh]   low cM threshold\n'
			'          [-m secondary]  match list for this person\n'
			'          [-t target]     with -c, highlight links for target person\n'
			'          primary        person to construct graphs for.'
			% prog)
		sys.exit(1)

def main(argv):
	'''Program entry point: hand the argument vector to a fresh Dna
	object, whose parseArgs() does all of the work.

	Inputs:
	  argv (list[String]) - command line, program name first.
	'''
	Dna().parseArgs(argv)

# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
	main(sys.argv)

