import clean
import numpy as np
import operator
import os
import person
import sys

'''
The Person class stores the two important pieces of data that describe an
Ancestry user: displayed name and number of shared centiMorgans.  A handful
of useful helper functions are also provided.

Mike Markowski, mike.ab3ap@gmail.com
May 2021
'''

class Person:
	'''One Ancestry DNA match: a displayed name and a shared cM amount.

	Instances compare equal (and hash alike) when their names match.  The
	helper methods read match files, search lists of Person and build rows
	of an adjacency matrix.
	'''

	def __eq__(self, other):
		'''Two instances of Person are equal if names are the same.'''

		if not isinstance(other, Person):
			# Let Python fall back to its default; '==' still yields False.
			return NotImplemented
		return self.myName == other.myName

	def __hash__(self):
		'''Hash on the name so equal Persons land in the same set/dict slot.

		Defining __eq__ alone would make instances unhashable.
		'''

		return hash(self.myName)

	def __init__(self, myName, myCm=0, sortByName=True):
		'''Constructor.

		Inputs:
		  myName (String) - Ancestry name as displayed on their web pages.
		  myCm (int) - default 0.  If 0, myName must be in name|cM format.
		  sortByName (boolean) - used to distinguish between storing lists of
		    Person by ascending name or descending cM.

		If myName cannot be parsed as name|cM a message is printed and the
		instance is left with an empty name and 0 cM, so that name() and
		cM() remain safe to call.
		'''

		self.sortByName = sortByName
		self.rootPerson = '?'
		if myCm != 0:
			self.myName = myName # Ancestry user name as shown in DNA matches.
			self.myCm = float(myCm) # Shared centi-Morgans with this person.
			return

		# Defaults first: a malformed string must not leave the attributes
		# undefined.
		self.myName = ''
		self.myCm = 0.0

		# Parse name|cM string into name and cM (split at the first pipe).
		name, pipe, cmText = myName.partition('|')
		cm = None
		if pipe:
			try:
				cm = float(cmText)
			except ValueError:
				cm = None # cM part is not a number.
		if cm is None:
			print('String %s not in name|cM format.  Ignoring.' % myName)
			return
		self.myName = name.strip()
		self.myCm = cm

	def __str__(self):
		'''Present an instance as name|cM, with cM rounded to an integer.'''

		return self.myName + '|' + str(int(round(self.myCm)))

	def cM(self):
		'''Return number of shared cM's with this Person.'''
		return self.myCm

	def fileReader(
		self, dirFiles, filename, cmLo=-1, cmHi=7000, masterList=False):
		'''Read DNA matches of an individual, either primary or shared
		matches.

		First line of file is the Ancestry user name whose matches follow;
		it is read and discarded.  Format of each subsequent line must be
		name|cM, one per line.  Spaces are ok in the name and case is
		important.  Names listed are used as keys in other files (each
		name is also the name of that person's own match file).  Blank
		lines and lines that cannot be parsed are skipped.

		If the file does not exist, clean.clean() is asked to create it
		from filename + '.raw'.

		Inputs:
		  dirFiles (String) - name of directory containing DNA match files.
		  filename (String) - name of file containing copy/pasted text from
		    an Ancestry web page for a DNA match.
		  cmLo (int) - ignore DNA matches who share less than this amount of
		    cM's.
		  cmHi (int) - ignore DNA matches who share more than this amount of
		    cM's.
		  masterList (boolean) - a master list is a list of every DNA match
		    with the primary person.  A master list does not contain the
		    name of the primary person and does not contain the names of
		    people who match the primary person but have no shared DNA
		    matches.  Reading a master list also records filename as
		    self.rootPerson.
		Output:
		  matches (list[Persons]) - list of Person instances corresponding to
		    matches from filename.  List is sorted by ascending user name
		    when self.sortByName is set, otherwise by descending cM.
		'''

		# Ensure raw file has been formatted as fileMember.
		fileMember = dirFiles + '/' + filename
		if not os.path.isfile(fileMember):
			# No data, try to create fileMember from .raw.
			rawfile = fileMember + '.raw'
			if not clean.clean(rawfile):
				if masterList: # Unexpected, primary person not found.
					print('%s not found.' % rawfile)
				return []

		# Open fileMember of DNA matches.
		try:
			f = open(fileMember, 'r')
		except FileNotFoundError:
			if masterList:
				print('%s not found.' % fileMember)
			return []

		if masterList: # List of direct DNA matches.
			self.rootPerson = filename

		matches = []
		accepted = set() # Names already in 'matches'.
		reported = set() # Duplicate names already warned about.
		with f: # Closes the file even if a line raises.
			f.readline() # Skip header line (DNA match's username).
			for line in f:
				if not line.strip():
					continue # Blank line.
				p = Person(line, sortByName=self.sortByName)
				if not p.name():
					# Unparseable line; the constructor already reported it.
					continue
				if not (cmLo <= p.cM() <= cmHi):
					# DNA cM out of range.
					continue
				if not masterList and p.name() == self.rootPerson:
					# Don't return root person's name.
					continue
				if p.name() in accepted:
					# 'matches' is still in file order here, so a set lookup
					# is used instead of the sorted-list search.
					if p.name() not in reported:
						print(
							'%s: duplicate user %s.  Manual correction required.'
							% (filename, p))
						reported.add(p.name())
					continue
				if (masterList
					and self.fileReader(dirFiles, p.name(), cmLo, cmHi) == []):
					# No matches for this person, so don't save person.
					continue

				accepted.add(p.name())
				matches.append(p)

		if self.sortByName:
			matches.sort(key=operator.attrgetter('myName'))
		else:
			matches.sort(key=operator.attrgetter('myCm'), reverse=True)
		return matches

	def findMatches(self, dirFiles, everyone, target, cmLo=-1, cmHi=7000):
		'''Create one row of an adjacency matrix representing a particular
		Person, a primary DNA match.  Common matches between this Person and
		the primary person are read in.  For every match, that column in this
		newly created matrix row is set to 1.

		Inputs:
		  dirFiles (String) - name of directory containing DNA match files.
		  everyone (list[Persons]) - list all primary DNA matches who also
		    have common matches with other primary matches.
		  target (Person) - person to find in master list 'everyone'.
		  cmLo (int) - ignore DNA matches who share less than this amount of
		    cM's.
		  cmHi (int) - ignore DNA matches who share more than this amount of
		    cM's.
		Output:
		  (list[list]) - a one-element list holding one row of an adjacency
		    matrix.  First column is a string, the index of 'target' in
		    'everyone' formatted as '%5d'.  Remaining elements are 1 or 0.

		Exits the program with status 1 if 'target' is not in 'everyone'.
		'''

		# Matrix sorted by name has name in column 0, by cM does not.
		co = 1 if self.sortByName else 0 # Row offset for type of matrix.
		matches = target.fileReader(dirFiles, target.name(), cmLo, cmHi)
		row = list(np.zeros(len(everyone)+co, dtype=int))
		tInd = self.findMember(everyone, target)
		if tInd == -1:
			print('Primary DNA match %s not found.  Quitting.' % target.name())
			sys.exit(1)

		# NOTE(review): when sorting by cM (co == 0) column 0 is a data
		# column, yet the label is still written there and may then be
		# overwritten by a match flag - confirm what callers expect.
		row[0] = '%5d' % tInd
		row[tInd+co] = 1 # Everyone has DNA match with self!
		for m in matches: # Loop through common DNA matches.
			i = self.findMember(everyone, m)
			if i == -1:
				continue # Not a member of the master list.
			row[i+co] = 1 # Indicate match in adjacency matrix.
		return [row]

	def findMember(self, everyone, target, start=0, stop=-1):
		'''Find index of 'target' in 'everyone', or -1 if not found.

		Uses the binary search when this instance sorts by name and the
		linear search otherwise.  'start' and 'stop' are only honoured by
		the binary search.
		'''

		if self.sortByName:
			return self.findMemberBinary(everyone, target, start, stop)
		return self.findMemberBruteForce(everyone, target)

	def findMemberBinary(self, everyone, target, start=0, stop=-1):
		'''Master list of matches assumed to be in increasing order by
		username.  This makes it possible to quickly find a target Person
		with a binary search.

		Inputs:
		  everyone (list[Persons]) - list all primary DNA matches who also
		    have common matches with other primary matches.
		  target (Person) - person to find in master list 'everyone'.
		  start (int) - lower 'everyone' index for search (inclusive).
		  stop (int) - upper 'everyone' index for search (inclusive); -1
		    means the last element.
		Output:
		  (int) index of 'target' in 'everyone', or -1 if not found.
		'''

		if stop == -1:
			stop = len(everyone) - 1

		wanted = target.name()
		lo, hi = start, stop
		while lo <= hi: # An empty list gives lo > hi straight away.
			mid = (lo + hi) // 2
			midName = everyone[mid].name()
			if midName == wanted:
				return mid
			if wanted < midName:
				hi = mid - 1
			else:
				lo = mid + 1
		return -1

	def findMemberBruteForce(self, everyone, target):
		'''Linear search to find target in everyone.  This is used when
		people are sorted in descending cM order and findMemberBinary
		can't be used.

		Inputs:
		  everyone (list[Persons]) - list all primary DNA matches who also
		    have common matches with other primary matches.
		  target (Person) - person to find in master list 'everyone'.
		Output:
		  (int) index of first entry named like 'target', or -1 if not found.
		'''

		wanted = target.name()
		for i, candidate in enumerate(everyone):
			if candidate.name() == wanted:
				return i
		return -1

	def name(self):
		'''Return this Person's name.'''
		return self.myName

	def sortName(self, plist):
		'''Return a new list of the Persons in ascending name order.'''
		return sorted(plist, key=operator.attrgetter('myName'))

	def sortCm(self, plist):
		'''Return a new list of the Persons in descending cM order.'''
		return sorted(plist, key=operator.attrgetter('myCm'), reverse=True)
