#!/usr/local/anaconda3/bin/python

# After copy/pasting a MyHeritage web page with DNA match data on it,
# this script cleans it up and reformats it.  Suppose you paste a DNA match
# page into someName.raw, you run this program by:
#
#   ./clean.py someName.raw o ownname
#
# where o stands for owner; use m instead for a matches file. This is needed
# because the two page layouts differ in MyHeritage: matches with yourself (o)
# are formatted differently from the matches of other people (m). The
# difference is that your own match list has only one set of cM values for
# each match, while the match list of someone else always also contains your
# own data.
#
# and it creates a file, someName, with clean output like:
#
# name_of_person_with_dna_matches
# name1|cm1
# name2|cm2
# name3|cm3
# ...
#
# New files for each person in this file should be named as they appear in
# this file.  I.e., file with name name1 contains his common matches, name2
# with hers, and so on.
# Core code by
# Mike Markowski, mike.ab3ap@gmail.com
# May 2021
# MyHeritage adaptation by
# Erik Mols, MSc BEd, epmols@gmail.com

import sys

def clean(dnaFile, sortClean, ownName):
	"""Reformat a pasted MyHeritage match page into a clean match file.

	The output file is named after dnaFile with everything from the first
	'.raw' onward removed.  Its first line is that name without leading
	directories; the remaining lines are written by ownerProcessing() or
	matchProcessing().

	Parameters:
		dnaFile   -- path of the raw paste; its name must contain '.raw'.
		sortClean -- 'o' for the owner's own match page, 'm' for the match
		             page of somebody else.
		ownName   -- handed on unchanged to matchProcessing(); not used in
		             'o' mode.

	Returns True when the file was reformatted.  Returns False when dnaFile
	cannot be found, when its name contains no '.raw', or when sortClean is
	neither 'o' nor 'm'.  In all of those cases no output file is created
	or overwritten.
	"""
	rawPos = dnaFile.find('.raw')
	if rawPos == -1:
		# Slicing with -1 would silently chop the last character off the
		# name and write to a wrongly named file.
		print('File name %s does not end in .raw.' % dnaFile)
		return False

	# Validate the mode before touching the output file, so that a typo on
	# the command line cannot truncate an existing clean file.
	if sortClean not in ('o', 'm'):
		print("No correct DNA parameter was given")
		return False

	try:
		f = open(dnaFile, 'r')
	except FileNotFoundError:
		return False # Fail, couldn't find match file.

	cleanFile = dnaFile[:rawPos]
	dnaName = cleanFile[cleanFile.rfind('/')+1:]

	# The with statement closes both files even if processing raises.
	# Closing an already closed file is a no-op, so it does not matter
	# whether the processing function closes them as well.
	with f, open(cleanFile, 'w') as fOut:
		fOut.write('%s\n' % dnaName) # User name whose matches follow.
		if sortClean == 'o':
			ownerProcessing(f, fOut)
		else:
			matchProcessing(f, fOut, ownName)

	return True # Successful reformatting.

def ownerProcessing(f, fOut):
	"""Write 'name|cM' lines for a pasted owner match page.

	The first line of f is written as the first name.  After every line
	containing the 'Bekijk DNA Match' label, two lines are skipped and the
	third is written as the next name.  Any other line containing '(' is
	taken as a cM value: the text from '(' onward is passed through
	correct() and written as '|value' followed by a newline.

	Parameters:
		f    -- open, line-iterable text source with the raw paste.
		fOut -- open, writable text file for the cleaned lines.

	Both f and fOut are closed before returning, also when an error occurs.
	A paste that stops right after a 'Bekijk DNA Match' label ends
	processing quietly instead of raising StopIteration.
	"""
	# Needs to be changed for different language versions! This is DUTCH.
	marker = "Bekijk DNA Match\n"
	try:
		firstLine = True
		for line in f:
			if firstLine:
				fOut.write(line.rstrip("\n")) # First line starts with a name.
				firstLine = False

			if marker in line:
				# The name is on the third line after the label.
				next(f, None)
				next(f, None)
				name = next(f, None)
				if name is None:
					break # Paste ended before the name was reached.
				fOut.write(checkEnd(name).rstrip("\n"))
			else:
				start = line.find('(')
				if start >= 0:
					cM = checkEnd(correct(line[start:]))
					fOut.write('|%s\n' % cM)
	finally:
		f.close()
		fOut.close()

def matchProcessing(f, fOut, ownName):
	"""Write 'name|cM' lines for a pasted match page of another person.

	For every line of f that contains '%', the next line supplies the cM
	value (the text from its first '(' up to its first ')', cleaned by
	correct()) and the line after that supplies the name.

	Parameters:
		f       -- open, line-iterable text source with the raw paste.
		fOut    -- open, writable text file for the cleaned lines.
		ownName -- currently unused; kept so existing callers keep working.

	Both f and fOut are closed before returning, also when an error occurs,
	which matches ownerProcessing().  A paste that stops in the middle of a
	match ends processing quietly instead of raising StopIteration.
	"""
	try:
		for line in f:
			if '%' not in line:
				continue

			cmLine = next(f, None)
			nameLine = next(f, None)
			if nameLine is None:
				break # Paste ended before the match was complete.

			# NOTE(review): if a parenthesis is missing, find() returns -1
			# and the slice below picks an arbitrary piece of the line.
			cM = cmLine[cmLine.find('('):cmLine.find(')')]
			fOut.write(nameLine.rstrip('\n'))
			fOut.write('|%s\n' % correct(cM))
	finally:
		f.close()
		fOut.close()


def checkEnd(tester):
	"""Blank out the Dutch pagination label.

	Returns an empty string when tester is exactly the line
	'Ga naar pagina' followed by a newline; any other value is returned
	unchanged.
	"""
	# Only for the Dutch version!
	return "" if tester == "Ga naar pagina\n" else tester

def correct(cM):
	"""Turn a pasted cM text such as '(1.234,5 cM)' into a plain number string.

	The text 'cM', both parentheses and trailing newlines are removed and
	every comma is replaced by a dot.  The result is split on dots.  Pieces
	that int() accepts are copied as they are; any other piece contributes
	a dot followed by its first character.

	Parameters:
		cM -- the raw text, as a str.

	Returns the rebuilt string.  An empty input yields '.'.
	"""
	for token in ("cM", "(", ")"):
		cM = cM.replace(token, "")
	cM = cM.rstrip('\n').replace(",", ".")

	# NOTE(review): a piece only becomes the decimal part when int() rejects
	# it, presumably because of an invisible character the web page puts
	# after the number -- confirm against a real paste.
	parts = []
	for piece in cM.split('.'):
		try:
			int(piece)
		except ValueError:
			# Only a failed conversion is expected here; anything else
			# (for example KeyboardInterrupt) must not be swallowed.
			parts.append("." + piece[:1])
		else:
			parts.append(piece)

	return "".join(parts)

if __name__ == '__main__':
	# Usage: clean.py someName.raw o|m [ownName]
	if len(sys.argv) < 3:
		# Missing arguments used to surface as an IndexError traceback.
		print('Usage: %s someName.raw o|m [ownName]' % sys.argv[0])
		sys.exit(1)

	filename = sys.argv[1]
	if filename.find('.raw') == -1:
		print('File name %s does not end in .raw.' % filename)
	else:
		sortClean = sys.argv[2]
		# clean() only hands the name on, so it may be left out.
		ownName = sys.argv[3] if len(sys.argv) > 3 else ''
		# clean() returns False when it could not do its work; report that
		# instead of exiting silently.
		if not clean(filename, sortClean, ownName):
			print('Could not clean %s.' % filename)
			sys.exit(1)


