Wednesday, May 14, 2014

Genotype collapse function using python


HB NA HE HA
Genotype Letter Code Number Code PLINK Code
HB 00



Homozygote B HB 02 00
NA 00 01


NA NA NA 01
HE 00 01 00

heterozygote HE 01 10
HA 00 01 11 11
Homozygote A HA 00 11











02 NA 01 00




02 00







NA 00 01






01 00 01 00





00 00 01 11 11















00 01 10 11




00 00







01 00 01






10 00 01 00





11 00 01 11 11





def collapse(x, y):
    if x == "00" or y == "00":
        return "00"
    elif x == "01" or y == "01":
        return "01"
    elif x == "11" or y == "11":
        return "11"
    elif x == "10" and y == "10":
        return "00"
    else:
        raise Exception("Nonsense combination!")

# test collapse function
allfour = ["00", "01", "10", "11"]
for i in allfour:
    for j in allfour:
        res = collapse(i, j)
        print(res, end="  ")
    print()


# from random import randint
# xint = randint(0, 256)
# yint = randint(0, 256)
def collbyte(xbyte, ybyte):
    x = "{:08b}".format(xbyte)
    xstring = [x[i:i+2] for i in range(0, len(x), 2)]
    y = "{:08b}".format(ybyte)
    ystring = [y[i:i+2] for i in range(0, len(y), 2)]
    collstring = [collapse(xstring[i], ystring[i]) for i in range(4)]
    # fh.write("\n---------------------------------------------\n")
    # fh.write("byte1: {}, binString1: {}".format(xbyte, x) + "\n")
    # fh.write("byte2: {}, binString2: {}".format(ybyte, y) + "\n")
    # fh.write(str(xstring) + "\n")
    # fh.write(str(ystring) + "\n")
    # fh.write(str(collstring) + "\n")
    resbyte = int("".join(collstring), 2)
    return resbyte

import numpy as np
collgen = np.zeros((256, 256), dtype=np.unsignedinteger)
with open("/tmp/collgen.csv", "w") as fh:
    for i in range(256):
        for j in range(256):
            fh.write(str(collbyte(i, j)) + ", ")
        fh.write("\n")

np.savetxt("/tmp/collgen.csv", collgen, delimiter=",")

# def checkbytes(xbyte, ybyte):
#     if collbyte(xbyte, ybyte) != collgen[xbyte][ybyte]:
#         print("Error!")
#
# for i in range(256):
#     for j in range(256):
#         checkbytes(i, j)




0 comments: