These are chat archives for bigdatafoundation/qngene

15th
Feb 2016
Victor Dupuy
@vidup
Feb 15 2016 21:41
from pyspark import SparkConf, SparkContext

# Here I define a function to parse the file, line by line
def parseFile(line):
    """Convert one whitespace-separated text line into a typed 13-field tuple.

    The first 13 tokens of the line are used; any additional tokens are
    ignored. Output order (named after the columns they are read from):
    (CHR, BP, SNP, A1, A2, N, P, PR, OR, ORR, Q, I, FRQ).

    Raises IndexError when the line has fewer than 13 tokens and ValueError
    when a numeric token cannot be converted.
    """
    tokens = line.split()
    return (
        int(tokens[0]),     # CHR
        int(tokens[1]),     # BP
        tokens[2],          # SNP (kept as text)
        tokens[3],          # A1 (kept as text)
        tokens[4],          # A2 (kept as text)
        int(tokens[5]),     # N
        float(tokens[6]),   # P
        float(tokens[7]),   # PR
        float(tokens[8]),   # OR
        float(tokens[9]),   # ORR
        float(tokens[10]),  # Q
        float(tokens[11]),  # I
        float(tokens[12]),  # FRQ
    )

# Run Spark locally; "local[*]" is the master URL for local mode.
conf = SparkConf().setMaster("local[*]").setAppName("LineCount")
sc = SparkContext(conf=conf)

# Load the association file; each element of `lines` is one line of text.
lines = sc.textFile("file:///sparkGOAT/view_dph_hypertension_hbp_baseline_5_6_assoc.meta")

# cache() keeps the parsed records in memory so that running several
# operations on `snps` is faster than re-reading the data from disk.
snps = lines.map(parseFile).cache()
snpsCount = snps.count()

# Parenthesised print: same output on Python 2, and valid syntax on Python 3
# (the bare `print x` statement is a SyntaxError there).
print(snpsCount)
(Python)
et après il suffit de faire
spark-submit TestData.py
TestData étant le fichier contenant le code ci-dessus
David Lauzon
@davidonlaptop
Feb 15 2016 21:53
super
Est-ce qu’on peut télécharger le fichier assoc quelque part ?
Victor Dupuy
@vidup
Feb 15 2016 22:14
Je vais l'uploader.