source: orange-bioinformatics/Orange/bioinformatics/widgets/chipstat.py @ 1625:cefeb35cbfc9

Revision 1625:cefeb35cbfc9, 12.2 KB checked in by mitar, 2 years ago (diff)

Moving files around.

Line 
1## Automatically adapted for numpy.oldnumeric Oct 04, 2007 by
2
3import math
4import numpy
5import numpy.oldnumeric as Numeric, numpy.oldnumeric.ma as MA
6import numpy.oldnumeric.linear_algebra as LinearAlgebra
7import numpyExtn
8import os, os.path
9import orange
10
11#########################################################################
12## standardization of profiles / arrays
13#########################################################################
14
15def standardize_arrays(et, robust=True):
16    """Returns a new example table where values are standardized to mean zero and unit variance columnwise.
17    Option: parametric (mean/stddev) or non-parametric (median/mad).
18    """
19    ma = orng2ma(et)
20    if robust:
21        return ma2orng_keepClassMetas(scaleMad(centerMed(ma)), et)
22    else:
23        return ma2orng_keepClassMetas(scaleStd(centerMean(ma)), et)
24   
25
26def standardize_genes(et, robust=True):
27    """Returns a new example table where values are standardized to mean zero and unit variance rowwise.
28    Option: parametric (mean/stddev) or non-parametric (median/mad).
29    """
30    ma = orng2ma(et)
31    if robust:
32        return ma2orng_keepClassMetas(scaleMad(centerMed(ma,1),1), et)
33    else:
34        return ma2orng_keepClassMetas(scaleStd(centerMean(ma,1),1), et)
35
36
37#########################################################################
38## merge replicas by mean, median, min, max
39#########################################################################
40
41def merge_replicas(aETStruct, type):
42    """Returns a list of tuples (strain, [avrg_orngET]) where aETStruct corresponds to a list of tuples (strain, [orngET1, orngET2, ...]);
43    type = ["mean" | "median" | "min" | "max"]
44    """
45    shape = [0,0,0]
46    et0 = aETStruct[0][1][0]                                            # the first example table
47    shape[0] = len(et0)                                                 # number of examples (genes)
48    shape[1] = len(et0.domain.attributes)                               # number of attributes (time points)
49    mergedETStruct = []
50    if type == "mean":
51        merge_func = MA.average
52    elif type == "median":
53        merge_func = numpyExtn.medianMA
54    elif type == "min":
55        merge_func = numpyExtn.minMA
56    elif type == "max":
57        merge_func = numpyExtn.maxMA
58    else:
59        raise AttributeError, "type = ['mean' | 'median' | 'min' | 'max']"
60    for st, etList in aETStruct:
61        shape[2] = len(etList)
62        ma3d = MA.zeros(shape, Numeric.Float)
63        for idx, et in enumerate(etList):
64            ma3d[:,:,idx] = orng2ma(et)
65        mergedETStruct.append((st, [ma2orng_keepClassMetas(merge_func(ma3d, 2), etList[0])]))
66    return mergedETStruct
67       
68
69###################################################################################
70## HELPER FUNCTIONS
71###################################################################################
72
73def scaleStd(nm, axis=0):
74    """Returns new masked numarray with values scaled by dividing by Median Absolute Difference: MAD = median(|val(i)-median(val(i))|)."""
75    nm = numpyExtn.swapaxesMA(MA.asarray(nm, Numeric.Float), 0, axis)
76    return numpyExtn.swapaxesMA(nm / numpyExtn.stdMA(nm,0), 0, axis)
77
78def scaleMad(nm, axis=0):
79    """Returns new masked numarray with values scaled by dividing by Median Absolute Difference: MAD = median(|val(i)-median(val(i))|)."""
80    nm = numpyExtn.swapaxesMA(MA.asarray(nm, Numeric.Float), 0, axis)
81    return numpyExtn.swapaxesMA(nm / numpyExtn.madMA(nm,0), 0, axis)
82
83
84def centerMean(nm, axis=0):
85    """Returns new masked numarray with values centered by subtracting Median."""
86    nm = numpyExtn.swapaxesMA(MA.asarray(nm, Numeric.Float), 0, axis)
87    return numpyExtn.swapaxesMA(nm - MA.average(nm,0), 0, axis)
88
89
90def centerMed(nm, axis=0):
91    """Returns new masked numarray with values centered by subtracting Median."""
92    nm = numpyExtn.swapaxesMA(MA.asarray(nm, Numeric.Float), 0, axis)
93    return numpyExtn.swapaxesMA(nm - numpyExtn.medianMA(nm,0), 0, axis)
94
95
96def getETStruct(path):
97    """Returns a list of tuples (strain, [orngET1, orngET2, ...])
98    """
99    etStruct = []
100    strains = os.listdir(path)
101    for st in strains:
102        pathSt = path + "\\" + st
103        replicas = os.listdir(pathSt)
104        stEts = []
105        for rep in replicas:
106            stEts.append(orange.ExampleTable(pathSt + "\\" + rep))
107        etStruct.append((st, stEts))
108    return etStruct
109
110
111def etStruct2ma3d(aETStruct):
112    """Converts a list of tuples (strain, [orngET1, orngET2, ...]) to a 3D masked array and returns it.
113    """
114    shape = [0,0,0]
115    et0 = aETStruct[0][1][0]                                            # the first example table
116    shape[0] = len(et0)                                                 # number of examples (genes)
117    shape[1] = len(et0.domain.attributes)                               # number of attributes (time points)
118    shape[2] = Numeric.add.reduce(map(lambda x: len(x[1]), aETStruct))  # number of ETs (replicas over all strains)
119    ma3d = MA.zeros(shape, Numeric.Float)
120    k = 0
121    for st, etList in aETStruct:
122        for et in etList:
123            ma3d[:,:,k] = orng2ma(et)
124            k += 1
125    return ma3d
126
127
128def orng2ma(aExampleTable):
129    """Converts orange.ExampleTable to MA.array based on the attribute values.
130    rows correspond to examples, columns correspond to attributes, class values are left out
131    missing values and attributes of types other than orange.FloatVariable are masked
132    """
133    vals = aExampleTable.native(0, substituteDK="?", substituteDC="?", substituteOther="?")
134    ma = MA.array(vals, Numeric.PyObject)
135    if aExampleTable.domain.classVar != None:
136        ma = ma[:,:-1]
137    mask = MA.where(MA.equal(ma, "?"), 1, 0)
138    for varIdx, var in enumerate(aExampleTable.domain.attributes):
139        if type(var) != orange.FloatVariable:
140            mask[:,varIdx] = Numeric.ones(len(aExampleTable))
141    return MA.array(MA.array(ma, Numeric.PyObject, mask=mask).filled(1e20), Numeric.Float, mask=mask)
142
143
144##def ma2orng(arr2d, aDomain):
145##    """Converts MA.array to orange.ExampleTable where examples correspond to rows and attributes to columns.
146##    masked values converted to "?" (don't knows)
147##    arr2d.shape[1] must be equal to the number of the attributes of the given domain
148##    domain attributes mut be of type orange.FloatVariable
149##    """
150##    arr2d = MA.asarray(arr2d, Numeric.PyObject)
151##    assert MA.rank(arr2d) == 2, "2d array expected"
152##    assert len(aDomain.attributes) == arr2d.shape[1], "the shape of the array incompatible with the given domain"
153##    if aDomain.classVar != None:
154##        et = orange.ExampleTable(orange.Domain(aDomain.attributes, None), arr2d.tolist("?"))
155##        return orange.ExampleTable(aDomain, et)
156##    else:
157##        return orange.ExampleTable(aDomain, arr2d.tolist("?"))
158
159def ma2orng(arr2d, aDomain):
160    """Converts MA.array to orange.ExampleTable where examples correspond to rows and attributes to columns.
161    masked values converted to "?" (don't knows)
162    arr2d.shape[1] must be equal to the number of the attributes of the given domain
163    domain attributes mut be of type orange.FloatVariable
164    2009-06-23: adapted to numpy
165    """
166    arr2d = numpy.ma.asarray(arr2d, numpy.object)
167    assert numpy.ma.rank(arr2d) == 2, "2d array expected"
168    assert len(aDomain.attributes) == arr2d.shape[1], "the shape of the array incompatible with the given domain"
169    if aDomain.classVar != None:
170        et = orange.ExampleTable(orange.Domain(aDomain.attributes, None), arr2d.tolist("?"))
171        return orange.ExampleTable(aDomain, et)
172    else:
173        return orange.ExampleTable(aDomain, arr2d.tolist("?"))
174
175
176def ma2orng_keepClassMetas(arr2d, aExampleTable):
177    """Creates new example table where attribute values correspond to the given 2D array, class and meta attributes remain unchanged.
178    """
179    arr2d = MA.asarray(arr2d, Numeric.PyObject)
180    assert MA.rank(arr2d) == 2, "2D array expected"
181    assert arr2d.shape[0] == len(aExampleTable), "arr2d.shape[0] != len(aExampleTable)"
182    assert arr2d.shape[1] == len(aExampleTable.domain.attributes), "arr2d.shape[1] != len(aExampleTable.domain.attributes)"
183    domAtt = orange.Domain(aExampleTable.domain.attributes, None)
184    if aExampleTable.domain.classVar != None:
185        domClassMeta = orange.Domain([aExampleTable.domain.classVar])
186    else:
187        domClassMeta = orange.Domain([])
188    domClassMeta.addmetas(aExampleTable.domain.getmetas())
189    etAtt = orange.ExampleTable(domAtt, arr2d.tolist("?"))
190    etClassMeta = orange.ExampleTable(domClassMeta, aExampleTable)
191    return orange.ExampleTable(aExampleTable.domain, orange.ExampleTable([etAtt, etClassMeta]))
192
193
194
195if __name__ == "__main__":
196   
197    import Meda.Preproc
198    reload(Meda.Preproc)
199    import Dicty
200    reload(Dicty)
201    import Dicty.DAnnotation
202
203##    myPath = r"C:\Python23\Lib\site-packages\orange\OrangeWidgets\Genomics\chipdata"
204##    myPath = r"C:\Documents and Settings\peterjuv\My Documents\Transcriptional Phenotype\DictyData\orngDataStruct_Nancy"
205    myPath = r"C:\Documents and Settings\peterjuv\My Documents\Transcriptional Phenotype\DictyData\orngDataStruct_Nancy_yakApufA"
206
207    def generateETStruct(path, medaData, numGenes=None):
208        ddbList = Dicty.DAnnotation.getDDBList()
209        if not os.path.exists(path):
210            os.mkdir(path)
211        medaData = Dicty.DData.DData_Nancy()   
212        for st in medaData.strains:
213            pathSt = path + "\\" + st
214            if not os.path.exists(pathSt):
215                os.mkdir(pathSt)
216            for rep in medaData.strain2replicaList(st):
217                ma2d = medaData.getRaw2d(rep)
218                et = Meda.Preproc.ma2orng(ma2d,Meda.Preproc.getTcDomain(ma2d.shape[1], False, [], None))
219                et.domain.addmeta(orange.newmetaid(), orange.StringVariable("DDB"))
220                for eIdx,e in enumerate(et):
221                    e["DDB"] = ddbList[eIdx]
222                if numGenes:
223                    orange.saveTabDelimited(pathSt + "\\" + rep + ".tab", orange.ExampleTable(et[:numGenes]))
224                else:
225                    orange.saveTabDelimited(pathSt + "\\" + rep + ".tab", et)
226
227    # generate orange example table structure
228##    generateETStruct(myPath, Dicty.DData.BR_ACS())
229
230    # load data
231##    etStruct = getETStruct(myPath)
232##    DN = Dicty.DData.DData_Nancy()
233##    rglc = DN.getReplicaGroupListCopy()
234##    DNpufAyakA = DN.getSubData([rglc[DN.getIdxStrain("pufA")], rglc[DN.getIdxStrain("yakA")], rglc[DN.getIdxStrain("yakApufA")]])
235##    ma3d = DNpufAyakA.getRaw3dAll()
236
237    # test orngET <-> MA
238##    m2d1 = DNpufAyakA.getRaw2d(DNpufAyakA.replicas[1])
239##    et = ma2orng(m2d1, Meda.Preproc.getTcDomain(13, False, [], None))
240##    m2d2 = orng2ma(et)
241##    print "conversion orngET <-> MA OK?", MA.add.reduce(MA.add.reduce(m2d1 - m2d2 > 0.001)) == 0
242##    print "masked places OK?", MA.count(m2d1) == MA.count(m2d2)
243
244    # test conversion to etStruct
245##    print "conversion to etStruct OK?", MA.add.reduce(MA.add.reduce(MA.add.reduce((etStruct2ma3d(etStruct) - DNpufAyakA.getRaw3dAll() > 0.001)))) == 0
246
247    # test standardization
248##    etA = standardize_arrays(etStruct[0][1][1])
249##    print "std.arrays OK?", MA.add.reduce(MA.add.reduce(orng2ma(etA) - Meda.Preproc.scaleMad(Meda.Preproc.centerMed(ma3d[:,:,1])) > 0.1)) == 0
250##    etB = standardize_genes(etStruct[0][1][1])
251##    print "std.genes OK?", MA.add.reduce(MA.add.reduce(orng2ma(etB) - Meda.Preproc.scaleMad(Meda.Preproc.centerMed(ma3d[:,:,1],1),1) > 0.1)) == 0
252##   
253    # test merge replicas
254##    mergedETStruct_mean = merge_replicas(etStruct, "mean")
255##    print "merge replicas: mean OK?", MA.add.reduce(MA.add.reduce(orng2ma(mergedETStruct_mean[0][1][0]) - DNpufAyakA.getMean2d("pufA") > 0.001)) == 0
256##    mergedETStruct_median = merge_replicas(etStruct, "median")
257##    mergedETStruct_min = merge_replicas(etStruct, "min")
258##    mergedETStruct_max = merge_replicas(etStruct, "max")
259##    print "merge replicas: min < median?", MA.add.reduce(MA.add.reduce(orng2ma(mergedETStruct_min[0][1][0]) > orng2ma(mergedETStruct_median[0][1][0]))) == 0
260##    print "merge replicas: median < max?", MA.add.reduce(MA.add.reduce(orng2ma(mergedETStruct_median[0][1][0]) > orng2ma(mergedETStruct_max[0][1][0]))) == 0
261##    print "merge replicas: abs(min - median)?", MA.add.reduce(MA.add.reduce(MA.absolute(orng2ma(mergedETStruct_mean[0][1][0]) - orng2ma(mergedETStruct_median[0][1][0]))))
262
Note: See TracBrowser for help on using the repository browser.