source:orange-bioinformatics/widgets/chipstat.py@920:7ab8f849f64f

Revision 920:7ab8f849f64f, 12.2 KB checked in by peterjuv <peterjuv@…>, 5 years ago (diff)

Line
1## Automatically adapted for numpy.oldnumeric Oct 04, 2007 by
2
3import math
4import numpy
5import numpy.oldnumeric as Numeric, numpy.oldnumeric.ma as MA
6import numpy.oldnumeric.linear_algebra as LinearAlgebra
7import numpyExtn
8import os, os.path
9import orange
10
11#########################################################################
12## standardization of profiles / arrays
13#########################################################################
14
15def standardize_arrays(et, robust=True):
16    """Returns a new example table where values are standardized to mean zero and unit variance columnwise.
17    Option: parametric (mean/stddev) or non-parametric (median/mad).
18    """
19    ma = orng2ma(et)
20    if robust:
22    else:
23        return ma2orng_keepClassMetas(scaleStd(centerMean(ma)), et)
24
25
26def standardize_genes(et, robust=True):
27    """Returns a new example table where values are standardized to mean zero and unit variance rowwise.
28    Option: parametric (mean/stddev) or non-parametric (median/mad).
29    """
30    ma = orng2ma(et)
31    if robust:
33    else:
34        return ma2orng_keepClassMetas(scaleStd(centerMean(ma,1),1), et)
35
36
37#########################################################################
38## merge replicas by mean, median, min, max
39#########################################################################
40
41def merge_replicas(aETStruct, type):
42    """Returns a list of tuples (strain, [avrg_orngET]) where aETStruct corresponds to a list of tuples (strain, [orngET1, orngET2, ...]);
43    type = ["mean" | "median" | "min" | "max"]
44    """
45    shape = [0,0,0]
46    et0 = aETStruct[0][1][0]                                            # the first example table
47    shape[0] = len(et0)                                                 # number of examples (genes)
48    shape[1] = len(et0.domain.attributes)                               # number of attributes (time points)
49    mergedETStruct = []
50    if type == "mean":
51        merge_func = MA.average
52    elif type == "median":
53        merge_func = numpyExtn.medianMA
54    elif type == "min":
55        merge_func = numpyExtn.minMA
56    elif type == "max":
57        merge_func = numpyExtn.maxMA
58    else:
59        raise AttributeError, "type = ['mean' | 'median' | 'min' | 'max']"
60    for st, etList in aETStruct:
61        shape[2] = len(etList)
62        ma3d = MA.zeros(shape, Numeric.Float)
63        for idx, et in enumerate(etList):
64            ma3d[:,:,idx] = orng2ma(et)
65        mergedETStruct.append((st, [ma2orng_keepClassMetas(merge_func(ma3d, 2), etList[0])]))
66    return mergedETStruct
67
68
69###################################################################################
70## HELPER FUNCTIONS
71###################################################################################
72
73def scaleStd(nm, axis=0):
74    """Returns new masked numarray with values scaled by dividing by Median Absolute Difference: MAD = median(|val(i)-median(val(i))|)."""
75    nm = numpyExtn.swapaxesMA(MA.asarray(nm, Numeric.Float), 0, axis)
76    return numpyExtn.swapaxesMA(nm / numpyExtn.stdMA(nm,0), 0, axis)
77
79    """Returns new masked numarray with values scaled by dividing by Median Absolute Difference: MAD = median(|val(i)-median(val(i))|)."""
80    nm = numpyExtn.swapaxesMA(MA.asarray(nm, Numeric.Float), 0, axis)
81    return numpyExtn.swapaxesMA(nm / numpyExtn.madMA(nm,0), 0, axis)
82
83
84def centerMean(nm, axis=0):
85    """Returns new masked numarray with values centered by subtracting Median."""
86    nm = numpyExtn.swapaxesMA(MA.asarray(nm, Numeric.Float), 0, axis)
87    return numpyExtn.swapaxesMA(nm - MA.average(nm,0), 0, axis)
88
89
90def centerMed(nm, axis=0):
91    """Returns new masked numarray with values centered by subtracting Median."""
92    nm = numpyExtn.swapaxesMA(MA.asarray(nm, Numeric.Float), 0, axis)
93    return numpyExtn.swapaxesMA(nm - numpyExtn.medianMA(nm,0), 0, axis)
94
95
96def getETStruct(path):
97    """Returns a list of tuples (strain, [orngET1, orngET2, ...])
98    """
99    etStruct = []
100    strains = os.listdir(path)
101    for st in strains:
102        pathSt = path + "\\" + st
103        replicas = os.listdir(pathSt)
104        stEts = []
105        for rep in replicas:
106            stEts.append(orange.ExampleTable(pathSt + "\\" + rep))
107        etStruct.append((st, stEts))
108    return etStruct
109
110
111def etStruct2ma3d(aETStruct):
112    """Converts a list of tuples (strain, [orngET1, orngET2, ...]) to a 3D masked array and returns it.
113    """
114    shape = [0,0,0]
115    et0 = aETStruct[0][1][0]                                            # the first example table
116    shape[0] = len(et0)                                                 # number of examples (genes)
117    shape[1] = len(et0.domain.attributes)                               # number of attributes (time points)
118    shape[2] = Numeric.add.reduce(map(lambda x: len(x[1]), aETStruct))  # number of ETs (replicas over all strains)
119    ma3d = MA.zeros(shape, Numeric.Float)
120    k = 0
121    for st, etList in aETStruct:
122        for et in etList:
123            ma3d[:,:,k] = orng2ma(et)
124            k += 1
125    return ma3d
126
127
128def orng2ma(aExampleTable):
129    """Converts orange.ExampleTable to MA.array based on the attribute values.
130    rows correspond to examples, columns correspond to attributes, class values are left out
131    missing values and attributes of types other than orange.FloatVariable are masked
132    """
133    vals = aExampleTable.native(0, substituteDK="?", substituteDC="?", substituteOther="?")
134    ma = MA.array(vals, Numeric.PyObject)
135    if aExampleTable.domain.classVar != None:
136        ma = ma[:,:-1]
137    mask = MA.where(MA.equal(ma, "?"), 1, 0)
138    for varIdx, var in enumerate(aExampleTable.domain.attributes):
139        if type(var) != orange.FloatVariable:
142
143
145##    """Converts MA.array to orange.ExampleTable where examples correspond to rows and attributes to columns.
146##    masked values converted to "?" (don't knows)
147##    arr2d.shape[1] must be equal to the number of the attributes of the given domain
148##    domain attributes mut be of type orange.FloatVariable
149##    """
150##    arr2d = MA.asarray(arr2d, Numeric.PyObject)
151##    assert MA.rank(arr2d) == 2, "2d array expected"
152##    assert len(aDomain.attributes) == arr2d.shape[1], "the shape of the array incompatible with the given domain"
154##        et = orange.ExampleTable(orange.Domain(aDomain.attributes, None), arr2d.tolist("?"))
156##    else:
158
160    """Converts MA.array to orange.ExampleTable where examples correspond to rows and attributes to columns.
161    masked values converted to "?" (don't knows)
162    arr2d.shape[1] must be equal to the number of the attributes of the given domain
163    domain attributes mut be of type orange.FloatVariable
165    """
166    arr2d = numpy.ma.asarray(arr2d, numpy.object)
167    assert numpy.ma.rank(arr2d) == 2, "2d array expected"
168    assert len(aDomain.attributes) == arr2d.shape[1], "the shape of the array incompatible with the given domain"
170        et = orange.ExampleTable(orange.Domain(aDomain.attributes, None), arr2d.tolist("?"))
172    else:
174
175
176def ma2orng_keepClassMetas(arr2d, aExampleTable):
177    """Creates new example table where attribute values correspond to the given 2D array, class and meta attributes remain unchanged.
178    """
179    arr2d = MA.asarray(arr2d, Numeric.PyObject)
180    assert MA.rank(arr2d) == 2, "2D array expected"
181    assert arr2d.shape[0] == len(aExampleTable), "arr2d.shape[0] != len(aExampleTable)"
182    assert arr2d.shape[1] == len(aExampleTable.domain.attributes), "arr2d.shape[1] != len(aExampleTable.domain.attributes)"
183    domAtt = orange.Domain(aExampleTable.domain.attributes, None)
184    if aExampleTable.domain.classVar != None:
185        domClassMeta = orange.Domain([aExampleTable.domain.classVar])
186    else:
187        domClassMeta = orange.Domain([])
189    etAtt = orange.ExampleTable(domAtt, arr2d.tolist("?"))
190    etClassMeta = orange.ExampleTable(domClassMeta, aExampleTable)
191    return orange.ExampleTable(aExampleTable.domain, orange.ExampleTable([etAtt, etClassMeta]))
192
193
194
195if __name__ == "__main__":
196
197    import Meda.Preproc
199    import Dicty
201    import Dicty.DAnnotation
202
203##    myPath = r"C:\Python23\Lib\site-packages\orange\OrangeWidgets\Genomics\chipdata"
204##    myPath = r"C:\Documents and Settings\peterjuv\My Documents\Transcriptional Phenotype\DictyData\orngDataStruct_Nancy"
205    myPath = r"C:\Documents and Settings\peterjuv\My Documents\Transcriptional Phenotype\DictyData\orngDataStruct_Nancy_yakApufA"
206
208        ddbList = Dicty.DAnnotation.getDDBList()
209        if not os.path.exists(path):
210            os.mkdir(path)
213            pathSt = path + "\\" + st
214            if not os.path.exists(pathSt):
215                os.mkdir(pathSt)
218                et = Meda.Preproc.ma2orng(ma2d,Meda.Preproc.getTcDomain(ma2d.shape[1], False, [], None))
220                for eIdx,e in enumerate(et):
221                    e["DDB"] = ddbList[eIdx]
222                if numGenes:
223                    orange.saveTabDelimited(pathSt + "\\" + rep + ".tab", orange.ExampleTable(et[:numGenes]))
224                else:
225                    orange.saveTabDelimited(pathSt + "\\" + rep + ".tab", et)
226
227    # generate orange example table structure
228##    generateETStruct(myPath, Dicty.DData.BR_ACS())
229
231##    etStruct = getETStruct(myPath)
232##    DN = Dicty.DData.DData_Nancy()
233##    rglc = DN.getReplicaGroupListCopy()
234##    DNpufAyakA = DN.getSubData([rglc[DN.getIdxStrain("pufA")], rglc[DN.getIdxStrain("yakA")], rglc[DN.getIdxStrain("yakApufA")]])
235##    ma3d = DNpufAyakA.getRaw3dAll()
236
237    # test orngET <-> MA
238##    m2d1 = DNpufAyakA.getRaw2d(DNpufAyakA.replicas[1])
239##    et = ma2orng(m2d1, Meda.Preproc.getTcDomain(13, False, [], None))
240##    m2d2 = orng2ma(et)
241##    print "conversion orngET <-> MA OK?", MA.add.reduce(MA.add.reduce(m2d1 - m2d2 > 0.001)) == 0
242##    print "masked places OK?", MA.count(m2d1) == MA.count(m2d2)
243
244    # test conversion to etStruct
246
247    # test standardization
248##    etA = standardize_arrays(etStruct[0][1][1])
250##    etB = standardize_genes(etStruct[0][1][1])