Revision 1636:10d234fdadb9, 12.2 KB checked in by mitar, 2 years ago (diff)

Restructuring because we will not be using namespaces.

Line
1## Automatically adapted for numpy.oldnumeric Oct 04, 2007 by
2
3from __future__ import absolute_import
4
5import math
6import os, os.path
7
8import numpy
9import numpy.oldnumeric as Numeric, numpy.oldnumeric.ma as MA
10import numpy.oldnumeric.linear_algebra as LinearAlgebra
11
12import orange
13
14from . import numpyExtn
15
16#########################################################################
17## standardization of profiles / arrays
18#########################################################################
19
20def standardize_arrays(et, robust=True):
21    """Returns a new example table where values are standardized to mean zero and unit variance columnwise.
22    Option: parametric (mean/stddev) or non-parametric (median/mad).
23    """
24    ma = orng2ma(et)
25    if robust:
27    else:
28        return ma2orng_keepClassMetas(scaleStd(centerMean(ma)), et)
29
30
31def standardize_genes(et, robust=True):
32    """Returns a new example table where values are standardized to mean zero and unit variance rowwise.
33    Option: parametric (mean/stddev) or non-parametric (median/mad).
34    """
35    ma = orng2ma(et)
36    if robust:
38    else:
39        return ma2orng_keepClassMetas(scaleStd(centerMean(ma,1),1), et)
40
41
42#########################################################################
43## merge replicas by mean, median, min, max
44#########################################################################
45
46def merge_replicas(aETStruct, type):
47    """Returns a list of tuples (strain, [avrg_orngET]) where aETStruct corresponds to a list of tuples (strain, [orngET1, orngET2, ...]);
48    type = ["mean" | "median" | "min" | "max"]
49    """
50    shape = [0,0,0]
51    et0 = aETStruct[0][1][0]                                            # the first example table
52    shape[0] = len(et0)                                                 # number of examples (genes)
53    shape[1] = len(et0.domain.attributes)                               # number of attributes (time points)
54    mergedETStruct = []
55    if type == "mean":
56        merge_func = MA.average
57    elif type == "median":
58        merge_func = numpyExtn.medianMA
59    elif type == "min":
60        merge_func = numpyExtn.minMA
61    elif type == "max":
62        merge_func = numpyExtn.maxMA
63    else:
64        raise AttributeError, "type = ['mean' | 'median' | 'min' | 'max']"
65    for st, etList in aETStruct:
66        shape[2] = len(etList)
67        ma3d = MA.zeros(shape, Numeric.Float)
68        for idx, et in enumerate(etList):
69            ma3d[:,:,idx] = orng2ma(et)
70        mergedETStruct.append((st, [ma2orng_keepClassMetas(merge_func(ma3d, 2), etList[0])]))
71    return mergedETStruct
72
73
74###################################################################################
75## HELPER FUNCTIONS
76###################################################################################
77
78def scaleStd(nm, axis=0):
79    """Returns new masked numarray with values scaled by dividing by Median Absolute Difference: MAD = median(|val(i)-median(val(i))|)."""
80    nm = numpyExtn.swapaxesMA(MA.asarray(nm, Numeric.Float), 0, axis)
81    return numpyExtn.swapaxesMA(nm / numpyExtn.stdMA(nm,0), 0, axis)
82
84    """Returns new masked numarray with values scaled by dividing by Median Absolute Difference: MAD = median(|val(i)-median(val(i))|)."""
85    nm = numpyExtn.swapaxesMA(MA.asarray(nm, Numeric.Float), 0, axis)
86    return numpyExtn.swapaxesMA(nm / numpyExtn.madMA(nm,0), 0, axis)
87
88
89def centerMean(nm, axis=0):
90    """Returns new masked numarray with values centered by subtracting Median."""
91    nm = numpyExtn.swapaxesMA(MA.asarray(nm, Numeric.Float), 0, axis)
92    return numpyExtn.swapaxesMA(nm - MA.average(nm,0), 0, axis)
93
94
95def centerMed(nm, axis=0):
96    """Returns new masked numarray with values centered by subtracting Median."""
97    nm = numpyExtn.swapaxesMA(MA.asarray(nm, Numeric.Float), 0, axis)
98    return numpyExtn.swapaxesMA(nm - numpyExtn.medianMA(nm,0), 0, axis)
99
100
101def getETStruct(path):
102    """Returns a list of tuples (strain, [orngET1, orngET2, ...])
103    """
104    etStruct = []
105    strains = os.listdir(path)
106    for st in strains:
107        pathSt = path + "\\" + st
108        replicas = os.listdir(pathSt)
109        stEts = []
110        for rep in replicas:
111            stEts.append(orange.ExampleTable(pathSt + "\\" + rep))
112        etStruct.append((st, stEts))
113    return etStruct
114
115
116def etStruct2ma3d(aETStruct):
117    """Converts a list of tuples (strain, [orngET1, orngET2, ...]) to a 3D masked array and returns it.
118    """
119    shape = [0,0,0]
120    et0 = aETStruct[0][1][0]                                            # the first example table
121    shape[0] = len(et0)                                                 # number of examples (genes)
122    shape[1] = len(et0.domain.attributes)                               # number of attributes (time points)
123    shape[2] = Numeric.add.reduce(map(lambda x: len(x[1]), aETStruct))  # number of ETs (replicas over all strains)
124    ma3d = MA.zeros(shape, Numeric.Float)
125    k = 0
126    for st, etList in aETStruct:
127        for et in etList:
128            ma3d[:,:,k] = orng2ma(et)
129            k += 1
130    return ma3d
131
132
133def orng2ma(aExampleTable):
134    """Converts orange.ExampleTable to MA.array based on the attribute values.
135    rows correspond to examples, columns correspond to attributes, class values are left out
136    missing values and attributes of types other than orange.FloatVariable are masked
137    """
138    vals = aExampleTable.native(0, substituteDK="?", substituteDC="?", substituteOther="?")
139    ma = MA.array(vals, Numeric.PyObject)
140    if aExampleTable.domain.classVar != None:
141        ma = ma[:,:-1]
142    mask = MA.where(MA.equal(ma, "?"), 1, 0)
143    for varIdx, var in enumerate(aExampleTable.domain.attributes):
144        if type(var) != orange.FloatVariable:
147
148
150##    """Converts MA.array to orange.ExampleTable where examples correspond to rows and attributes to columns.
151##    masked values converted to "?" (don't knows)
152##    arr2d.shape[1] must be equal to the number of the attributes of the given domain
153##    domain attributes mut be of type orange.FloatVariable
154##    """
155##    arr2d = MA.asarray(arr2d, Numeric.PyObject)
156##    assert MA.rank(arr2d) == 2, "2d array expected"
157##    assert len(aDomain.attributes) == arr2d.shape[1], "the shape of the array incompatible with the given domain"
159##        et = orange.ExampleTable(orange.Domain(aDomain.attributes, None), arr2d.tolist("?"))
161##    else:
163
165    """Converts MA.array to orange.ExampleTable where examples correspond to rows and attributes to columns.
166    masked values converted to "?" (don't knows)
167    arr2d.shape[1] must be equal to the number of the attributes of the given domain
168    domain attributes mut be of type orange.FloatVariable
170    """
171    arr2d = numpy.ma.asarray(arr2d, numpy.object)
172    assert numpy.ma.rank(arr2d) == 2, "2d array expected"
173    assert len(aDomain.attributes) == arr2d.shape[1], "the shape of the array incompatible with the given domain"
175        et = orange.ExampleTable(orange.Domain(aDomain.attributes, None), arr2d.tolist("?"))
177    else:
179
180
181def ma2orng_keepClassMetas(arr2d, aExampleTable):
182    """Creates new example table where attribute values correspond to the given 2D array, class and meta attributes remain unchanged.
183    """
184    arr2d = MA.asarray(arr2d, Numeric.PyObject)
185    assert MA.rank(arr2d) == 2, "2D array expected"
186    assert arr2d.shape[0] == len(aExampleTable), "arr2d.shape[0] != len(aExampleTable)"
187    assert arr2d.shape[1] == len(aExampleTable.domain.attributes), "arr2d.shape[1] != len(aExampleTable.domain.attributes)"
188    domAtt = orange.Domain(aExampleTable.domain.attributes, None)
189    if aExampleTable.domain.classVar != None:
190        domClassMeta = orange.Domain([aExampleTable.domain.classVar])
191    else:
192        domClassMeta = orange.Domain([])
194    etAtt = orange.ExampleTable(domAtt, arr2d.tolist("?"))
195    etClassMeta = orange.ExampleTable(domClassMeta, aExampleTable)
196    return orange.ExampleTable(aExampleTable.domain, orange.ExampleTable([etAtt, etClassMeta]))
197
198
199
200if __name__ == "__main__":
201
202    import Meda.Preproc
204    import Dicty
206    import Dicty.DAnnotation
207
208##    myPath = r"C:\Python23\Lib\site-packages\orange\OrangeWidgets\Genomics\chipdata"
209##    myPath = r"C:\Documents and Settings\peterjuv\My Documents\Transcriptional Phenotype\DictyData\orngDataStruct_Nancy"
210    myPath = r"C:\Documents and Settings\peterjuv\My Documents\Transcriptional Phenotype\DictyData\orngDataStruct_Nancy_yakApufA"
211
213        ddbList = Dicty.DAnnotation.getDDBList()
214        if not os.path.exists(path):
215            os.mkdir(path)
218            pathSt = path + "\\" + st
219            if not os.path.exists(pathSt):
220                os.mkdir(pathSt)
223                et = Meda.Preproc.ma2orng(ma2d,Meda.Preproc.getTcDomain(ma2d.shape[1], False, [], None))
225                for eIdx,e in enumerate(et):
226                    e["DDB"] = ddbList[eIdx]
227                if numGenes:
228                    orange.saveTabDelimited(pathSt + "\\" + rep + ".tab", orange.ExampleTable(et[:numGenes]))
229                else:
230                    orange.saveTabDelimited(pathSt + "\\" + rep + ".tab", et)
231
232    # generate orange example table structure
233##    generateETStruct(myPath, Dicty.DData.BR_ACS())
234
236##    etStruct = getETStruct(myPath)
237##    DN = Dicty.DData.DData_Nancy()
238##    rglc = DN.getReplicaGroupListCopy()
239##    DNpufAyakA = DN.getSubData([rglc[DN.getIdxStrain("pufA")], rglc[DN.getIdxStrain("yakA")], rglc[DN.getIdxStrain("yakApufA")]])
240##    ma3d = DNpufAyakA.getRaw3dAll()
241
242    # test orngET <-> MA
243##    m2d1 = DNpufAyakA.getRaw2d(DNpufAyakA.replicas[1])
244##    et = ma2orng(m2d1, Meda.Preproc.getTcDomain(13, False, [], None))
245##    m2d2 = orng2ma(et)
246##    print "conversion orngET <-> MA OK?", MA.add.reduce(MA.add.reduce(m2d1 - m2d2 > 0.001)) == 0
247##    print "masked places OK?", MA.count(m2d1) == MA.count(m2d2)
248
249    # test conversion to etStruct
251
252    # test standardization
253##    etA = standardize_arrays(etStruct[0][1][1])
255##    etB = standardize_genes(etStruct[0][1][1])