Changeset 1785:b3a53ba8ada0 in orangebioinformatics
 Timestamp:
 05/21/13 13:45:13 (11 months ago)
 Branch:
 default
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

_bioinformatics/obiGeneSetSig.py
r1775 r1785 731 731 return at 732 732 733 def estimate_linear_fit(data, i): 734 """ 735 Chen et 2008 write about tscore of the least square fit in the 736 original article, but here we can use just the ttest, because the 737 tscores obtained are exactly the same (the authors used the 738 the more general version as they supported continous outcomes). 739 """ 740 741 """ 742 #implementation that could also support continous outcomes 743 x = numpy.array([ ex[i].value for ex in data ]) 744 y = numpy.array([ int(ex[1]) for ex in data ]) #here classes are 0 and 1 745 ret = scipy.stats.linregress(x,y) 746 b = ret[0] 747 seb = ret[4] 748 return b/seb 749 """ 750 """ 751 #per mathword.wolfram.com  results are the same 752 c = numpy.cov(x,y) 753 n = len(x) 754 b = c[0,1]/c[0,0] #the same result as from li 755 s = math.sqrt((c[1,1]*n  b*c[0,1]*n)/(n2)) 756 seb = s/math.sqrt(c[0,0]*n) 757 return b/seb 758 """ 759 return obiExpression.MA_t_test()(i, data) 760 761 762 class SPCA(ParametrizedTransformation): 763 """ Per Chen et al. Supervised principal component analysis for 764 gene set enrichment of microarray data with continuous or survival 765 outcomes. Bioinformatics, 2008. """ 766 767 def __init__(self, **kwargs): 768 self.threshold = kwargs.pop("threshold", None) 769 self.top = kwargs.pop("top", None) 770 self.atleast = kwargs.pop("atleast", 0) 771 super(SPCA, self).__init__(**kwargs) 772 773 def _get_par(self, datao): 774 scores = [ estimate_linear_fit(datao, a) for a in range(len(datao.domain.attributes)) ] 775 scores = list(enumerate(map(abs, scores))) 776 select = None 777 if self.threshold is not None: 778 select = [ i for i,s in scores if s > self.threshold ] 779 elif self.top is not None: 780 select = nth(sorted(scores, key=lambda x: x[1])[:self.top], 0) 781 782 print sorted(scores, key=lambda x: x[1]) 783 784 if len(select) < self.atleast: 785 select = nth(sorted(scores, key=lambda x: x[1])[:self.atleast], 0) 786 787 if select == None: 788 somethingWrongWithSelection 789 790 doms = Orange.data.Domain([ datao.domain.attributes[i] for i in select ], datao.domain.class_var) 791 datas = Orange.data.Table(doms, datao) 792 793 return select, pca(datas) 794 795 def _use_par(self, arr, constructt): 796 select, constructt = constructt 797 select = set(select) 798 799 arr = [ arr[i].value for i in range(len(arr.domain.attributes)) 800 if i in select ] 801 802 evals, evect, xmean = constructt 803 804 arr = arr  xmean # same input transformation  a row in a matrix 805 ev0 = evect[0] #this is a row in a matrix  do a dot product 806 a = numpy.dot(arr, ev0) 807 808 return a 809 733 810 if __name__ == "__main__": 734 811 735 812 data = Orange.data.Table("iris") 813 736 814 gsets = obiGeneSets.collections({ 737 815 #"ALL": ['sepal length', 'sepal width', 'petal length', 'petal width'], … … 761 839 print '\n'.join([ a + ": " +str(b) for a,b in ol]) 762 840 763 ass = Assess(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0)841 ass = SPCA(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0, top=0) 764 842 ar = to_old_dic(ass.domain, data[:5]) 765 843 pp2(ar)
Note: See TracChangeset
for help on using the changeset viewer.