# Source code for metatime.mecmapper

import pandas as pd
import os
import scanpy as sc
import numpy as np
import warnings
import anndata



############ project single cell to mec for creating new adata obj ###########

def projectMec(expr, mec, glstcol='TopGene20'):
    """
    Project a cell-by-gene expression matrix onto MeCs.

    Parameters
    ----------
    expr
        expression matrix, cell by gene (genes are the columns).
    mec
        mec table, two types of format accepted.
        format 1, each row is a mec with genes, comma separated.
        format 2, each row is a gene, each column is a mec, value is float.
        The format is detected from the type of the first cell of the table:
        a string means format 1, anything else means format 2.
    glstcol
        Used only when mec is the list format (format 1): name of the column
        holding the comma separated gene list of each mec. If falsy, the
        first column of ``mec`` is used.

    Returns
    ----------
    scorepd
        cell by signature table.
        format 1: unweighted mean expression of the listed genes that are
        present in ``expr``; a mec with no gene present scores 0. Columns are
        the mec index labels converted to ``str``.
        format 2: dot product of expression and mec weights over the genes
        shared by both tables. Columns are ``mec.columns``.

    Examples
    ---------
    >>> scorepd = mecmapper.projectMec( df, mec )
    """
    if isinstance(mec.iloc[0, 0], str):
        # Format 1: plain gene lists, no weighting.
        if not glstcol:
            glstcol = mec.columns[0]
        score_lst = []
        for i in mec.index:
            # A set keeps the per-column membership test O(1).
            wanted = set(mec.loc[i, glstcol].split(','))
            gs_sub = [t for t in expr.columns if t in wanted]
            score_lst.append(expr[gs_sub].mean(axis=1).to_frame(name=str(i)))
        # Signatures without any matching gene average to NaN; report 0.
        return pd.concat(score_lst, axis=1).fillna(0)

    # Format 2: the table values are per-gene weights.
    shared_genes = mec.index.intersection(expr.columns)
    module_sharedgenes = mec.loc[shared_genes]
    expr_sharedgenes = expr[shared_genes]
    scorearray = expr_sharedgenes.values.dot(module_sharedgenes.values)
    return pd.DataFrame(scorearray, index=expr.index, columns=mec.columns)
def annToDataFrame(adata_input, genescaling=False, layer='norm_data'):
    """
    Extract expression matrix from scanpy object.

    Parameters
    ----------
    adata_input: anndata.AnnData
        Input scanpy object.
    genescaling: bool
        Whether to z-scale extracted feature matrix (per column).
        Columns with zero standard deviation are scaled to 0 instead of NaN.
    layer: str
        Layer of expression to extract from adata_input. If the layer is
        missing, a warning is issued and ``X`` is used instead.

    Returns
    ----------
    pd.dataframe
        Extracted expression matrix from scanpy object.
    """
    # Kept from the original: work on a copy of the input object.
    # NOTE(review): presumably this keeps the returned frame from aliasing the
    # caller's matrix -- confirm whether to_df already copies.
    adata = adata_input.copy()
    try:
        df = adata.to_df(layer=layer)
    except KeyError:
        # Only a missing layer triggers the fallback; other errors propagate.
        warnings.warn('Warning: no '+layer +' layer. using X.')
        df = adata.to_df()
    if genescaling:
        spread = df.std()
        # A constant column would give 0/0 = NaN, and one NaN gene turns every
        # weighted projection score NaN; divide by 1 so it scales to 0.
        spread = spread.mask(spread == 0, 1.0)
        df = (df - df.mean()) / spread
    return df
def scale(df):
    """
    Standardize each feature (column) to zero mean and unit standard deviation.

    Parameters
    ----------
    df
        Table (or single series) of values to standardize.

    Returns
    ----------
    Same shape as ``df``: ``(df - df.mean()) / df.std()``.
    A feature whose standard deviation is exactly 0 is returned as all zeros
    rather than NaN.
    """
    center = df.mean()
    spread = df.std()
    # Constant features would divide 0 by 0; use a unit divisor for them.
    if isinstance(spread, pd.Series):
        spread = spread.mask(spread == 0, 1.0)
    elif spread == 0:
        spread = 1.0
    return (df - center) / spread
def projectMecAnn(adata_input, mec, genescaling=False, sigscaling=True, addon=False, layer='norm_data', glstcol='TopGene20'):
    """
    Project single cell expression in AnnData to MeCs.

    Calls: projectMec, annToDataFrame, scale

    Parameters
    ----------
    adata_input: anndata.AnnData
        Input scanpy object for gene expression. It is copied, not modified.
    mec: pd.DataFrame
        mec table, two types of format both accepted.
        format 1, each row is a mec with genes, comma separated.
        format 2, each row is a gene, each column is a mec, value is float.
    genescaling: bool
        Whether to scale expression on gene level. Recommended to be False.
    sigscaling: bool
        Whether to scale projected scores across cells. Recommended and default is True.
    addon: bool
        Whether to keep the original adata and append the signatures in obs,
        or return an independent anndata (pdata) with only projected values
        (which saves memory).
    layer: str
        Layer of expression to extract from adata_input.
    glstcol
        Used only when mec is the list format (format 1): the column name
        recording the comma separated gene list for each mec.

    Returns
    ----------
    anndata.AnnData
        If addon is False, return pdata where values are MeC-projected values.
        If addon is True, return a copy of adata_input with the projected
        scores merged into obs as extra columns. If any signature name is
        already an obs column, a warning is issued and the copy is returned
        with obs unchanged.

    Examples
    ----------
    >>> pdata = mecmapper.projectMecAnn(adata, mec_score_topg, sigscaling=True, genescaling=False, addon=False)
    """
    adata = adata_input.copy()
    df = annToDataFrame(adata, genescaling=genescaling, layer=layer)
    projected = projectMec(df, mec, glstcol=glstcol)
    projected.columns = [str(t) for t in projected.columns]
    if sigscaling:
        projected = scale(projected)

    if not addon:
        return anndata.AnnData(projected, obs=adata.obs)

    clash = projected.columns.intersection(adata.obs.columns)
    if len(clash) > 0:
        warnings.warn(
            'signature names already in adata.obs. cannot merge. \n'
            'If wish to remove in obs: \n'
            'adata.obs = adata.obs.drop(columns=' + repr(list(clash)) + ') \n or \n'
            'adata.obs = adata.obs[[t for t in adata.obs.columns if t[:6]!="score_" ]]'
        )
    else:
        adata.obs = adata.obs.merge(projected, how='left', left_index=True, right_index=True)
    # `adata` is already a private copy of the input; no second copy needed.
    return adata
def projectModuleAnn_aucell(adata, module, glstcol='TopGene20'):
    """
    Alternative projection that scores each MeC from its top-gene list.

    module has to be in list mode. Scoring is delegated to
    ``sc.tl.score_genes``.

    Parameters
    ----------
    adata: anndata.AnnData
        Input scanpy object for gene expression. Score columns are written
        into its ``obs``.
    module: pd.DataFrame
        mec table in format 1: each row is a mec with genes, comma separated.
    glstcol
        Column name recording the comma separated gene list for each mec.
        If falsy, the first column of ``module`` is used.

    Returns
    ----------
    anndata.AnnData
        The same adata object, with one ``'score_sig_<row label>'`` column in
        adata.obs per row of ``module``. Rows with fewer than two genes found
        in ``adata.var_names`` get a constant 0.
    """
    column = glstcol if glstcol else module.columns[0]
    for sig_id in module.index:
        obs_key = 'score_sig_' + str(sig_id)
        candidates = module.loc[sig_id, column].split(',')
        present = adata.var_names.intersection(candidates)
        if len(present) <= 1:
            # Too few genes available to score this signature.
            adata.obs[obs_key] = 0
            continue
        sc.tl.score_genes(adata, present, score_name=obs_key)
    return adata
"""
### TODO.
# Tutorial. mapper+annotator.
# Tutorial. differential signature.
# Tutorial functional extraction.
# metatime calling. (code to be wrapped in pipeline with a tutorial, that's it. )

# A class for projected data. this is per-cell class.
#init: score matrix, .obs features, cell location (adata.umap). function and category for features (mecs class.).
#fun: plot single signature using mec name. plot all signature. plot all signature in
#
#THEN, a separate annotator function. takes projected class. takes adata input ( overclustering. )
# THEN, a separate differential signature comparison.

# mecs class: print top gene, plot top gene, extract top tf, extract enrichr, extract lisa ranking, extract lisa-mec ranking plot.
"""