Skip to content

Commit

Permalink
integration
Browse files Browse the repository at this point in the history
  • Loading branch information
SuperBianC committed Jan 6, 2024
1 parent ac3a71f commit e9b45d0
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 75 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ scMulan/.ipynb_checkpoints
scMulan/utils/__pycache__
scMulan/reference/GeneSymbolUniform/__pycache__
scMulan/reference/GeneSymbolUniform/.ipynb_checkpoints

update.sh

# except
!Data/README.md
Expand Down
87 changes: 13 additions & 74 deletions Tutorial-integration.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
"id": "12f61ec3-f21b-4fce-a652-3172e7b8c7fa",
"metadata": {},
"source": [
"#### we provide the lung dataset from SCIB (doi.org/10.1038/s41592-021-01336-8)\n",
"you can download the sampled dataset from: https://figshare.com/ndownloader/files/24539942\n",
"/nfs/public/cell_gpt_data/Intergation_Data/Lung/Lung_atlas_public.h5ad"
"#### we use the lung dataset (https://figshare.com/ndownloader/files/24539942) in SCIB (doi.org/10.1038/s41592-021-01336-8)"
]
},
{
Expand Down Expand Up @@ -62,44 +60,9 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "f2116c92-e174-4088-a5fe-76858c51740e",
"metadata": {},
"outputs": [],
"source": [
"adata_path = '/nfs/public/cell_gpt_data/Intergation_Data/Lung/Lung_atlas_public.h5ad' # the data path"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b306ffe4-7aae-4d3b-ac2e-647b90bbd8d0",
"metadata": {},
"outputs": [],
"source": [
"adata = sc.read_h5ad(adata_path)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "3cef709f-7c25-4219-96f6-30df9268b2e6",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ead734f1869e44f78dcfacccf1a9f19d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0.00/972M [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"adata = sc.read(\n",
" \"Data/lung_atlas.h5ad\",\n",
Expand All @@ -109,31 +72,8 @@
},
{
"cell_type": "code",
"execution_count": 32,
"id": "37bbc683-8287-48eb-9bec-deae69c579a0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AnnData object with n_obs × n_vars = 32472 × 15148\n",
" obs: 'dataset', 'location', 'nGene', 'nUMI', 'patientGroup', 'percent.mito', 'protocol', 'sanger_type', 'size_factors', 'sampling_method', 'batch', 'cell_type', 'donor'\n",
" layers: 'counts'"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"adata"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1996cf88-0d43-4abd-92c7-eb7d850ded79",
"execution_count": 5,
"id": "69e0cab9-3c9a-47b7-98b5-1348537c46c8",
"metadata": {},
"outputs": [
{
Expand All @@ -144,7 +84,7 @@
" layers: 'counts'"
]
},
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -163,7 +103,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"id": "90c52bec-48bc-407c-b049-84cf80139ca2",
"metadata": {},
"outputs": [
Expand All @@ -180,7 +120,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Processing: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15148/15148 [00:12<00:00, 1250.63it/s]"
"Processing: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15148/15148 [00:12<00:00, 1238.62it/s]\n"
]
},
{
Expand All @@ -194,23 +134,22 @@
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Processing: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13972/13972 [00:09<00:00, 1505.76it/s]\n"
"Processing: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13972/13972 [00:07<00:00, 1758.63it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Shape of output data is (32472, 42117). It should have 42117 genes with cell number unchanged.\n",
"h5ad file saved in:/nfs/public/chenyx/scMulan_test/uniformed_adata/lung_uniformed.h5ad\n",
"report file saved in: /nfs/public/chenyx/scMulan_test/uniformed_adata/lung_report.csv\n"
"h5ad file saved in:/home/bianhaiyang/projects/cellgpt_v1/tools/scMulan_package/Data/lung_uniformed.h5ad\n",
"report file saved in: /home/bianhaiyang/projects/cellgpt_v1/tools/scMulan_package/Data/lung_report.csv\n"
]
}
],
"source": [
"adata_GS_uniformed = GeneSymbolUniform(input_adata=adata,\n",
" output_dir=\"/nfs/public/chenyx/scMulan_test/uniformed_adata/\",\n",
" output_dir=\"Data/\",\n",
" output_prefix='lung')"
]
},
Expand All @@ -231,7 +170,7 @@
"source": [
"## you can read the saved uniformed adata\n",
"\n",
"#adata_GS_uniformed=sc.read_h5ad('/nfs/public/chenyx/scMulan_test/uniformed_adata/liver_uniformed.h5ad')"
"#adata_GS_uniformed=sc.read_h5ad('Data/lung_uniformed.h5ad')"
]
},
{
Expand Down Expand Up @@ -263,7 +202,7 @@
"metadata": {},
"outputs": [],
"source": [
"# norm and log1p count matrix\n",
"# norm and log1p, if adata is count matrix\n",
"if adata_GS_uniformed.X.max() > 10:\n",
" sc.pp.normalize_total(adata_GS_uniformed, target_sum=1e4) \n",
" sc.pp.log1p(adata_GS_uniformed)"
Expand Down

0 comments on commit e9b45d0

Please sign in to comment.