In [17]:
from geneformer import EmbExtractor
In [14]:
# initiate EmbExtractor
embex = EmbExtractor(model_type="CellClassifier",
                     num_classes=2,
                     # filter_data={"major.type": ["myeloid", "Tcell", "tumor", "oligodendrocyte"]},
                     # filter_data={"major.type": ["myeloid", "Tcell", "tumor"]},
                     max_ncells=300000,
                     emb_layer=-1,  # -1 or 0; -1 is preferred
                     emb_label=["condition", "tumor_anno", "celltype_1", "orig.ident"],
                     labels_to_plot=["condition"],
                     forward_batch_size=20,
                     nproc=20)
print("initialization ok")
initialization ok
When extracting embeddings, you can use either your fine-tuned model or the pretrained model. Note that if you want to plot a different label, change the labels_to_plot argument in the code block above, as in the sketch below.
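For example, a minimal sketch of re-initializing the extractor to color the plot by cell type instead of condition (the variable name embex_ct is ours; the label passed to labels_to_plot must also be listed in emb_label):

# sketch: same settings as above, but plot the celltype_1 label instead
embex_ct = EmbExtractor(model_type="CellClassifier",
                        num_classes=2,
                        max_ncells=300000,
                        emb_layer=-1,
                        emb_label=["condition", "tumor_anno", "celltype_1", "orig.ident"],
                        labels_to_plot=["celltype_1"],  # changed: must appear in emb_label
                        forward_batch_size=20,
                        nproc=20)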
In [15]:
# extract embeddings from input data
# example dataset: https://huggingface.co/datasets/ctheodoris/Genecorpus-30M/tree/main/example_input_files/cell_classification/disease_classification/human_dcm_hcm_nf.dataset
embs = embex.extract_embs("../geneformer-12L-30M/",       # model dir
                          "./test_dataset.dataset",       # dataset dir
                          "../PBMC_result/",              # output dir
                          "PBMC_pretrain_condition")      # output prefix
print("extract embeddings ok")
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../geneformer-12L-30M/ and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
File /mnt/disk/mwang/conda3/lib/python3.11/site-packages/torch/cuda/__init__.py:260, in _lazy_init()
    259 try:
--> 260     queued_call()
    261 except Exception as e:

File /mnt/disk/mwang/conda3/lib/python3.11/site-packages/torch/cuda/__init__.py:145, in _check_capability()
    144 for d in range(device_count()):
--> 145     capability = get_device_capability(d)

File /mnt/disk/mwang/conda3/lib/python3.11/site-packages/torch/cuda/__init__.py:399, in get_device_properties(device)
    398     raise AssertionError("Invalid device id")
--> 399 return _get_device_properties(device)

RuntimeError: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1682343970094/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch.

The above exception was the direct cause of the following exception:

DeferredCudaCallError                     Traceback (most recent call last)
Cell In[15], line 3
----> 3 embs = embex.extract_embs("../geneformer-12L-30M/",

File /mnt/disk/mwang/conda3/lib/python3.11/site-packages/geneformer/emb_extractor.py:388, in EmbExtractor.extract_embs(self, model_directory, input_data_file, output_directory, output_prefix)
--> 388 model = load_model(self.model_type, self.num_classes, model_directory)

File /mnt/disk/mwang/conda3/lib/python3.11/site-packages/geneformer/in_silico_perturber.py:78, in load_model(model_type, num_classes, model_directory)
     77 model.eval()
---> 78 model = model.to("cuda:0")

File /mnt/disk/mwang/conda3/lib/python3.11/site-packages/transformers/modeling_utils.py:2053, in PreTrainedModel.to(self, *args, **kwargs)
-> 2053 return super().to(*args, **kwargs)

    [... repeated torch.nn.Module._apply frames elided ...]

File /mnt/disk/mwang/conda3/lib/python3.11/site-packages/torch/cuda/__init__.py:264, in _lazy_init()
--> 264 raise DeferredCudaCallError(msg) from e

DeferredCudaCallError: CUDA call failed lazily at initialization with error: device >= 0 && device < num_gpus INTERNAL ASSERT FAILED at "/opt/conda/conda-bld/pytorch_1682343970094/work/aten/src/ATen/cuda/CUDAContext.cpp":50, please report a bug to PyTorch.

CUDA call was originally invoked at import time: `from geneformer import EmbExtractor` imports torch, and torch/cuda/__init__.py queues _check_capability via _lazy_call during `import torch`.
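The internal assert (device >= 0 && device < num_gpus) typically means PyTorch was pointed at a GPU index that does not exist on this node, e.g. an empty or stale CUDA_VISIBLE_DEVICES setting, or a driver/runtime mismatch. A minimal diagnostic sketch, to be run in a fresh kernel since the failing check is queued the moment torch is first imported (the device index "0" is an assumption about this machine):

import os
# pin a GPU that actually exists *before* torch/geneformer are imported;
# "0" is an assumption about this machine, adjust to a valid index
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch
print(torch.cuda.is_available())   # should be True before calling extract_embs
print(torch.cuda.device_count())   # must be >= 1 for model.to("cuda:0")
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))

If device_count() is still 0 here, the problem is in the environment (driver, allocated GPUs), not in the Geneformer code.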
In [ ]:
# plot UMAP of cell embeddings
# note: scanpy necessarily saves UMAP figures to the ./figures directory
embex.plot_embs(embs=embs,
                plot_style="umap",
                max_ncells_to_plot=300000,
                output_directory="../PBMC_result/",
                output_prefix="emb_plot_pretrain_condition")
print("umap OK")
/scratch/PI/jgwang/mwang/anaconda3/envs/py310/lib/python3.10/site-packages/anndata/utils.py:252: UserWarning: X converted to numpy array with dtype float64
  warnings.warn(f"{name} converted to numpy array with dtype {arr.dtype}")
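The anndata UserWarning above is benign: it only reports that the embedding matrix was cast to float64 when building the AnnData object for scanpy. Separately, assuming extract_embs also wrote the embeddings table to <output_directory>/<output_prefix>.csv (a filename inferred from the arguments above, not confirmed here), you can inspect it directly:

import pandas as pd

# hypothetical path, assuming extract_embs saved <output_prefix>.csv
embs_df = pd.read_csv("../PBMC_result/PBMC_pretrain_condition.csv", index_col=0)
print(embs_df.shape)                         # cells x (embedding dims + label columns)
print(embs_df["condition"].value_counts())   # per-condition cell counts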
In [ ]:
# plot heatmap of cell embeddings
# embex.plot_embs(embs=embs,
#                 plot_style="heatmap",
#                 max_ncells_to_plot=300000,
#                 output_directory="../scRNA_result/",
#                 output_prefix="emb_plot_heatmap")