def download_medmnist(
    dataset:str, # The name of the MedMNIST dataset (e.g., 'pathmnist', 'bloodmnist', etc.).
    output_dir:str='.', # The path to the directory where the datasets will be saved.
    download_only:bool=False, # If True, only download the dataset into the output directory without processing.
    save_images:bool=True, # If True, save the images into the output directory as .png (2D datasets) or multi-page .tiff (3D datasets) files.
):
Downloads the specified MedMNIST dataset and saves the training, validation, and test datasets into the specified output directory. Images are saved as .png for 2D data and multi-page .tiff for 3D data, organized into folders named after their labels.
Returns: None, saves images in the specified output directory if save_images is True.
def medmnist2df(
    train_dataset, # MedMNIST training dataset with images and labels
    val_dataset:NoneType=None, # (Optional) MedMNIST validation dataset with images and labels
    test_dataset:NoneType=None, # (Optional) MedMNIST test dataset with images and labels
    mode:str='RGB', # Mode for PIL Image conversion, e.g., 'RGB', 'L'
)->(<class 'pandas.DataFrame'>, <class 'pandas.DataFrame'>, <class 'pandas.DataFrame'>): # (df_train, df_val, df_test): DataFrames with columns 'image' and 'label'
Convert MedMNIST datasets to DataFrames, with images as PIL Image objects and labels as DataFrame columns.
Missing datasets (if None) are represented by None in the return tuple.
def download_file(
    url, # The URL of the file to be downloaded
    output_dir:str='data', # The directory where the downloaded file will be saved
    extract:bool=True, # If True, decompresses the file if it's in a compressed format
    hash:NoneType=None, # Optional: You can add a checksum for integrity verification
    extract_dir:NoneType=None, # Directory to extract the files to
):
Download and optionally decompress a single file using Pooch.
def download_files(
    urls, # A list of URLs to download
    output_dir:str='data', # The directory or list of directories where the downloaded files will be saved
    extract:bool=True, # If True, decompresses the files if they are in a compressed format
    hash_list:NoneType=None, # Optional: A list of checksums for integrity verification corresponding to each URL
    extract_dir:NoneType=None, # Directory to extract the files to
):
Download and optionally decompress multiple files using Pooch.
def download_dataset(
    base_url, # The base URL from which the files will be downloaded.
    expected_checksums, # A dictionary mapping file names to their expected checksums.
    file_names, # A dictionary mapping task identifiers to file names.
    output_dir, # The directory where the downloaded files will be saved.
    processor:NoneType=None, # A function to process the downloaded data.
):
Download a dataset using Pooch and save it to the specified output directory.
def download_dataset_from_csv(
    csv_file, # Path to the CSV file containing file names and checksums.
    base_url, # The base URL from which the files will be downloaded.
    output_dir, # The directory where the downloaded files will be saved.
    processor:NoneType=None, # A function to process the downloaded data.
    rows:NoneType=None, # Specific row indices to download. If None, download all rows.
    prepend_mdf5:bool=True, # If True, prepend 'md5:' to the checksums.
):
Download a dataset using Pooch and save it to the specified output directory, reading file names and checksums from a CSV file.
# Specify the directory where you want to save the downloaded files
output_directory = "./_test_folder"
# Define the base URL for the FMD dataset
base_url = 'https://s3.ap-northeast-1.wasabisys.com/gigadb-datasets/live/pub/10.5524/100001_101000/100888/'
download_dataset_from_csv('./data_examples/FMD_dataset_info.csv', base_url, output_directory, rows=[6])
The dataset has been successfully downloaded and saved to: ./_test_folder
def aics_pipeline(
    n_images_to_download:int=40, # Number of images to download
    image_save_dir:NoneType=None, # Directory to save the images
    col:str='SourceReadPath', # Column name for image paths in the data manifest
):
def manifest2csv(
    signal, # List of paths to signal images
    target, # List of paths to target images
    paths:NoneType=None, # List of paths to images
    train_fraction:float=0.8, # Fraction of data to use for training
    data_save_path:str='./', # Path to save the CSV files
    train:str='train.csv', # Name of the training CSV file
    test:str='test.csv', # Name of the test CSV file
    identifier:NoneType=None, # Identifier to add to the paths
):