# Source code for biallelic.models

"""Data models for genomic aberrations and biallelic inactivation hits.

This module defines the core data structures used throughout the biallelic
inactivation analysis pipeline, including enums for aberration types and
classes for representing samples, donors, and genomic variations.
"""

from enum import Enum
from typing import Optional
from biallelic.misc import camel_case_split


class Gender(Enum):
    """Sex recorded for the donor of a sample.

    Attributes:
        Unknown: Sex was not provided or is not known (value: 0)
        Male: Donor is male (value: 1)
        Female: Donor is female (value: 2)
    """

    # 0 is reserved for "no information".
    Unknown = 0
    Male = 1
    Female = 2
class OmicsType(Enum):
    """Kind of omics data a sample carries.

    Distinguishes between genomic mutation data, epigenetic (methylation)
    data, and gene expression data.

    Attributes:
        Genomics: DNA sequence variants and structural changes (value: 1)
        Methylomics: DNA methylation status (value: 2)
        Transcriptomics: Gene expression data (value: 3)
    """

    Genomics = 1
    Methylomics = 2
    Transcriptomics = 3
class AberrationType(Enum):
    """Categories of genomic aberration that can be recorded for a sample.

    Covers somatic mutations, structural variants, copy number changes,
    germline variants, and epigenetic modifications.

    Attributes:
        SV: Structural variant (value: 1)
        SNV: Single nucleotide variant (value: 2)
        INDEL: Small insertion/deletion (value: 3)
        CNN_LOH: Copy number neutral loss of heterozygosity (value: 4)
        GAIN_LOH: Copy number gain with loss of heterozygosity (value: 5)
        GERM_SNV: Germline single nucleotide variant (value: 6)
        HOM_LOSS: Homozygous loss (deletion) (value: 7)
        HET_LOSS: Heterozygous loss (deletion) (value: 8)
        GERM_HET_LOSS: Germline heterozygous loss (value: 9)
        GERM_HOM_LOSS: Germline homozygous loss (value: 10)
        GERM_HOM_SNV: Germline homozygous SNV (value: 11)
        METHYL: Promoter methylation silencing (value: 12)
        AMP: High-level copy number amplification (value: 13)
        GAIN: Low-level copy number gain (value: 14)
    """

    # Somatic sequence-level and structural events.
    SV = 1
    SNV = 2
    INDEL = 3

    # Somatic loss-of-heterozygosity events.
    CNN_LOH = 4
    GAIN_LOH = 5

    # Germline SNV, then somatic copy number losses.
    GERM_SNV = 6
    HOM_LOSS = 7
    HET_LOSS = 8

    # Remaining germline events.
    GERM_HET_LOSS = 9
    GERM_HOM_LOSS = 10
    GERM_HOM_SNV = 11

    # Epigenetic silencing and copy number increases.
    METHYL = 12
    AMP = 13
    GAIN = 14
class DoubleHitType(Enum):
    """Recognised combinations of two hits affecting both alleles of a gene.

    Each member names a pair of events as ``<FirstHit>_<SecondHit>``, in the
    spirit of the Knudson two-hit hypothesis for tumor suppressor
    inactivation.

    Attributes:
        SomLoss_SomLoss: Two somatic copy number losses
        SomLoss_SomSnv: Somatic loss + somatic SNV
        SomCnLoh_SomSnv: Somatic CNN-LOH + somatic SNV
        SomGainLoh_SomSnv: Somatic gain-LOH + somatic SNV
        SomLoss_SomSv: Somatic loss + somatic structural variant
        SomSnv_SomSnv: Two somatic SNVs
        SomLoss_SomIndel: Somatic loss + somatic indel
        GermLoss_SomLoss: Germline loss + somatic loss
        GermLoss_GermLoss: Two germline losses
        GermLoss_GermSnp: Germline loss + germline SNP
        GermLoss_GermSv: Germline loss + germline structural variant
        GermSnp_SomLoss: Germline SNP + somatic loss
        GermSv_SomLoss: Germline SV + somatic loss
        GermSnp_GermSnp: Two germline SNPs
        SomLoss_Methyl: Somatic loss + promoter methylation
        SubclonalLoss_SomSnv: Subclonal loss + somatic SNV
        SomLoss_SubclonalSnv: Somatic loss + subclonal SNV
    """

    # Purely somatic combinations.
    SomLoss_SomLoss = 1
    SomLoss_SomSnv = 2
    SomCnLoh_SomSnv = 3
    SomGainLoh_SomSnv = 4
    SomLoss_SomSv = 5
    SomSnv_SomSnv = 6
    SomLoss_SomIndel = 7

    # Combinations involving at least one germline event.
    GermLoss_SomLoss = 8
    GermLoss_GermLoss = 9
    GermLoss_GermSnp = 10
    GermLoss_GermSv = 11
    GermSnp_SomLoss = 12
    GermSv_SomLoss = 13
    GermSnp_GermSnp = 14

    # Methylation and subclonal combinations.
    SomLoss_Methyl = 15
    SubclonalLoss_SomSnv = 16
    SomLoss_SubclonalSnv = 17

    def __str__(self) -> str:
        """Render the member as ``"<first hit>/<second hit>"`` in lower case.

        The member name is cut at its underscore; each half is passed through
        ``camel_case_split``, its pieces are re-joined with ``"_"`` and
        lower-cased, and the two halves are joined with ``"/"``.

        Example (assuming ``camel_case_split`` breaks a CamelCase word into
        its capitalised parts):
            SomLoss_SomSnv → som_loss/som_snv

        Returns:
            String representation with format "hit1/hit2"
        """
        name_parts = self.name.split("_")
        first_hit = "_".join(camel_case_split(name_parts[0])).lower()
        second_hit = "_".join(camel_case_split(name_parts[1])).lower()
        return f"{first_hit}/{second_hit}"
class SampleDonor:
    """Descriptive record linking one sample to the donor it came from.

    Holds the identifiers of the sample and donor together with the donor's
    sex, the omics data type of the sample, and the tumor purity and ploidy
    supplied by the caller. Values are stored as given; nothing is validated.

    Attributes:
        sample_id: Unique identifier for the sample
        donor_id: Unique identifier for the donor
        gender: Name of the Gender member passed in (a string such as
            "Female"), not the enum member itself
        omics: OmicsType member describing the sample's data type
        cellularity: Tumor purity as fraction 0-1 (default: 0)
        ploidy: Average ploidy of sample (default: 2 for diploid)
        matching_sample_id: ID of matched normal sample if available
            (default: "")

    Example:
        >>> from biallelic.models import SampleDonor, Gender, OmicsType
        >>> donor = SampleDonor(
        ...     sample_id="TCGA-A1-A0SB-01",
        ...     donor_id="TCGA-A1-A0SB",
        ...     gender=Gender.Female,
        ...     omics=OmicsType.Genomics,
        ...     cellularity=0.8,
        ...     ploidy=2.0,
        ...     matching_sample_id="TCGA-A1-A0SB-10"
        ... )
    """

    def __init__(
        self,
        sample_id: str,
        donor_id: str,
        gender: Gender,
        omics: OmicsType = OmicsType.Genomics,
        cellularity: float = 0,
        ploidy: float = 2,
        matching_sample_id: str = "",
    ) -> None:
        """Create a sample/donor record from the supplied metadata.

        Args:
            sample_id: Unique identifier for the sample
            donor_id: Unique identifier for the donor
            gender: Gender enum member; only its ``name`` is retained
            omics: OmicsType enum member (default: Genomics)
            cellularity: Tumor purity fraction 0-1 (default: 0)
            ploidy: Average ploidy level (default: 2)
            matching_sample_id: Matched normal sample ID if available
                (default: "")
        """
        # Identifiers of the sample and of the donor it belongs to.
        self.sample_id, self.donor_id = sample_id, donor_id

        # The sex is kept as the member's name, the omics type as the member.
        self.gender = gender.name
        self.omics = omics

        # Purity and ploidy are stored exactly as passed in.
        self.cellularity, self.ploidy = cellularity, ploidy

        self.matching_sample_id = matching_sample_id
class Aberration:
    """One genomic event observed at a given location in a sample.

    The event may be an SNV, indel, structural variant, copy number change,
    methylation, or any other AberrationType. Constructor arguments are
    coerced to plain ``str``/``int``/``float`` values when stored.

    Attributes:
        chrom: Chromosome identifier (e.g., "1", "X", "MT"), as a string
        start: 0-based start coordinate (integer)
        end: 0-based end coordinate (integer)
        type: Name of the AberrationType member, as a string
        subtype: Functional consequence or specific type
            (e.g., "missense", "frameshift")
        sample_id: Sample identifier where aberration was detected
        vaf: Variant allele frequency 0-1 as a float, or None if unavailable
        n_copy: Copy number at this location as an int, or None if unavailable
        gene: Gene name or symbol ("." if intergenic)
        id: Unique identifier for this aberration ("." if not available)

    Example:
        >>> from biallelic.models import Aberration, AberrationType
        >>> ab = Aberration(
        ...     chrom="17",
        ...     start=7577121,
        ...     end=7577121,
        ...     aberration_type=AberrationType.SNV,
        ...     aberration_subtype="missense",
        ...     sample_id="TCGA-A1-A0SB-01",
        ...     vaf=0.45,
        ...     gene="TP53",
        ...     id="rs1234567"
        ... )
    """

    def __init__(
        self,
        chrom: str,
        start: int,
        end: int,
        aberration_type: AberrationType,
        aberration_subtype: str,
        sample_id: str,
        vaf: Optional[float] = None,
        n_copy: Optional[int] = None,
        gene: str = ".",
        id: str = ".",
    ) -> None:
        """Store the aberration, normalising each field to a basic type.

        Args:
            chrom: Chromosome identifier
            start: 0-based start coordinate
            end: 0-based end coordinate
            aberration_type: AberrationType enum member; its name is stored
            aberration_subtype: Functional consequence or type detail
            sample_id: Sample where aberration was detected
            vaf: Variant allele frequency 0-1 (default: None)
            n_copy: Copy number at locus (default: None)
            gene: Gene name/symbol or "." for intergenic (default: ".")
            id: Unique identifier or "." if unavailable (default: ".")
        """
        # Genomic location.
        self.chrom = str(chrom)
        self.start = int(start)
        self.end = int(end)

        # What kind of event this is and where it was seen.
        self.type = str(aberration_type.name)
        self.subtype = str(aberration_subtype)
        self.sample_id = str(sample_id)

        # Optional measurements: None is kept as None, anything else is coerced.
        self.vaf = None if vaf is None else float(vaf)
        self.n_copy = None if n_copy is None else int(n_copy)

        # Annotation fields.
        self.gene = str(gene)
        self.id = str(id)
class DoubleHit:
    """A pair of hits that together inactivate both alleles of a gene.

    Describes two independent mutational events on the same gene in one
    sample, following the Knudson two-hit hypothesis for tumor suppressor
    inactivation. Every field is stored as a string.

    Attributes:
        gene: Gene name or symbol affected by biallelic inactivation
        cytoband: Cytogenetic band location of the gene (e.g., "17p13.1")
        first_hit: Identifier for the first mutational event
        first_hit_type: Type/category of the first hit (e.g., "SNV",
            "HOM_LOSS")
        second_hit: Identifier for the second mutational event
        second_hit_type: Type/category of the second hit
        hit_type: ``str()`` of the hit combination that was passed in
        sample_id: Sample identifier where the biallelic event was detected
        donor_id: Donor/patient identifier
        id: Unique identifier for this biallelic hit event
            ("." if unavailable)

    Example:
        >>> from biallelic.models import DoubleHit, DoubleHitType
        >>> hit = DoubleHit(
        ...     gene="TP53",
        ...     cytoband="17p13.1",
        ...     first_hit="rs1234567",
        ...     first_hit_type="SNV",
        ...     second_hit="LOSS_17p",
        ...     second_hit_type="HOM_LOSS",
        ...     hit_type=DoubleHitType.SomLoss_SomSnv,
        ...     sample_id="TCGA-A1-A0SB-01",
        ...     donor_id="TCGA-A1-A0SB",
        ...     id="BH_000001"
        ... )
    """

    def __init__(
        self,
        gene: str,
        cytoband: str,
        first_hit: str,
        first_hit_type: str,
        second_hit: str,
        second_hit_type: str,
        hit_type: DoubleHitType,
        sample_id: str,
        donor_id: str,
        id: str = ".",
    ) -> None:
        """Record a biallelic hit, converting every argument with ``str()``.

        Args:
            gene: Gene name or symbol affected
            cytoband: Cytogenetic band location (e.g., "17p13.1")
            first_hit: Identifier/description of first mutational event
            first_hit_type: Functional type of first hit
            second_hit: Identifier/description of second mutational event
            second_hit_type: Functional type of second hit
            hit_type: Classification of hit pair combination (DoubleHitType)
            sample_id: Sample identifier
            donor_id: Donor/patient identifier
            id: Unique identifier or "." if unavailable (default: ".")
        """
        # Affected gene and its location.
        self.gene, self.cytoband = str(gene), str(cytoband)

        # The two events, each with its identifier and type.
        self.first_hit, self.first_hit_type = str(first_hit), str(first_hit_type)
        self.second_hit, self.second_hit_type = str(second_hit), str(second_hit_type)

        # str() of a DoubleHitType member goes through that enum's __str__.
        self.hit_type = str(hit_type)

        # Where the event was found, and its own identifier.
        self.sample_id, self.donor_id = str(sample_id), str(donor_id)
        self.id = str(id)