Group
Extension

Convert-Pheno/lib/Convert/Pheno/PXF.pm

package Convert::Pheno::PXF;

use strict;
use warnings;
use autodie;
use feature qw(say);
use Data::Dumper;
use Convert::Pheno::Utils::Default qw(get_defaults);
use Convert::Pheno::Utils::Mapping;
use Exporter 'import';
our @EXPORT = qw(do_pxf2bff);

my $DEFAULT = get_defaults();

#############
#############
#  PXF2BFF  #
#############
#############

sub do_pxf2bff {
    my ( $self, $data ) = @_;
    my $sth = $self->{sth};

  # *** IMPORTANT ****
  # PXF three top-level elements are usually split in files:
  # - phenopacket.json ( usually - 1 individual per file)
  # - cohort.json (info on mutliple individuals)
  # - family.json (info related to one or multiple individuals).
  # These 3 files dont't contain their respective objects at the root level (/).
  #
  # However, top-elements might be combined into a single file (e.g., pxf.json),
  # as a result, certain files may contain objects for top-level elements:
  # - /phenopacket
  # - /cohort
  # - /family
  #
  # In this context, we only accept top-level phenopackets,
  # while the other two types will be categorized as "info".

    # We create cursors for top-level elements
    # 1 - phenopacket (mandatory)
    my $phenopacket =
      exists $data->{phenopacket} ? $data->{phenopacket} : $data;

    # Validate format
    die "Are you sure that your input is not already a bff?\n"
      unless validate_format( $phenopacket, 'pxf' );

    # 2, 3 - /cohort and /family (unlikely)
    # NB: They usually contain info on many individuals and their own files)
    my $cohort = exists $data->{family} ? $data->{cohort} : undef;
    my $family = exists $data->{family} ? $data->{family} : undef;

    # Normalize the hash for medical_actions + medicalActions = medicalActions
    if ( exists $phenopacket->{medical_actions} ) {

       # NB: The delete function returns the value of the deleted key-value pair
        $phenopacket->{medicalActions} = delete $phenopacket->{medical_actions};
    }

# CNAG files have 'meta_data' nomenclature, but PXF documentation uses 'metaData'
# We search for both 'meta_data' and 'metaData' and simply display the
    if ( exists $phenopacket->{meta_data} ) {

       # NB: The delete function returns the value of the deleted key-value pair
        $phenopacket->{metaData} = delete $phenopacket->{meta_data};
    }

    ####################################
    # START MAPPING TO BEACON V2 TERMS #
    ####################################

    # *** IMPORTANT ***
    # biosamples => can not be mapped to individuals (is Biosamples)
    # interpretations => does not have equivalent
    # files => idem
    # They will added to {info}

    # NB: In PXF some terms are = []

    # Initiate BFF structure
    my $individual = {};

    # ========
    # diseases
    # ========
    _map_diseases( $phenopacket, $individual );

    # ========
    # exposures
    # ========
    _map_exposures( $phenopacket, $individual );

    # ================
    # geographicOrigin
    # ================
    # NA

    # ==
    # id
    # ==
    _map_id( $phenopacket, $individual );

    # ====
    # info
    # ====
    _map_info( $phenopacket, $cohort, $family, $individual );

    # =========================
    # interventionsOrProcedures
    # =========================
    _map_interventions_or_procedures( $phenopacket, $individual );

    # =============
    # karyotypicSex
    # =============
    _map_karyotypicSex( $phenopacket, $individual );

    # =========
    # measures
    # =========
    _map_measures( $phenopacket, $individual );

    # =========
    # pedigrees
    # =========
    # See above {info}{phenopacket}{pedigree} => singular!!!

    # ==================
    # phenotypicFeatures
    # ==================
    _map_phenotypic_features( $phenopacket, $individual );

    # ===
    # sex
    # ===
    _map_sex( $self, $phenopacket, $individual );

    # ==========
    # treatments
    # ==========
    _map_treatments( $phenopacket, $individual );

    ##################################
    # END MAPPING TO BEACON V2 TERMS #
    ##################################

    # print Dumper $individual;
    return $individual;
}

################################################################################
# Helper subs extracted from do_pxf2bff (preserving original comments)
################################################################################

sub _map_diseases {
    my ( $phenopacket, $individual ) = @_;

    # ========
    # diseases
    # ========
    if ( exists $phenopacket->{diseases} ) {
        for my $pxf_disease ( @{ $phenopacket->{diseases} } ) {
            my $disease = $pxf_disease;    # Ref-copy-only
            $disease->{diseaseCode} = $disease->{term};
            $disease->{ageOfOnset}  = $disease->{onset}
              if exists $disease->{onset};

            # Check and normalize keys if they exist
            for (qw/excluded negated/) {
                $disease->{$_} = $disease->{$_} if exists $disease->{$_};
            }

            # Clean analog terms if exist
            for (qw/term onset/) {
                delete $disease->{$_} if exists $disease->{$_};
            }

            push @{ $individual->{diseases} }, $disease;
        }
    }
}

sub _map_exposures {
    my ( $phenopacket, $individual ) = @_;

    # ========
    # exposures
    # ========
    if ( exists $phenopacket->{exposures} ) {
        for my $pxf_exposure ( @{ $phenopacket->{exposures} } ) {
            my $exposure = $pxf_exposure;    # Ref-copy-only
            $exposure->{exposureCode} = $exposure->{type};
            $exposure->{date} =
              substr( $exposure->{occurrence}{timestamp}, 0, 10 );

            # Required properties
            $exposure->{ageAtExposure} = $DEFAULT->{iso8601duration};
            $exposure->{duration}      = $DEFAULT->{duration};
            unless ( exists $exposure->{unit} ) {
                $exposure->{unit} = $DEFAULT->{ontology_term};
            }

            # Clean analog terms if exist
            for (qw/type occurence/) {
                delete $exposure->{$_} if exists $exposure->{$_};
            }

            push @{ $individual->{exposures} }, $exposure;
        }
    }
}

sub _map_id {
    my ( $phenopacket, $individual ) = @_;

    # ==
    # id
    # ==
    if ( exists $phenopacket->{subject}{id} ) {
        $individual->{id} = $phenopacket->{subject}{id};
    }
}

sub _map_info {
    my ( $phenopacket, $cohort, $family, $individual ) = @_;

    # ====
    # info
    # ====
    # *** IMPORTANT ***
    # Here we set data that do not fit anywhere else

    # Miscelanea for top-level 'phenopacket'
    for my $term (
        qw(dateOfBirth genes interpretations metaData variants files biosamples pedigree)
      )
    {
        $individual->{info}{phenopacket}{$term} = $phenopacket->{$term}
          if exists $phenopacket->{$term};
    }

    # Miscelanea for top-levels 'cohort' and 'family'
    $individual->{info}{cohort} = $cohort if defined $cohort;
    $individual->{info}{family} = $family if defined $family;
}

sub _map_interventions_or_procedures {
    my ( $phenopacket, $individual ) = @_;

    # =========================
    # interventionsOrProcedures
    # =========================
    if ( exists $phenopacket->{medicalActions} ) {
        for my $action ( @{ $phenopacket->{medicalActions} } ) {
            if ( exists $action->{procedure} ) {
                my $procedure = $action->{procedure};    # Ref-copy-only
                $procedure->{procedureCode} =
                  exists $action->{procedure}{code}
                  ? $action->{procedure}{code}
                  : $DEFAULT->{ontology_term};
                $procedure->{ageOfProcedure} =
                  exists $action->{procedure}{performed}
                  ? $action->{procedure}{performed}
                  : $DEFAULT->{timestamp};

                # Clean analog terms if exist
                for (qw/code performed/) {
                    delete $procedure->{$_} if exists $procedure->{$_};
                }

                push @{ $individual->{interventionsOrProcedures} }, $procedure;
            }
        }
    }
}

sub _map_karyotypicSex {
    my ( $phenopacket, $individual ) = @_;

    # =============
    # karyotypicSex
    # =============
    if ( exists $phenopacket->{subject}{karyotypicSex} ) {
        $individual->{karyotypicSex} = $phenopacket->{subject}{karyotypicSex};
    }
}

sub _map_measures {
    my ( $phenopacket, $individual ) = @_;

    # =========
    # measures
    # =========
    if ( exists $phenopacket->{measurements} ) {
        for my $measurement ( @{ $phenopacket->{measurements} } ) {
            my $measure = $measurement;    # Ref-copy-only

            $measure->{assayCode} = $measure->{assay};

            # Process remotely complexValue
            # s/type/quantityType/
            map_complexValue( $measure->{complexValue} )
              if exists $measure->{complexValue};

            # Assign depending on PXF
            $measure->{measurementValue} =
                exists $measure->{value}        ? $measure->{value}
              : exists $measure->{complexValue} ? $measure->{complexValue}
              :                                   $DEFAULT->{value};
            $measure->{observationMoment} = $measure->{timeObserved}
              if exists $measure->{timeObserved};

            # Clean analog terms if exist
            for (qw/assay value complexValue/) {
                delete $measure->{$_} if exists $measure->{$_};
            }

            push @{ $individual->{measures} }, $measure;
        }
    }
}

sub _map_phenotypic_features {
    my ( $phenopacket, $individual ) = @_;

    # ==================
    # phenotypicFeatures
    # ==================
    if ( exists $phenopacket->{phenotypicFeatures} ) {
        for my $feature ( @{ $phenopacket->{phenotypicFeatures} } ) {
            my $phenotypicFeature = $feature;    # Ref-copy-only

            # *** IMPORTANT ****
            # In v2.0.0 BFF 'evidence' is object but in PXF is array of objects

            # Check and normalize keys if they exist
            for (qw/excluded negated/) {
                $phenotypicFeature->{excluded} = $phenotypicFeature->{$_}
                  if exists $phenotypicFeature->{$_};
            }

            $phenotypicFeature->{featureType} = $phenotypicFeature->{type}
              if exists $phenotypicFeature->{type};

            # Clean analog terms if exist
            for (qw/negated type/) {
                delete $phenotypicFeature->{$_}
                  if exists $phenotypicFeature->{$_};
            }

            push @{ $individual->{phenotypicFeatures} }, $phenotypicFeature;
        }
    }
}

sub _map_sex {
    my ( $self, $phenopacket, $individual ) = @_;

    # ===
    # sex
    # ===
    if ( exists $phenopacket->{subject}{sex}
        && $phenopacket->{subject}{sex} ne '' )
    {
        $individual->{sex} = map_ontology_term(
            {
                query    => $phenopacket->{subject}{sex},
                column   => 'label',
                ontology => 'ncit',
                self     => $self
            }
        );
    }
}

sub _map_treatments {
    my ( $phenopacket, $individual ) = @_;

    # ==========
    # treatments
    # ==========
    if ( exists $phenopacket->{medicalActions} ) {
        for my $action ( @{ $phenopacket->{medicalActions} } ) {
            if ( exists $action->{treatment} ) {
                my $treatment = $action->{treatment};    # Ref-copy-only
                $treatment->{treatmentCode} =
                  exists $action->{treatment}{agent}
                  ? $action->{treatment}{agent}
                  : $DEFAULT->{ontology_term};

                # Clean analog terms if exist
                delete $treatment->{agent} if exists $treatment->{agent};

                # doseIntervals needs some parsing
                if ( exists $treatment->{doseIntervals} ) {

                    # Required properties:
                    #   - scheduleFrequency
                    #   - quantity
                    for ( @{ $treatment->{doseIntervals} } ) {

                        # quantity
                        unless ( exists $_->{quantity} ) {
                            $_->{quantity} = $DEFAULT->{quantity};
                        }

                        # scheduleFrequency
                        unless ( exists $_->{scheduleFrequency} ) {
                            $_->{scheduleFrequency} = $DEFAULT->{ontology_term};
                        }
                    }
                }

                push @{ $individual->{treatments} }, $treatment;
            }
        }
    }
}

#----------------------------------------------------------------------
# Helper subs
#----------------------------------------------------------------------

sub map_complexValue {
    my $complexValue = shift;

    # "typedQuantities": [
    #            {
    #              "type": {
    #                "label": "Visual Acuity",
    #                "id": "NCIT:C87149"
    #              },
    #              "quantity": {
    #                "unit": {
    #                  "id": "NCIT:C48570",
    #                  "label": "Percent Unit"
    #                },
    #                "value": 100
    #              }
    #            }
    #  }

    # Modifying the original ref
    for ( @{ $complexValue->{typedQuantities} } ) {
        $_->{quantityType} = delete $_->{type};
    }

    return 1;
}

# Function to normalize a value to a Boolean
sub to_boolean {
    my $value = shift;
    print Dumper $value;
    return JSON::XS::true
      if $value && $value ne 'false';    # Non-empty string and not 'false'
    return JSON::XS::false;              # Empty, 'false', or undef
}

1;


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.