[ { "@id": "https://w3id.org/kpxl/ios/ds/np/RAp2-E77MOiPhLIbTOtkjV7l_4y1kYc63ZhZaflJ547FQ#Head", "@graph": [ { "@id": "https://w3id.org/kpxl/ios/ds/np/RAp2-E77MOiPhLIbTOtkjV7l_4y1kYc63ZhZaflJ547FQ", "http://www.nanopub.org/nschema#hasAssertion": [ { "@id": "https://w3id.org/kpxl/ios/ds/np/RAp2-E77MOiPhLIbTOtkjV7l_4y1kYc63ZhZaflJ547FQ#assertion" } ], "http://www.nanopub.org/nschema#hasProvenance": [ { "@id": "https://w3id.org/kpxl/ios/ds/np/RAp2-E77MOiPhLIbTOtkjV7l_4y1kYc63ZhZaflJ547FQ#provenance" } ], "http://www.nanopub.org/nschema#hasPublicationInfo": [ { "@id": "https://w3id.org/kpxl/ios/ds/np/RAp2-E77MOiPhLIbTOtkjV7l_4y1kYc63ZhZaflJ547FQ#pubinfo" } ], "@type": [ "http://www.nanopub.org/nschema#Nanopublication" ] } ] }, { "@id": "https://w3id.org/kpxl/ios/ds/np/RAp2-E77MOiPhLIbTOtkjV7l_4y1kYc63ZhZaflJ547FQ#provenance", "@graph": [ { "@id": "https://w3id.org/kpxl/ios/ds/np/RAp2-E77MOiPhLIbTOtkjV7l_4y1kYc63ZhZaflJ547FQ#assertion", "http://www.w3.org/ns/prov#wasAttributedTo": [ { "@id": "https://orcid.org/0000-0003-2581-8370" }, { "@id": "https://orcid.org/0009-0003-5030-0108" } ] } ] }, { "@id": "https://w3id.org/kpxl/ios/ds/np/RAp2-E77MOiPhLIbTOtkjV7l_4y1kYc63ZhZaflJ547FQ#assertion", "@graph": [ { "@id": "http://id.crossref.org/issn/2451-8492", "http://purl.org/dc/terms/title": [ { "@value": "Data Science" } ] }, { "@id": "https://doi.org/10.3233/DS-230059", "http://purl.org/dc/terms/abstract": [ { "@value": "Measuring data drift is essential in machine learning applications where model scoring (evaluation) is done on data samples that differ from those used in training. The Kullback-Leibler divergence is a common measure of shifted probability distributions, for which discretized versions are invented to deal with binned or categorical data. We present the Unstable Population Indicator, a robust, flexible and numerically stable, discretized implementation of Jeffrey's divergence, along with an implementation in a Python package that can deal with continuous, discrete, ordinal and nominal data in a variety of popular data types. We show the numerical and statistical properties in controlled experiments. It is not advised to employ a common cut-off to distinguish stable from unstable populations, but rather to let that cut-off depend on the use case." } ], "http://purl.org/dc/terms/date": [ { "@value": "2024" } ], "http://purl.org/dc/terms/hasPart": [ { "@id": "https://w3id.org/kpxl/ios/ds/np/RA4SqymT32eltSYbr41lDKMBV3Zr8nEBEXRFhfOrN6f3k" } ], "http://purl.org/dc/terms/isPartOf": [ { "@id": "http://id.crossref.org/issn/2451-8492" } ], "http://purl.org/dc/terms/title": [ { "@value": "Measuring Data Drift with the Unstable Population Indicator" } ], "http://purl.org/pav/authoredBy": [ { "@id": "https://orcid.org/0000-0003-2581-8370" }, { "@id": "https://orcid.org/0009-0003-5030-0108" } ], "@type": [ "http://purl.org/spar/fabio/ResourcePaper" ] }, { "@id": "https://orcid.org/0000-0003-2581-8370", "http://schema.org/affiliation": [ { "@id": "https://ror.org/04dkp9463" }, { "@id": "https://ror.org/05xvt9f17" } ], "http://schema.org/email": [ { "@value": "datascience@marcelhaas.com" } ], "@type": [ "http://xmlns.com/foaf/0.1/Person" ], "http://xmlns.com/foaf/0.1/name": [ { "@value": "Marcel R. Haas" } ] }, { "@id": "https://orcid.org/0009-0003-5030-0108", "http://schema.org/affiliation": [ { "@id": "https://ror.org/04b8v1s79" }, { "@id": "https://ror.org/04dkp9463" } ], "http://schema.org/email": [ { "@value": "L.Sibbald@tilburguniversity.edu" } ], "@type": [ "http://xmlns.com/foaf/0.1/Person" ], "http://xmlns.com/foaf/0.1/name": [ { "@value": "Lisette Sibbald" } ] }, { "@id": "https://ror.org/04dkp9463", "@type": [ "http://xmlns.com/foaf/0.1/Organization" ], "http://xmlns.com/foaf/0.1/name": [ { "@value": "Business Intelligence, University of Amsterdam, Spui 21, 1012WX Amsterdam, The Netherlands" } ] }, { "@id": "https://ror.org/05xvt9f17", "@type": [ "http://xmlns.com/foaf/0.1/Organization" ], "http://xmlns.com/foaf/0.1/name": [ { "@value": "Public Health and Primary Care, Leiden University Medical Center, Albinusdreef 2, The Netherlands" } ] }, { "@id": "https://ror.org/04b8v1s79", "@type": [ "http://xmlns.com/foaf/0.1/Organization" ], "http://xmlns.com/foaf/0.1/name": [ { "@value": "Department of Methodology and Statistics and Department of Cognitive Neuropsychology, Tilburg University, Prof. Cobbenhagenlaan 125, 5037 DB Tilburg, The Netherlands" } ] } ] }, { "@id": "https://w3id.org/kpxl/ios/ds/np/RAp2-E77MOiPhLIbTOtkjV7l_4y1kYc63ZhZaflJ547FQ#pubinfo", "@graph": [ { "@id": "https://orcid.org/0000-0002-1267-0234", "http://xmlns.com/foaf/0.1/name": [ { "@value": "Tobias Kuhn" } ] }, { "@id": "https://w3id.org/kpxl/ios/ds/np/RAp2-E77MOiPhLIbTOtkjV7l_4y1kYc63ZhZaflJ547FQ#sig", "http://purl.org/nanopub/x/hasAlgorithm": [ { "@value": "RSA" } ], "http://purl.org/nanopub/x/hasPublicKey": [ { "@value": "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCjDGQCS1S+SRnERDuYDXOugdYUP0efEquHJEEHAbU/uLzBVlga89zqrNPCS7fBE6lArBUWEmT8eLKdMapyqvAzI1J3jUWTMhDJF+XFBkUiuiFfNSc4vJJcmi0yujtnuzXsRIG202jyaP4f5ULoskFwaZOSBZJfiE0dsB3D7DTIAQIDAQAB" } ], "http://purl.org/nanopub/x/hasSignature": [ { "@value": "f5mC5A4mj3VepxZTMLnk8nrgRbIpIorEb3hGe1uEbV+wjaNFsdsOq8Yu9nXj/eWi3SweEqX9cuaHwwUEP1CpdpzQBslMpgVnxEd6g1aJapdDumaL0rGUDktysosShKOLFHSgIZC11+85vcppmGuWqPlxFAZKOdtDV3O1pxg1CB4=" } ], "http://purl.org/nanopub/x/hasSignatureTarget": [ { "@id": "https://w3id.org/kpxl/ios/ds/np/RAp2-E77MOiPhLIbTOtkjV7l_4y1kYc63ZhZaflJ547FQ" } ], "http://purl.org/nanopub/x/signedBy": [ { "@id": "https://orcid.org/0000-0002-1267-0234" } ] }, { "@id": "https://w3id.org/kpxl/ios/ds/np/RAp2-E77MOiPhLIbTOtkjV7l_4y1kYc63ZhZaflJ547FQ", "http://purl.org/dc/terms/created": [ { "@value": "2024-02-12T07:10:52.151Z", "@type": "http://www.w3.org/2001/XMLSchema#dateTime" } ], "http://purl.org/dc/terms/creator": [ { "@id": "https://orcid.org/0000-0002-1267-0234" } ], "http://purl.org/dc/terms/license": [ { "@id": "https://creativecommons.org/licenses/by/4.0/" } ], "http://purl.org/nanopub/x/hasNanopubType": [ { "@id": "http://purl.org/spar/fabio/ScholarlyWork" }, { "@id": "https://w3id.org/kpxl/ios/ds/terms/DataScienceNanopub" } ], "http://purl.org/nanopub/x/introduces": [ { "@id": "https://doi.org/10.3233/DS-230059" } ], "http://purl.org/nanopub/x/wasCreatedAt": [ { "@id": "https://nanodash.petapico.org/" } ], "http://www.w3.org/2000/01/rdf-schema#label": [ { "@value": "Article: Measuring Data Drift with the Unstable Population Indicator" } ], "https://w3id.org/np/o/ntemplate/wasCreatedFromProvenanceTemplate": [ { "@id": "http://purl.org/np/RAi6zZAwhaJ23Hzg4lIjlPir6Take3ZQp-lS9skfBEwfQ" } ], "https://w3id.org/np/o/ntemplate/wasCreatedFromPubinfoTemplate": [ { "@id": "http://purl.org/np/RAA2MfqdBCzmz9yVWjKLXNbyfBNcwsMmOqcNUxkk1maIM" }, { "@id": "http://purl.org/np/RAh1gm83JiG5M6kDxXhaYT1l49nCzyrckMvTzcPn-iv90" }, { "@id": "https://w3id.org/np/RA5R_qv3VsZIrDKd8Mr37x3HoKCsKkwN5tJVqgQsKhjTE" } ], "https://w3id.org/np/o/ntemplate/wasCreatedFromTemplate": [ { "@id": "https://w3id.org/np/RAhPFxesdOZq-w6Z8VBfc1aV9hfN6c5FnJ7XjR0dAMn_I" } ] } ] } ]