Publications
2023
Simone, Lorenzo; Bacciu, Davide
ECGAN: generative adversarial network for electrocardiography Conference
Proceedings of Artificial Intelligence In Medicine 2023 (AIME 2023), 2023.
BibTeX | Tags: bioinformatics, biomedical data, generative model, multivariate time-series
@conference{nokey,
  author    = {Simone, Lorenzo and Bacciu, Davide},
  title     = {{ECGAN}: generative adversarial network for electrocardiography},
  booktitle = {Proceedings of Artificial Intelligence In Medicine 2023 (AIME 2023)},
  year      = {2023},
  date      = {2023-06-12},
  urldate   = {2023-06-12},
  keywords  = {bioinformatics, biomedical data, generative model, multivariate time-series},
  pubstate  = {published},
  tppubtype = {conference}
}
Bacciu, Davide; Errica, Federico; Gravina, Alessio; Madeddu, Lorenzo; Podda, Marco; Stilo, Giovanni
Deep Graph Networks for Drug Repurposing with Multi-Protein Targets Journal Article Forthcoming
In: IEEE Transactions on Emerging Topics in Computing, 2023, Forthcoming.
BibTeX | Tags: bioinformatics, biomedical data, deep graph networks, deep learning for graphs
@article{Bacciu2023b,
  author    = {Bacciu, Davide and Errica, Federico and Gravina, Alessio and Madeddu, Lorenzo and Podda, Marco and Stilo, Giovanni},
  title     = {Deep Graph Networks for Drug Repurposing with Multi-Protein Targets},
  journal   = {IEEE Transactions on Emerging Topics in Computing},
  year      = {2023},
  date      = {2023-02-01},
  keywords  = {bioinformatics, biomedical data, deep graph networks, deep learning for graphs},
  pubstate  = {forthcoming},
  tppubtype = {article}
}
2022
Ferrari, Elisa; Gargani, Luna; Barbieri, Greta; Ghiadoni, Lorenzo; Faita, Francesco; Bacciu, Davide
A causal learning framework for the analysis and interpretation of COVID-19 clinical data Journal Article
In: Plos One, vol. 17, no. 5, 2022.
Abstract | Links | BibTeX | Tags: Bayesian networks, bioinformatics, biomedical data, explainable AI, unsupervised learning
@article{DBLP:journals/corr/abs-2105-06998,
  author     = {Ferrari, Elisa and Gargani, Luna and Barbieri, Greta and Ghiadoni, Lorenzo and Faita, Francesco and Bacciu, Davide},
  title      = {A causal learning framework for the analysis and interpretation of {COVID-19} clinical data},
  journal    = {Plos One},
  volume     = {17},
  number     = {5},
  year       = {2022},
  date       = {2022-05-19},
  doi        = {10.1371/journal.pone.0268327},
  url        = {https://arxiv.org/abs/2105.06998},
  urldate    = {2022-04-27},
  eprint     = {2105.06998},
  eprinttype = {arXiv},
  abstract   = {We present a workflow for clinical data analysis that relies on Bayesian Structure Learning (BSL), an unsupervised learning approach, robust to noise and biases, that allows to incorporate prior medical knowledge into the learning process and that provides explainable results in the form of a graph showing the causal connections among the analyzed features. The workflow consists in a multi-step approach that goes from identifying the main causes of patient's outcome through BSL, to the realization of a tool suitable for clinical practice, based on a Binary Decision Tree (BDT), to recognize patients at high-risk with information available already at hospital admission time. We evaluate our approach on a feature-rich COVID-19 dataset, showing that the proposed framework provides a schematic overview of the multi-factorial processes that jointly contribute to the outcome. We discuss how these computational findings are confirmed by current understanding of the COVID-19 pathogenesis. Further, our approach yields to a highly interpretable tool correctly predicting the outcome of 85\% of subjects based exclusively on 3 features: age, a previous history of chronic obstructive pulmonary disease and the PaO2/FiO2 ratio at the time of arrival to the hospital. The inclusion of additional information from 4 routine blood tests (Creatinine, Glucose, pO2 and Sodium) increases predictive accuracy to 94.5\%.},
  keywords   = {Bayesian networks, bioinformatics, biomedical data, explainable AI, unsupervised learning},
  pubstate   = {published},
  tppubtype  = {article}
}
We present a workflow for clinical data analysis that relies on Bayesian Structure Learning (BSL), an unsupervised learning approach, robust to noise and biases, that allows to incorporate prior medical knowledge into the learning process and that provides explainable results in the form of a graph showing the causal connections among the analyzed features. The workflow consists in a multi-step approach that goes from identifying the main causes of patient's outcome through BSL, to the realization of a tool suitable for clinical practice, based on a Binary Decision Tree (BDT), to recognize patients at high-risk with information available already at hospital admission time. We evaluate our approach on a feature-rich COVID-19 dataset, showing that the proposed framework provides a schematic overview of the multi-factorial processes that jointly contribute to the outcome. We discuss how these computational findings are confirmed by current understanding of the COVID-19 pathogenesis. Further, our approach yields to a highly interpretable tool correctly predicting the outcome of 85% of subjects based exclusively on 3 features: age, a previous history of chronic obstructive pulmonary disease and the PaO2/FiO2 ratio at the time of arrival to the hospital. The inclusion of additional information from 4 routine blood tests (Creatinine, Glucose, pO2 and Sodium) increases predictive accuracy to 94.5%.
Gravina, Alessio; Wilson, Jennifer L.; Bacciu, Davide; Grimes, Kevin J.; Priami, Corrado
Controlling astrocyte-mediated synaptic pruning signals for schizophrenia drug repurposing with Deep Graph Networks Journal Article
In: Plos Computational Biology, vol. 18, no. 5, 2022.
Abstract | Links | BibTeX | Tags: bioinformatics, biomedical data, deep learning for graphs, structured data processing
@article{Gravina2022,
  author    = {Gravina, Alessio and Wilson, Jennifer L. and Bacciu, Davide and Grimes, Kevin J. and Priami, Corrado},
  title     = {Controlling astrocyte-mediated synaptic pruning signals for schizophrenia drug repurposing with {Deep Graph Networks}},
  journal   = {Plos Computational Biology},
  volume    = {18},
  number    = {5},
  year      = {2022},
  date      = {2022-04-01},
  doi       = {10.1371/journal.pcbi.1009531},
  url       = {https://www.biorxiv.org/content/10.1101/2021.10.07.463459v1},
  urldate   = {2022-04-01},
  abstract  = {Schizophrenia is a debilitating psychiatric disorder, leading to both physical and social morbidity. Worldwide 1\% of the population is struggling with the disease, with 100,000 new cases annually only in the United States. Despite its importance, the goal of finding effective treatments for schizophrenia remains a challenging task, and previous work conducted expensive large-scale phenotypic screens. This work investigates the benefits of Machine Learning for graphs to optimize drug phenotypic screens and predict compounds that mitigate abnormal brain reduction induced by excessive glial phagocytic activity in schizophrenia subjects. Given a compound and its concentration as input, we propose a method that predicts a score associated with three possible compound effects, ie reduce, increase, or not influence phagocytosis. We leverage a high-throughput screening to prove experimentally that our method achieves good generalization capabilities. The screening involves 2218 compounds at five different concentrations. Then, we analyze the usability of our approach in a practical setting, ie prioritizing the selection of compounds in the SWEETLEAD library. We provide a list of 64 compounds from the library that have the most potential clinical utility for glial phagocytosis mitigation. Lastly, we propose a novel approach to computationally validate their utility as possible therapies for schizophrenia.},
  keywords  = {bioinformatics, biomedical data, deep learning for graphs, structured data processing},
  pubstate  = {published},
  tppubtype = {article}
}
Schizophrenia is a debilitating psychiatric disorder, leading to both physical and social morbidity. Worldwide 1% of the population is struggling with the disease, with 100,000 new cases annually only in the United States. Despite its importance, the goal of finding effective treatments for schizophrenia remains a challenging task, and previous work conducted expensive large-scale phenotypic screens. This work investigates the benefits of Machine Learning for graphs to optimize drug phenotypic screens and predict compounds that mitigate abnormal brain reduction induced by excessive glial phagocytic activity in schizophrenia subjects. Given a compound and its concentration as input, we propose a method that predicts a score associated with three possible compound effects, ie reduce, increase, or not influence phagocytosis. We leverage a high-throughput screening to prove experimentally that our method achieves good generalization capabilities. The screening involves 2218 compounds at five different concentrations. Then, we analyze the usability of our approach in a practical setting, ie prioritizing the selection of compounds in the SWEETLEAD library. We provide a list of 64 compounds from the library that have the most potential clinical utility for glial phagocytosis mitigation. Lastly, we propose a novel approach to computationally validate their utility as possible therapies for schizophrenia.
Bacciu, Davide; Lisboa, Paulo J. G.; Vellido, Alfredo
Deep Learning in Biology and Medicine Book
World Scientific Publisher, 2022, ISBN: 978-1-80061-093-4.
Abstract | Links | BibTeX | Tags: artificial intelligence, bioinformatics, biomedical data, deep learning
@book{BacciuBook2022,
  author    = {Bacciu, Davide and Lisboa, Paulo J. G. and Vellido, Alfredo},
  title     = {Deep Learning in Biology and Medicine},
  publisher = {World Scientific Publisher},
  year      = {2022},
  date      = {2022-02-01},
  urldate   = {2022-02-01},
  isbn      = {978-1-80061-093-4},
  doi       = {10.1142/q0322},
  abstract  = {Biology, medicine and biochemistry have become data-centric fields for which Deep Learning methods are delivering groundbreaking results. Addressing high impact challenges, Deep Learning in Biology and Medicine provides an accessible and organic collection of Deep Learning essays on bioinformatics and medicine. It caters for a wide readership, ranging from machine learning practitioners and data scientists seeking methodological knowledge to address biomedical applications, to life science specialists in search of a gentle reference for advanced data analytics.
With contributions from internationally renowned experts, the book covers foundational methodologies in a wide spectrum of life sciences applications, including electronic health record processing, diagnostic imaging, text processing, as well as omics-data processing. This survey of consolidated problems is complemented by a selection of advanced applications, including cheminformatics and biomedical interaction network analysis. A modern and mindful approach to the use of data-driven methodologies in the life sciences also requires careful consideration of the associated societal, ethical, legal and transparency challenges, which are covered in the concluding chapters of this book.},
  keywords  = {artificial intelligence, bioinformatics, biomedical data, deep learning},
  pubstate  = {published},
  tppubtype = {book}
}
Biology, medicine and biochemistry have become data-centric fields for which Deep Learning methods are delivering groundbreaking results. Addressing high impact challenges, Deep Learning in Biology and Medicine provides an accessible and organic collection of Deep Learning essays on bioinformatics and medicine. It caters for a wide readership, ranging from machine learning practitioners and data scientists seeking methodological knowledge to address biomedical applications, to life science specialists in search of a gentle reference for advanced data analytics.
With contributions from internationally renowned experts, the book covers foundational methodologies in a wide spectrum of life sciences applications, including electronic health record processing, diagnostic imaging, text processing, as well as omics-data processing. This survey of consolidated problems is complemented by a selection of advanced applications, including cheminformatics and biomedical interaction network analysis. A modern and mindful approach to the use of data-driven methodologies in the life sciences also requires careful consideration of the associated societal, ethical, legal and transparency challenges, which are covered in the concluding chapters of this book.
2021
Ferrari, Elisa; Bacciu, Davide
Addressing Fairness, Bias and Class Imbalance in Machine Learning: the FBI-loss Unpublished
Online on Arxiv, 2021.
Abstract | Links | BibTeX | Tags: bioinformatics, biomedical data, deep learning, trustworthy AI
@unpublished{Ferrari2021,
  author       = {Ferrari, Elisa and Bacciu, Davide},
  title        = {Addressing Fairness, Bias and Class Imbalance in Machine Learning: the {FBI}-loss},
  note         = {Online on arXiv},
  howpublished = {Online on Arxiv},
  year         = {2021},
  date         = {2021-05-13},
  url          = {https://arxiv.org/abs/2105.06345},
  urldate      = {2021-05-13},
  eprint       = {2105.06345},
  eprinttype   = {arXiv},
  abstract     = {Resilience to class imbalance and confounding biases, together with the assurance of fairness guarantees are highly desirable properties of autonomous decision-making systems with real-life impact. Many different targeted solutions have been proposed to address separately these three problems, however a unifying perspective seems to be missing. With this work, we provide a general formalization, showing that they are different expressions of unbalance. Following this intuition, we formulate a unified loss correction to address issues related to Fairness, Biases and Imbalances (FBI-loss). The correction capabilities of the proposed approach are assessed on three real-world benchmarks, each associated to one of the issues under consideration, and on a family of synthetic data in order to better investigate the effectiveness of our loss on tasks with different complexities. The empirical results highlight that the flexible formulation of the FBI-loss leads also to competitive performances with respect to literature solutions specialised for the single problems.},
  keywords     = {bioinformatics, biomedical data, deep learning, trustworthy AI},
  pubstate     = {published},
  tppubtype    = {unpublished}
}
Resilience to class imbalance and confounding biases, together with the assurance of fairness guarantees are highly desirable properties of autonomous decision-making systems with real-life impact. Many different targeted solutions have been proposed to address separately these three problems, however a unifying perspective seems to be missing. With this work, we provide a general formalization, showing that they are different expressions of unbalance. Following this intuition, we formulate a unified loss correction to address issues related to Fairness, Biases and Imbalances (FBI-loss). The correction capabilities of the proposed approach are assessed on three real-world benchmarks, each associated to one of the issues under consideration, and on a family of synthetic data in order to better investigate the effectiveness of our loss on tasks with different complexities. The empirical results highlight that the flexible formulation of the FBI-loss leads also to competitive performances with respect to literature solutions specialised for the single problems.
Bontempi, Gianluca; Chavarriaga, Ricardo; De Canck, Hans; Girardi, Emanuela; Hoos, Holger; Kilbane-Dawe, Iarla; Ball, Tonio; Nowé, Ann; Sousa, Jose; Bacciu, Davide; Aldinucci, Marco; De Domenico, Manlio; Saffiotti, Alessandro; Maratea, Marco
The CLAIRE COVID-19 initiative: approach, experiences and recommendations Journal Article
In: Ethics and Information Technology, 2021.
Links | BibTeX | Tags: artificial intelligence, bioinformatics, biomedical data
@article{Bontempi2021,
  author    = {Bontempi, Gianluca and Chavarriaga, Ricardo and De Canck, Hans and Girardi, Emanuela and Hoos, Holger and Kilbane-Dawe, Iarla and Ball, Tonio and Nowé, Ann and Sousa, Jose and Bacciu, Davide and Aldinucci, Marco and De Domenico, Manlio and Saffiotti, Alessandro and Maratea, Marco},
  title     = {The {CLAIRE} {COVID-19} initiative: approach, experiences and recommendations},
  journal   = {Ethics and Information Technology},
  year      = {2021},
  date      = {2021-02-09},
  doi       = {10.1007/s10676-020-09567-7},
  keywords  = {artificial intelligence, bioinformatics, biomedical data},
  pubstate  = {published},
  tppubtype = {article}
}
2020

Podda, Marco; Micheli, Alessio; Bacciu, Davide; Milazzo, Paolo
Biochemical Pathway Robustness Prediction with Graph Neural Networks Conference
Proceedings of the European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN'20), 2020.
BibTeX | Tags: bioinformatics, biomedical data, deep learning for graphs, structured data processing
@conference{esann20Podda,
  author    = {Podda, Marco and Micheli, Alessio and Bacciu, Davide and Milazzo, Paolo},
  title     = {Biochemical Pathway Robustness Prediction with Graph Neural Networks},
  editor    = {Verleysen, Michel},
  booktitle = {Proceedings of the European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN'20)},
  year      = {2020},
  date      = {2020-04-21},
  keywords  = {bioinformatics, biomedical data, deep learning for graphs, structured data processing},
  pubstate  = {published},
  tppubtype = {conference}
}

Ferrari, Elisa; Retico, Alessandra; Bacciu, Davide
Measuring the effects of confounders in medical supervised classification problems: the Confounding Index (CI) Journal Article
In: Artificial Intelligence in Medicine, vol. 103, 2020.
Abstract | Links | BibTeX | Tags: artificial intelligence, bioinformatics, biomedical data, explainable AI, statistics
@article{aime20Confound,
  author    = {Ferrari, Elisa and Retico, Alessandra and Bacciu, Davide},
  title     = {Measuring the effects of confounders in medical supervised classification problems: the {Confounding Index} ({CI})},
  journal   = {Artificial Intelligence in Medicine},
  volume    = {103},
  year      = {2020},
  date      = {2020-03-01},
  doi       = {10.1016/j.artmed.2020.101804},
  url       = {https://arxiv.org/abs/1905.08871},
  abstract  = {Over the years, there has been growing interest in using Machine Learning techniques for biomedical data processing. When tackling these tasks, one needs to bear in mind that biomedical data depends on a variety of characteristics, such as demographic aspects (age, gender, etc) or the acquisition technology, which might be unrelated with the target of the analysis. In supervised tasks, failing to match the ground truth targets with respect to such characteristics, called confounders, may lead to very misleading estimates of the predictive performance. Many strategies have been proposed to handle confounders, ranging from data selection, to normalization techniques, up to the use of training algorithm for learning with imbalanced data. However, all these solutions require the confounders to be known a priori. To this aim, we introduce a novel index that is able to measure the confounding effect of a data attribute in a bias-agnostic way. This index can be used to quantitatively compare the confounding effects of different variables and to inform correction methods such as normalization procedures or ad-hoc-prepared learning algorithms. The effectiveness of this index is validated on both simulated data and real-world neuroimaging data.},
  keywords  = {artificial intelligence, bioinformatics, biomedical data, explainable AI, statistics},
  pubstate  = {published},
  tppubtype = {article}
}
Over the years, there has been growing interest in using Machine Learning techniques for biomedical data processing. When tackling these tasks, one needs to bear in mind that biomedical data depends on a variety of characteristics, such as demographic aspects (age, gender, etc) or the acquisition technology, which might be unrelated with the target of the analysis. In supervised tasks, failing to match the ground truth targets with respect to such characteristics, called confounders, may lead to very misleading estimates of the predictive performance. Many strategies have been proposed to handle confounders, ranging from data selection, to normalization techniques, up to the use of training algorithm for learning with imbalanced data. However, all these solutions require the confounders to be known a priori. To this aim, we introduce a novel index that is able to measure the confounding effect of a data attribute in a bias-agnostic way. This index can be used to quantitatively compare the confounding effects of different variables and to inform correction methods such as normalization procedures or ad-hoc-prepared learning algorithms. The effectiveness of this index is validated on both simulated data and real-world neuroimaging data.
2018
Podda, Marco; Bacciu, Davide; Micheli, Alessio; Bellu, Roberto; Placidi, Giulia; Gagliardi, Luigi
A machine learning approach to estimating preterm infants survival: development of the Preterm Infants Survival Assessment (PISA) predictor Journal Article
In: Nature Scientific Reports, vol. 8, 2018.
Abstract | Links | BibTeX | Tags: bioinformatics, biomedical data, neural networks, support vector machine
@article{naturescirep2018,
  author    = {Podda, Marco and Bacciu, Davide and Micheli, Alessio and Bellu, Roberto and Placidi, Giulia and Gagliardi, Luigi},
  title     = {A machine learning approach to estimating preterm infants survival: development of the {Preterm Infants Survival Assessment} ({PISA}) predictor},
  journal   = {Nature Scientific Reports},
  volume    = {8},
  year      = {2018},
  date      = {2018-09-13},
  doi       = {10.1038/s41598-018-31920-6},
  url       = {https://doi.org/10.1038/s41598-018-31920-6},
  abstract  = {Estimation of mortality risk of very preterm neonates is carried out in clinical and research settings. We aimed at elaborating a prediction tool using machine learning methods. We developed models on a cohort of 23747 neonates <30 weeks gestational age, or <1501 g birth weight, enrolled in the Italian Neonatal Network in 2008–2014 (development set), using 12 easily collected perinatal variables. We used a cohort from 2015–2016 (N = 5810) as a test set. Among several machine learning methods we chose artificial Neural Networks (NN). The resulting predictor was compared with logistic regression models. In the test cohort, NN had a slightly better discrimination than logistic regression (P < 0.002). The differences were greater in subgroups of neonates (at various gestational age or birth weight intervals, singletons). Using a cutoff of death probability of 0.5, logistic regression misclassified 67/5810 neonates (1.2 percent) more than NN. In conclusion our study – the largest published so far – shows that even in this very simplified scenario, using only limited information available up to 5 minutes after birth, a NN approach had a small but significant advantage over current approaches. The software implementing the predictor is made freely available to the community.},
  keywords  = {bioinformatics, biomedical data, neural networks, support vector machine},
  pubstate  = {published},
  tppubtype = {article}
}
Estimation of mortality risk of very preterm neonates is carried out in clinical and research settings. We aimed at elaborating a prediction tool using machine learning methods. We developed models on a cohort of 23747 neonates <30 weeks gestational age, or <1501 g birth weight, enrolled in the Italian Neonatal Network in 2008–2014 (development set), using 12 easily collected perinatal variables. We used a cohort from 2015–2016 (N = 5810) as a test set. Among several machine learning methods we chose artificial Neural Networks (NN). The resulting predictor was compared with logistic regression models. In the test cohort, NN had a slightly better discrimination than logistic regression (P < 0.002). The differences were greater in subgroups of neonates (at various gestational age or birth weight intervals, singletons). Using a cutoff of death probability of 0.5, logistic regression misclassified 67/5810 neonates (1.2 percent) more than NN. In conclusion our study – the largest published so far – shows that even in this very simplified scenario, using only limited information available up to 5 minutes after birth, a NN approach had a small but significant advantage over current approaches. The software implementing the predictor is made freely available to the community.
Bacciu, Davide; Lisboa, Paulo J. G.; Martin, Jose D.; Stoean, Ruxandra; Vellido, Alfredo
Bioinformatics and medicine in the era of deep learning Conference
Proceedings of the European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN'18), i6doc.com, Louvain-la-Neuve, Belgium, 2018, ISBN: 978-287587047-6.
Abstract | Links | BibTeX | Tags: bioinformatics, biomedical data, deep learning
@conference{esann2018Tut,
  author    = {Bacciu, Davide and Lisboa, Paulo J. G. and Martin, Jose D. and Stoean, Ruxandra and Vellido, Alfredo},
  title     = {Bioinformatics and medicine in the era of deep learning},
  editor    = {Verleysen, Michel},
  booktitle = {Proceedings of the European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN'18)},
  pages     = {345--354},
  publisher = {i6doc.com},
  address   = {Louvain-la-Neuve, Belgium},
  year      = {2018},
  date      = {2018-04-26},
  isbn      = {978-287587047-6},
  url       = {http://arxiv.org/abs/1802.09791},
  abstract  = {Many of the current scientific advances in the life sciences have their origin in the intensive use of data for knowledge discovery. In no area this is so clear as in bioinformatics, led by technological breakthroughs in data acquisition technologies. It has been argued that bioinformatics could quickly become the field of research generating the largest data repositories, beating other data-intensive areas such as high-energy physics or astroinformatics. Over the last decade, deep learning has become a disruptive advance in machine learning, giving new life to the long-standing connectionist paradigm in artificial intelligence. Deep learning methods are ideally suited to large-scale data and, therefore, they should be ideally suited to knowledge discovery in bioinformatics and biomedicine at large. In this brief paper, we review key aspects of the application of deep learning in bioinformatics and medicine, drawing from the themes covered by the contributions to an ESANN 2018 special session devoted to this topic.},
  keywords  = {bioinformatics, biomedical data, deep learning},
  pubstate  = {published},
  tppubtype = {conference}
}
Many of the current scientific advances in the life sciences have their origin in the intensive use of data for knowledge discovery. In no area this is so clear as in bioinformatics, led by technological breakthroughs in data acquisition technologies. It has been argued that bioinformatics could quickly become the field of research generating the largest data repositories, beating other data-intensive areas such as high-energy physics or astroinformatics. Over the last decade, deep learning has become a disruptive advance in machine learning, giving new life to the long-standing connectionist paradigm in artificial intelligence. Deep learning methods are ideally suited to large-scale data and, therefore, they should be ideally suited to knowledge discovery in bioinformatics and biomedicine at large. In this brief paper, we review key aspects of the application of deep learning in bioinformatics and medicine, drawing from the themes covered by the contributions to an ESANN 2018 special session devoted to this topic.