Publications
2022
Sangermano, Matteo; Carta, Antonio; Cossu, Andrea; Lomonaco, Vincenzo; Bacciu, Davide
Sample Condensation in Online Continual Learning Conference
Proceedings of the 2022 IEEE World Congress on Computational Intelligence, IEEE, 2022.
Abstract | BibTeX | Tags: Continual learning, generative model
@conference{Sangermano2022,
title = {Sample Condensation in Online Continual Learning},
author = {Matteo Sangermano and Antonio Carta and Andrea Cossu and Vincenzo Lomonaco and Davide Bacciu},
year = {2022},
date = {2022-07-18},
urldate = {2022-07-18},
booktitle = {Proceedings of the 2022 IEEE World Congress on Computational Intelligence},
publisher = {IEEE},
abstract = {Online continual learning is a challenging learning scenario where the model observes a non-stationary stream of data and learns online. The main challenge is to learn incrementally while avoiding catastrophic forgetting, namely the problem of forgetting previously acquired knowledge while learning from new data. A popular solution in this scenario is to use a small memory to retain old data and rehearse it over time. Unfortunately, due to the limited memory size, the quality of the memory will deteriorate over time. In this paper we propose OLCGM, a novel replay-based continual learning strategy that uses knowledge condensation techniques to continuously compress the memory and achieve a better use of its limited size. The sample condensation step compresses old samples, instead of removing them like other replay strategies. As a result, the experiments show that, whenever the memory budget is limited compared to the complexity of the data, OLCGM improves the final accuracy compared to state-of-the-art replay strategies.},
keywords = {Continual learning, generative model},
pubstate = {published},
tppubtype = {conference}
}
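As a rough illustration of the condensation step described above, the sketch below compresses a group of buffered samples into fewer synthetic ones by gradient matching, the mechanism underlying OLCGM; the function names and the plain SGD loop are illustrative choices, not the paper's actual implementation.

import torch
import torch.nn.functional as F

def gradient_matching_loss(model, real_x, real_y, synth_x, synth_y):
    # Distance between the loss gradients induced by the real and synthetic batches.
    g_real = torch.autograd.grad(F.cross_entropy(model(real_x), real_y),
                                 model.parameters())
    g_synth = torch.autograd.grad(F.cross_entropy(model(synth_x), synth_y),
                                  model.parameters(), create_graph=True)
    return sum(F.mse_loss(gs, gr.detach()) for gs, gr in zip(g_synth, g_real))

def condense(model, real_x, real_y, n_out, steps=100, lr=0.1):
    # Compress len(real_x) stored samples into n_out synthetic ones.
    synth_x = real_x[:n_out].clone().requires_grad_(True)  # init from real data
    synth_y = real_y[:n_out]
    opt = torch.optim.SGD([synth_x], lr=lr)
    for _ in range(steps):
        opt.zero_grad()
        gradient_matching_loss(model, real_x, real_y, synth_x, synth_y).backward()
        opt.step()
    return synth_x.detach(), synth_y

# Toy usage: condense eight buffered samples into two synthetic ones.
model = torch.nn.Linear(10, 2)
x, y = torch.randn(8, 10), torch.randint(0, 2, (8,))
mem_x, mem_y = condense(model, x, y, n_out=2)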
Valenti, Andrea; Bacciu, Davide
Leveraging Relational Information for Learning Weakly Disentangled Representations Conference
Proceedings of the 2022 IEEE World Congress on Computational Intelligence, IEEE, 2022.
Abstract | BibTeX | Tags: adversarial learning, autoencoder, generative model, learning-symbolic integration
@conference{Valenti2022,
title = {Leveraging Relational Information for Learning Weakly Disentangled Representations},
author = {Andrea Valenti and Davide Bacciu},
year = {2022},
date = {2022-07-18},
urldate = {2022-07-18},
booktitle = {Proceedings of the 2022 IEEE World Congress on Computational Intelligence},
publisher = {IEEE},
abstract = {Disentanglement is a difficult property to enforce in neural representations. This might be due, in part, to a formalization of the disentanglement problem that focuses too heavily on separating relevant factors of variation of the data in single isolated dimensions of the neural representation. We argue that such a definition might be too restrictive and not necessarily beneficial in terms of downstream tasks. In this work, we present an alternative view of learning (weakly) disentangled representations, which leverages concepts from relational learning. We identify the regions of the latent space that correspond to specific instances of generative factors, and we learn the relationships among these regions in order to perform controlled changes to the latent codes. We also introduce a compound generative model that implements such a weak disentanglement approach. Our experiments show that the learned representations can separate the relevant factors of variation in the data, while preserving the information needed for effectively generating high-quality data samples.},
keywords = {adversarial learning, autoencoder, generative model, learning-symbolic integration},
pubstate = {published},
tppubtype = {conference}
}
Carta, Antonio; Cossu, Andrea; Lomonaco, Vincenzo; Bacciu, Davide
Ex-Model: Continual Learning from a Stream of Trained Models Conference
Proceedings of the CVPR 2022 Workshop on Continual Learning, IEEE, 2022.
Abstract | Links | BibTeX | Tags: Continual learning, deep learning, trustworthy AI
@conference{carta2021ex,
title = {Ex-Model: Continual Learning from a Stream of Trained Models},
author = {Antonio Carta and Andrea Cossu and Vincenzo Lomonaco and Davide Bacciu},
url = {https://arxiv.org/pdf/2112.06511.pdf, Arxiv},
year = {2022},
date = {2022-06-20},
urldate = {2022-06-20},
booktitle = {Proceedings of the CVPR 2022 Workshop on Continual Learning},
journal = {arXiv preprint arXiv:2112.06511},
organization = {IEEE},
abstract = {Learning continually from non-stationary data streams is a challenging research topic of growing popularity in the last few years. Being able to learn, adapt, and generalize continually in an efficient, effective, and scalable way is fundamental for the sustainable development of Artificial Intelligence systems. However, an agent-centric view of continual learning requires learning directly from raw data, which limits the interaction between independent agents, the efficiency, and the privacy of current approaches. Instead, we argue that continual learning systems should exploit the availability of compressed information in the form of trained models. In this paper, we introduce and formalize a new paradigm named "Ex-Model Continual Learning" (ExML), where an agent learns from a sequence of previously trained models instead of raw data. We further contribute with three ex-model continual learning algorithms and an empirical setting comprising three datasets (MNIST, CIFAR-10 and CORe50) and eight scenarios, where the proposed algorithms are extensively tested. Finally, we highlight the peculiarities of the ex-model paradigm and we point out interesting future research directions.},
keywords = {Continual learning, deep learning, trustworthy AI},
pubstate = {published},
tppubtype = {conference}
}
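The core of the ex-model setting can be illustrated with a hedged sketch: a student network distills each trained model arriving on the stream, using surrogate inputs in place of the raw data it never sees. The surrogate-data source and the plain KL-distillation loss are assumptions for illustration; the paper contributes three more refined algorithms.

import torch
import torch.nn.functional as F

def distill_expert(student, expert, surrogate_loader, lr=1e-3):
    # Match the student's predictive distribution to the expert's, batch by batch.
    opt = torch.optim.Adam(student.parameters(), lr=lr)
    expert.eval()
    for x in surrogate_loader:
        with torch.no_grad():
            teacher = F.softmax(expert(x), dim=1)
        loss = F.kl_div(F.log_softmax(student(x), dim=1), teacher,
                        reduction="batchmean")
        opt.zero_grad()
        loss.backward()
        opt.step()

# Ex-model stream: the student only ever sees trained models, never raw data.
student = torch.nn.Linear(10, 2)
stream = [torch.nn.Linear(10, 2) for _ in range(3)]   # stand-ins for expert models
surrogate = [torch.randn(16, 10) for _ in range(5)]   # assumed auxiliary inputs
for expert in stream:
    distill_expert(student, expert, surrogate)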
Serramazza, Davide Italo; Bacciu, Davide
Learning image captioning as a structured transduction task Conference
Proceedings of the 23rd International Conference on Engineering Applications of Neural Networks (EANN 2022), 2022.
BibTeX | Tags: generative model, image captioning, structured data processing, tree structured data, tree transductions
@conference{Serramazza2022,
title = {Learning image captioning as a structured transduction task},
author = {Davide Italo Serramazza and Davide Bacciu},
year = {2022},
date = {2022-06-20},
urldate = {2022-06-20},
booktitle = {Proceedings of the 23rd International Conference on Engineering Applications of Neural Networks (EANN 2022)},
keywords = {generative model, image captioning, structured data processing, tree structured data, tree transductions},
pubstate = {published},
tppubtype = {conference}
}
Lucchesi, Nicolò; Carta, Antonio; Lomonaco, Vincenzo; Bacciu, Davide
Avalanche RL: a Continual Reinforcement Learning Library Conference
Proceedings of the 21st International Conference on Image Analysis and Processing (ICIAP 2021), 2022.
Abstract | Links | BibTeX | Tags: Continual learning, reinforcement learning
@conference{Lucchesi2022,
title = {Avalanche RL: a Continual Reinforcement Learning Library},
author = {Nicolò Lucchesi and Antonio Carta and Vincenzo Lomonaco and Davide Bacciu},
url = {https://arxiv.org/abs/2202.13657, Arxiv},
year = {2022},
date = {2022-05-23},
urldate = {2022-05-23},
booktitle = {Proceedings of the 21st International Conference on Image Analysis and Processing (ICIAP 2021)},
abstract = {Continual Reinforcement Learning (CRL) is a challenging setting where an agent learns to interact with an environment that is constantly changing over time (the stream of experiences). In this paper, we describe Avalanche RL, a library for Continual Reinforcement Learning which makes it easy to train agents on a continuous stream of tasks. Avalanche RL is based on PyTorch and supports any OpenAI Gym environment. Its design is based on Avalanche, one of the most popular continual learning libraries, which allows us to reuse a large number of continual learning strategies and improves the interaction between reinforcement learning and continual learning researchers. Additionally, we propose Continual Habitat-Lab, a novel benchmark and a high-level library which enables the use of the photorealistic simulator Habitat-Sim for CRL research. Overall, Avalanche RL attempts to unify continual reinforcement learning applications under a common framework, which we hope will foster the growth of the field.},
keywords = {Continual learning, reinforcement learning},
pubstate = {published},
tppubtype = {conference}
}
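Avalanche RL's own API is not reproduced here; the following is only a schematic of the setting the library targets, namely training an agent over a stream of OpenAI Gym tasks while tracking everything seen so far. The agent object and its act/update methods are hypothetical placeholders.

import gym

def continual_rl(agent, task_names, steps_per_task=10_000):
    seen = []
    for name in task_names:
        env = gym.make(name)
        seen.append(name)
        obs = env.reset()
        for _ in range(steps_per_task):
            action = agent.act(obs)                    # hypothetical agent API
            obs, reward, done, info = env.step(action)
            agent.update(obs, reward, done)            # hypothetical agent API
            if done:
                obs = env.reset()
        # After each task, evaluate on all of `seen` to measure forgetting.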
Numeroso, Danilo; Bacciu, Davide; Veličković, Petar
Learning heuristics for A* Workshop
ICLR 2022 Workshop on Anchoring Machine Learning in Classical Algorithmic Theory (GroundedML 2022), 2022.
Abstract | BibTeX | Tags: algorithmic reasoning, deep learning for graphs, learning-symbolic integration
@workshop{Numeroso2022,
title = {Learning heuristics for A*},
author = {Danilo Numeroso and Davide Bacciu and Petar Veličković},
year = {2022},
date = {2022-04-29},
urldate = {2022-04-29},
booktitle = {ICLR 2022 Workshop on Anchoring Machine Learning in Classical Algorithmic Theory (GroundedML 2022)},
abstract = {Path finding in graphs is one of the most studied classes of problems in computer science. In this context, search algorithms are often extended with heuristics for a more efficient search of target nodes. In this work we combine recent advancements in Neural Algorithmic Reasoning to learn efficient heuristic functions for path finding problems on graphs. At training time, we exploit multi-task learning to jointly learn Dijkstra's algorithm and a consistent heuristic function for the A* search algorithm. At inference time, we plug our learnt heuristics into the A* algorithm. Results show that running A* over the learnt heuristic values can greatly speed up target node search compared to Dijkstra, while still finding minimal-cost paths.},
keywords = {algorithmic reasoning, deep learning for graphs, learning-symbolic integration},
pubstate = {published},
tppubtype = {workshop}
}
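A compact sketch of where the learnt heuristic enters: a textbook A* over an adjacency list that takes the heuristic as a callable, which is where the paper's jointly-trained neural heuristic would be plugged in (the network itself is omitted here).

import heapq

def a_star(graph, start, goal, h):
    # graph maps node -> [(neighbour, edge_cost)]; h estimates cost-to-goal.
    frontier = [(h(start), 0.0, start, [start])]
    best_g = {start: 0.0}
    while frontier:
        f, g, node, path = heapq.heappop(frontier)
        if node == goal:
            return path, g
        for nxt, cost in graph.get(node, []):
            g2 = g + cost
            if g2 < best_g.get(nxt, float("inf")):
                best_g[nxt] = g2
                heapq.heappush(frontier, (g2 + h(nxt), g2, nxt, path + [nxt]))
    return None, float("inf")

# With h = lambda n: 0 this degenerates to Dijkstra; a consistent learnt h
# preserves optimality while expanding fewer nodes.
graph = {"a": [("b", 1), ("c", 4)], "b": [("c", 1)], "c": []}
print(a_star(graph, "a", "c", h=lambda n: 0))   # (['a', 'b', 'c'], 2.0)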
Ferrari, Elisa; Gargani, Luna; Barbieri, Greta; Ghiadoni, Lorenzo; Faita, Francesco; Bacciu, Davide
A causal learning framework for the analysis and interpretation of COVID-19 clinical data Journal Article Forthcoming
In: PLOS ONE, Forthcoming.
Abstract | Links | BibTeX | Tags: Bayesian networks, bioinformatics, biomedical data, explainable AI, unsupervised learning
@article{DBLP:journals/corr/abs-2105-06998,
title = {A causal learning framework for the analysis and interpretation of COVID-19 clinical data},
author = {Elisa Ferrari and Luna Gargani and Greta Barbieri and Lorenzo Ghiadoni and Francesco Faita and Davide Bacciu},
url = {https://arxiv.org/abs/2105.06998, Arxiv},
year = {2022},
date = {2022-04-27},
urldate = {2022-01-01},
journal = {PLOS ONE},
abstract = {We present a workflow for clinical data analysis that relies on Bayesian Structure Learning (BSL), an unsupervised learning approach, robust to noise and biases, that allows prior medical knowledge to be incorporated into the learning process and that provides explainable results in the form of a graph showing the causal connections among the analyzed features. The workflow consists of a multi-step approach that goes from identifying the main causes of patient outcome through BSL, to the realization of a tool suitable for clinical practice, based on a Binary Decision Tree (BDT), to recognize patients at high risk using information already available at hospital admission time. We evaluate our approach on a feature-rich COVID-19 dataset, showing that the proposed framework provides a schematic overview of the multi-factorial processes that jointly contribute to the outcome. We discuss how these computational findings are confirmed by the current understanding of the COVID-19 pathogenesis. Further, our approach yields a highly interpretable tool correctly predicting the outcome of 85% of subjects based exclusively on 3 features: age, a previous history of chronic obstructive pulmonary disease and the PaO2/FiO2 ratio at the time of arrival to the hospital. The inclusion of additional information from 4 routine blood tests (Creatinine, Glucose, pO2 and Sodium) increases predictive accuracy to 94.5%.},
keywords = {Bayesian networks, bioinformatics, biomedical data, explainable AI, unsupervised learning},
pubstate = {forthcoming},
tppubtype = {article}
}
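To make the final step concrete, here is a hedged sketch of a shallow decision tree over the three admission-time features the abstract singles out (age, COPD history, PaO2/FiO2). The data is synthetic and the fitted thresholds are placeholders, not the study's actual model.

import numpy as np
from sklearn.tree import DecisionTreeClassifier, export_text

rng = np.random.default_rng(0)
X = np.column_stack([
    rng.integers(20, 95, 500),        # age
    rng.integers(0, 2, 500),          # history of COPD (0/1)
    rng.uniform(50, 450, 500),        # PaO2/FiO2 ratio at admission
])
y = ((X[:, 0] > 70) & (X[:, 2] < 200)).astype(int)   # toy outcome rule, not real data

tree = DecisionTreeClassifier(max_depth=3).fit(X, y)
print(export_text(tree, feature_names=["age", "copd", "pao2_fio2"]))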
Collodi, Lorenzo; Bacciu, Davide; Bianchi, Matteo; Averta, Giuseppe
Learning with few examples the semantic description of novel human-inspired grasp strategies from RGB data Journal Article
In: IEEE Robotics and Automation Letters, pp. 2573-2580, 2022.
Abstract | Links | BibTeX | Tags: deep learning for graphs, graph data, learning-symbolic integration, robotics
@article{Collodi2022,
title = {Learning with few examples the semantic description of novel human-inspired grasp strategies from RGB data},
author = {Lorenzo Collodi and Davide Bacciu and Matteo Bianchi and Giuseppe Averta},
url = {https://www.researchgate.net/profile/Giuseppe-Averta/publication/358006552_Learning_With_Few_Examples_the_Semantic_Description_of_Novel_Human-Inspired_Grasp_Strategies_From_RGB_Data/links/61eae01e8d338833e3857251/Learning-With-Few-Examples-the-Semantic-Description-of-Novel-Human-Inspired-Grasp-Strategies-From-RGB-Data.pdf, Open Version},
doi = {10.1109/LRA.2022.3144520},
year = {2022},
date = {2022-04-04},
urldate = {2022-02-28},
journal = {IEEE Robotics and Automation Letters},
pages = {2573-2580},
publisher = {IEEE},
abstract = {Data-driven approaches and human inspiration are fundamental to endow robotic manipulators with advanced autonomous grasping capabilities. However, to capitalize upon these two pillars, several aspects need to be considered, which include the number of human examples used for training; the need for having in advance all the required information for classification (hardly feasible in unstructured environments); and the trade-off between the task performance and the processing cost. In this paper, we propose an RGB-based pipeline that can identify the object to be grasped and guide the actual execution of the grasping primitive selected through a combination of Convolutional and Gated Graph Neural Networks. We consider a set of human-inspired grasp strategies, which are afforded by the geometrical properties of the objects and identified from a human grasping taxonomy, and propose to learn new grasping skills with only a few examples. We test our framework with a manipulator endowed with an under-actuated soft robotic hand. Even though we use only 2D information to minimize the footprint of the network, we achieve 90% successful identification of the most appropriate human-inspired grasping strategy over ten different classes, of which three were few-shot learned, outperforming an ideal model trained with all the classes, in sample-scarce conditions.},
keywords = {deep learning for graphs, graph data, learning-symbolic integration, robotics},
pubstate = {published},
tppubtype = {article}
}
Bacciu, Davide; Numeroso, Danilo
Explaining Deep Graph Networks via Input Perturbation Journal Article Forthcoming
In: IEEE Transactions on Neural Networks and Learning Systems, Forthcoming.
Abstract | Links | BibTeX | Tags: adversarial examples, deep learning for graphs, explainable AI, generative model, structured data processing, trustworthy AI
@article{Bacciu2022,
title = {Explaining Deep Graph Networks via Input Perturbation},
author = {Davide Bacciu and Danilo Numeroso},
doi = {10.1109/TNNLS.2022.3165618},
year = {2022},
date = {2022-04-01},
urldate = {2022-04-01},
journal = {IEEE Transactions on Neural Networks and Learning Systems},
abstract = {Deep Graph Networks are a family of machine learning models for structured data which are finding heavy application in the life sciences (drug repurposing, molecular property prediction) and on social network data (recommendation systems). The privacy and safety-critical nature of such domains motivates the need for developing effective explainability methods for this family of models. So far, progress in this field has been challenged by the combinatorial nature and complexity of graph structures. In this respect, we present a novel local explanation framework specifically tailored to graph data and deep graph networks. Our approach leverages reinforcement learning to generate meaningful local perturbations of the input graph, whose prediction we seek an interpretation for. These perturbed data points are obtained by optimising a multi-objective score taking into account similarities both at a structural level and at the level of the deep model outputs. By this means, we are able to populate a set of informative neighbouring samples for the query graph, which is then used to fit an interpretable model for the predictive behaviour of the deep network locally to the query graph prediction. We show the effectiveness of the proposed explainer through a qualitative analysis on two chemistry datasets, TOS and ESOL, and through quantitative results on a benchmark dataset for explanations, CYCLIQ.},
keywords = {adversarial examples, deep learning for graphs, explainable AI, generative model, structured data processing, trustworthy AI},
pubstate = {forthcoming},
tppubtype = {article}
}
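The local-explanation recipe can be sketched as follows: generate perturbed neighbours of the query graph, keep those the deep model treats similarly, and fit an interpretable surrogate on them. Naive random edge removal here stands in for the paper's reinforcement-learning-driven perturbation generator, and the scoring is deliberately simplified to the output-level filter.

import random

def perturb(edges, k=1):
    # Drop k random edges: a naive stand-in for learned, structure-aware moves.
    edges = list(edges)
    for _ in range(min(k, len(edges))):
        edges.remove(random.choice(edges))
    return edges

def local_neighbourhood(model, query_edges, n=50, tol=0.2):
    # Keep perturbed graphs whose prediction stays close to the query's.
    base = model(query_edges)
    samples = [perturb(query_edges) for _ in range(n)]
    return [g for g in samples if abs(model(g) - base) < tol]

# A simple interpretable model (e.g. linear) would then be fitted on the kept samples.
toy_model = lambda edges: len(edges) / 10.0   # placeholder scorer, not a real DGN
print(len(local_neighbourhood(toy_model, [(0, 1), (1, 2), (2, 3)])))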
Gravina, Alessio; Wilson, Jennifer L.; Bacciu, Davide; Grimes, Kevin J.; Priami, Corrado
Controlling astrocyte-mediated synaptic pruning signals for schizophrenia drug repurposing with Deep Graph Networks Journal Article Forthcoming
In: PLOS Computational Biology, Forthcoming.
Abstract | Links | BibTeX | Tags: bioinformatics, biomedical data, deep learning for graphs, structured data processing
@article{Gravina2022,
title = {Controlling astrocyte-mediated synaptic pruning signals for schizophrenia drug repurposing with Deep Graph Networks},
author = {Alessio Gravina and Jennifer L. Wilson and Davide Bacciu and Kevin J. Grimes and Corrado Priami},
url = {https://www.biorxiv.org/content/10.1101/2021.10.07.463459v1, BioArxiv},
year = {2022},
date = {2022-04-01},
journal = {PLOS Computational Biology},
abstract = {Schizophrenia is a debilitating psychiatric disorder, leading to both physical and social morbidity. Worldwide, 1% of the population is struggling with the disease, with 100,000 new cases annually in the United States alone. Despite its importance, the goal of finding effective treatments for schizophrenia remains a challenging task, and previous work conducted expensive large-scale phenotypic screens. This work investigates the benefits of Machine Learning for graphs to optimize drug phenotypic screens and predict compounds that mitigate abnormal brain reduction induced by excessive glial phagocytic activity in schizophrenia subjects. Given a compound and its concentration as input, we propose a method that predicts a score associated with three possible compound effects, i.e., reduce, increase, or not influence phagocytosis. We leverage a high-throughput screening to prove experimentally that our method achieves good generalization capabilities. The screening involves 2218 compounds at five different concentrations. Then, we analyze the usability of our approach in a practical setting, i.e., prioritizing the selection of compounds in the SWEETLEAD library. We provide a list of 64 compounds from the library that have the most potential clinical utility for glial phagocytosis mitigation. Lastly, we propose a novel approach to computationally validate their utility as possible therapies for schizophrenia.},
keywords = {bioinformatics, biomedical data, deep learning for graphs, structured data processing},
pubstate = {forthcoming},
tppubtype = {article}
}
Caro, Valerio De; Bano, Saira; Machumilane, Achilles; Gotta, Alberto; Cassará, Pietro; Carta, Antonio; Sardianos, Christos; Chronis, Christos; Varlamis, Iraklis; Tserpes, Konstantinos; Lomonaco, Vincenzo; Gallicchio, Claudio; Bacciu, Davide
AI-as-a-Service Toolkit for Human-Centered Intelligence in Autonomous Driving Conference
Proceedings of the 20th International Conference on Pervasive Computing and Communications (PerCom 2022), 2022.
Links | BibTeX | Tags: activity recognition, AI-as-a-service, deep learning, humanistic intelligence, machine vision, Sequential data
@conference{decaro2022aiasaservice,
title = {AI-as-a-Service Toolkit for Human-Centered Intelligence in Autonomous Driving},
author = {Valerio De Caro and Saira Bano and Achilles Machumilane and Alberto Gotta and Pietro Cassará and Antonio Carta and Christos Sardianos and Christos Chronis and Iraklis Varlamis and Konstantinos Tserpes and Vincenzo Lomonaco and Claudio Gallicchio and Davide Bacciu},
url = {https://arxiv.org/pdf/2202.01645.pdf, arxiv},
year = {2022},
date = {2022-03-21},
urldate = {2022-03-21},
booktitle = {Proceedings of the 20th International Conference on Pervasive Computing and Communications (PerCom 2022)},
keywords = {activity recognition, AI-as-a-service, deep learning, humanistic intelligence, machine vision, Sequential data},
pubstate = {published},
tppubtype = {conference}
}
Bacciu, Davide; Lisboa, Paulo J. G.; Vellido, Alfredo
Deep Learning in Biology and Medicine Book Forthcoming
World Scientific Publisher, Forthcoming, ISBN: 978-1-80061-093-4.
Abstract | Links | BibTeX | Tags: artificial intelligence, bioinformatics, biomedical data, deep learning
@book{BacciuBook2022,
title = {Deep Learning in Biology and Medicine},
author = {Davide Bacciu and Paulo J. G. Lisboa and Alfredo Vellido},
doi = {10.1142/q0322},
isbn = {978-1-80061-093-4},
year = {2022},
date = {2022-02-01},
urldate = {2022-02-01},
publisher = {World Scientific Publisher},
abstract = {Biology, medicine and biochemistry have become data-centric fields for which Deep Learning methods are delivering groundbreaking results. Addressing high impact challenges, Deep Learning in Biology and Medicine provides an accessible and organic collection of Deep Learning essays on bioinformatics and medicine. It caters for a wide readership, ranging from machine learning practitioners and data scientists seeking methodological knowledge to address biomedical applications, to life science specialists in search of a gentle reference for advanced data analytics.
With contributions from internationally renowned experts, the book covers foundational methodologies in a wide spectrum of life sciences applications, including electronic health record processing, diagnostic imaging, text processing, as well as omics-data processing. This survey of consolidated problems is complemented by a selection of advanced applications, including cheminformatics and biomedical interaction network analysis. A modern and mindful approach to the use of data-driven methodologies in the life sciences also requires careful consideration of the associated societal, ethical, legal and transparency challenges, which are covered in the concluding chapters of this book.},
keywords = {artificial intelligence, bioinformatics, biomedical data, deep learning},
pubstate = {forthcoming},
tppubtype = {book}
}
Castellana, Daniele; Bacciu, Davide
A Tensor Framework for Learning in Structured Domains Journal Article
In: Neurocomputing, 470, pp. 405-426, 2022.
Abstract | Links | BibTeX | Tags: deep learning, structured data processing, tensor factorization, tensor neural networks, tree structured data
@article{Castellana2021,
title = {A Tensor Framework for Learning in Structured Domains},
author = {Daniele Castellana and Davide Bacciu},
editor = {Kerstin Bunte and Niccolo Navarin and Luca Oneto},
doi = {10.1016/j.neucom.2021.05.110},
year = {2022},
date = {2022-01-22},
urldate = {2021-05-03},
journal = {Neurocomputing},
volume = {470},
pages = {405-426},
abstract = {Learning machines for structured data (e.g., trees) are intrinsically based on their capacity to learn representations by aggregating information from the multi-way relationships emerging from the structure topology. While complex aggregation functions are desirable in this context to increase the expressiveness of the learned representations, the modelling of higher-order interactions among structure constituents is unfeasible, in practice, due to the exponential number of parameters required. Therefore, the common approach is to define models which rely only on first-order interactions among structure constituents.
In this work, we leverage tensor theory to define a framework for learning in structured domains. Such a framework is built on the observation that more expressive models require a tensor parameterisation. This observation is the stepping stone for the application of tensor decompositions in the context of recursive models. From this point of view, the advantage of using tensor decompositions is twofold, since it allows limiting the number of model parameters while injecting inductive biases that do not ignore higher-order interactions.
We apply the proposed framework on probabilistic and neural models for structured data, defining different models which leverage tensor decompositions. The experimental validation clearly shows the advantage of these models compared to first-order and full-tensorial models.},
keywords = {deep learning, structured data processing, tensor factorization, tensor neural networks, tree structured data},
pubstate = {published},
tppubtype = {article}
}
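A toy numerical illustration of the parameter-count argument above: aggregating two child states of dimension d through a full third-order tensor needs d^3 parameters, while a rank-r CP factorisation needs only 3dr. The shapes below are illustrative only.

import numpy as np

d, r = 8, 4
h1, h2 = np.random.randn(d), np.random.randn(d)

# Full tensor parameterisation: T[i, j, k] -> d**3 parameters.
T = np.random.randn(d, d, d)
full = np.einsum("ijk,j,k->i", T, h1, h2)

# CP (rank-r) factorisation: T ~ sum_r a_r (x) b_r (x) c_r -> 3*d*r parameters.
A, B, C = (np.random.randn(d, r) for _ in range(3))
cp = A @ ((B.T @ h1) * (C.T @ h2))

print(T.size, A.size + B.size + C.size)   # 512 vs 96 parameters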
Carta, Antonio; Cossu, Andrea; Errica, Federico; Bacciu, Davide
Catastrophic Forgetting in Deep Graph Networks: a Graph Classification benchmark Journal Article
In: Frontiers in Artificial Intelligence, 2022.
Abstract | Links | BibTeX | Tags: Continual learning, deep learning for graphs, graph data, structured data processing
@article{Carta2022,
title = {Catastrophic Forgetting in Deep Graph Networks: a Graph Classification benchmark},
author = {Antonio Carta and Andrea Cossu and Federico Errica and Davide Bacciu},
doi = {10.3389/frai.2022.824655},
year = {2022},
date = {2022-01-11},
journal = {Frontiers in Artificial Intelligence},
abstract = {In this work, we study the phenomenon of catastrophic forgetting in the graph representation learning scenario. The primary objective of the analysis is to understand whether classical continual learning techniques for flat and sequential data have a tangible impact on performance when applied to graph data. To do so, we experiment with a structure-agnostic model and a deep graph network in a robust and controlled environment on three different datasets. The benchmark is complemented by an investigation on the effect of structure-preserving regularization techniques on catastrophic forgetting. We find that replay is so far the most effective strategy, and it also benefits the most from the use of regularization. Our findings suggest interesting future research at the intersection of the continual and graph representation learning fields. Finally, we provide researchers with a flexible software framework to reproduce our results and carry out further experiments.},
keywords = {Continual learning, deep learning for graphs, graph data, structured data processing},
pubstate = {published},
tppubtype = {article}
}
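As an example of the kind of regularization strategy benchmarked here, the sketch below shows an EWC-style quadratic penalty that discourages drift on parameters deemed important for past experiences; the exact strategies and hyperparameters used in the paper's framework are not implied.

import torch

def ewc_penalty(model, old_params, fisher, lam=1.0):
    # Quadratic penalty on drifting away from previously important parameters.
    loss = 0.0
    for name, p in model.named_parameters():
        loss = loss + (fisher[name] * (p - old_params[name]) ** 2).sum()
    return lam * loss

# Usage: total_loss = task_loss + ewc_penalty(model, old_params, fisher),
# where `fisher` holds per-parameter importance estimated on past experiences.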
Cossu, Andrea; Graffieti, Gabriele; Pellegrini, Lorenzo; Maltoni, Davide; Bacciu, Davide; Carta, Antonio; Lomonaco, Vincenzo
Is Class-Incremental Enough for Continual Learning? Journal Article
In: Frontiers in Artificial Intelligence, 5, 2022, ISSN: 2624-8212.
Abstract | Links | BibTeX | Tags:
@article{10.3389/frai.2022.829842,
title = {Is Class-Incremental Enough for Continual Learning?},
author = {Andrea Cossu and Gabriele Graffieti and Lorenzo Pellegrini and Davide Maltoni and Davide Bacciu and Antonio Carta and Vincenzo Lomonaco},
url = {https://www.frontiersin.org/article/10.3389/frai.2022.829842},
doi = {10.3389/frai.2022.829842},
issn = {2624-8212},
year = {2022},
date = {2022-01-01},
journal = {Frontiers in Artificial Intelligence},
volume = {5},
abstract = {The ability of a model to learn continually can be empirically assessed in different continual learning scenarios. Each scenario defines the constraints and the opportunities of the learning environment. Here, we challenge the current trend in the continual learning literature to experiment mainly on class-incremental scenarios, where classes present in one experience are never revisited. We posit that an excessive focus on this setting may be limiting for future research on continual learning, since class-incremental scenarios artificially exacerbate catastrophic forgetting, at the expense of other important objectives like forward transfer and computational efficiency. In many real-world environments, in fact, repetition of previously encountered concepts occurs naturally and contributes to softening the disruption of previous knowledge. We advocate for a more in-depth study of alternative continual learning scenarios, in which repetition is integrated by design in the stream of incoming information. Starting from already existing proposals, we describe the advantages such class-incremental with repetition scenarios could offer for a more comprehensive assessment of continual learning models.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2021
Lanciano, Giacomo; Galli, Filippo; Cucinotta, Tommaso; Bacciu, Davide; Passarella, Andrea
Predictive Auto-scaling with OpenStack Monasca Conference
Proceedings of the 14th IEEE/ACM International Conference on Utility and Cloud Computing (UCC 2021), 2021.
Abstract | Links | BibTeX | Tags: cloud computing, pervasive computing, recurrent neural network, Sequential data
@conference{Lanciano2021,
title = {Predictive Auto-scaling with OpenStack Monasca},
author = {Giacomo Lanciano and Filippo Galli and Tommaso Cucinotta and Davide Bacciu and Andrea Passarella},
url = {https://arxiv.org/abs/2111.02133, Arxiv},
doi = {10.1145/3468737.3494104},
year = {2021},
date = {2021-12-06},
urldate = {2021-12-06},
booktitle = {Proceedings of the 14th IEEE/ACM International Conference on Utility and Cloud Computing (UCC 2021)},
pages = {1-10},
abstract = {Cloud auto-scaling mechanisms are typically based on reactive automation rules that scale a cluster whenever some metric, e.g., the average CPU usage among instances, exceeds a predefined threshold. Tuning these rules becomes particularly cumbersome when scaling-up a cluster involves non-negligible times to bootstrap new instances, as frequently happens in production cloud services.
To deal with this problem, we propose an architecture for auto-scaling cloud services based on the status in which the system is expected to evolve in the near future. Our approach leverages time-series forecasting techniques, like those based on machine learning and artificial neural networks, to predict the future dynamics of key metrics, e.g., resource consumption metrics, and applies a threshold-based scaling policy on them. The result is a predictive automation policy that is able, for instance, to automatically anticipate peaks in the load of a cloud application and trigger appropriate scaling actions ahead of time to accommodate the expected increase in traffic.
We prototyped our approach as an open-source OpenStack component, which relies on, and extends, the monitoring capabilities offered by Monasca, resulting in the addition of predictive metrics that can be leveraged by orchestration components like Heat or Senlin. We show experimental results using a recurrent neural network and a multi-layer perceptron as predictors, which are compared with a simple linear regression and a traditional non-predictive auto-scaling policy. The proposed framework allows for the easy customization of the prediction policy as needed.},
keywords = {cloud computing, pervasive computing, recurrent neural network, Sequential data},
pubstate = {published},
tppubtype = {conference}
}
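A minimal sketch of the predictive policy: forecast the next value of a load metric and apply the usual threshold rule to the prediction, so scale-out starts before the peak. A least-squares linear extrapolation stands in for the RNN and MLP predictors evaluated in the paper; the thresholds are placeholders.

import numpy as np

def forecast_next(history, window=10):
    # One-step-ahead forecast by linear extrapolation over a sliding window.
    y = np.asarray(history[-window:], dtype=float)
    t = np.arange(len(y))
    slope, intercept = np.polyfit(t, y, 1)
    return slope * len(y) + intercept

def scaling_decision(history, high=80.0, low=20.0):
    # Apply the threshold rule to the predicted metric, not the current one.
    pred = forecast_next(history)
    if pred > high:
        return "scale_out", pred
    if pred < low:
        return "scale_in", pred
    return "hold", pred

cpu = [40, 45, 52, 60, 67, 75, 79, 83]    # rising average CPU% across the cluster
print(scaling_decision(cpu))               # scales out before the peak arrives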
Cossu, Andrea; Carta, Antonio; Lomonaco, Vincenzo; Bacciu, Davide
Continual Learning for Recurrent Neural Networks: an Empirical Evaluation Journal Article
In: Neural Networks, 143, pp. 607-627, 2021.
Abstract | Links | BibTeX | Tags: Continual learning, deep learning, recurrent neural network, Sequential data
@article{Cossu2021b,
title = {Continual Learning for Recurrent Neural Networks: an Empirical Evaluation},
author = {Andrea Cossu and Antonio Carta and Vincenzo Lomonaco and Davide Bacciu},
url = {https://arxiv.org/abs/2103.07492, Arxiv},
year = {2021},
date = {2021-12-03},
urldate = {2021-08-30},
journal = {Neural Networks},
volume = {143},
pages = {607-627},
abstract = {Learning continuously throughout a model's lifetime is fundamental to deploying machine learning solutions that are robust to drifts in the data distribution. Advances in Continual Learning (CL) with recurrent neural networks could pave the way to a large number of applications where incoming data is non-stationary, like natural language processing and robotics. However, the existing body of work on the topic is still fragmented, with approaches which are application-specific and whose assessment is based on heterogeneous learning protocols and datasets. In this paper, we organize the literature on CL for sequential data processing by providing a categorization of the contributions and a review of the benchmarks. We propose two new benchmarks for CL with sequential data based on existing datasets, whose characteristics resemble real-world applications. We also provide a broad empirical evaluation of CL and Recurrent Neural Networks in the class-incremental scenario, by testing their ability to mitigate forgetting with a number of different strategies which are not specific to sequential data processing. Our results highlight the key role played by the sequence length and the importance of a clear specification of the CL scenario.},
keywords = {Continual learning, deep learning, recurrent neural network, Sequential data},
pubstate = {published},
tppubtype = {article}
}
Bacciu, Davide; Carta, Antonio; Sarli, Daniele Di; Gallicchio, Claudio; Lomonaco, Vincenzo; Petroni, Salvatore
Towards Functional Safety Compliance of Recurrent Neural Networks Conference
Proceedings of the International Conference on AI for People (CAIP 2021), 2021.
Abstract | Links | BibTeX | Tags: distributed learning, humanistic intelligence, internet of things, recurrent neural network, trustworthy AI
@conference{BacciuCAIP2021,
title = {Towards Functional Safety Compliance of Recurrent Neural Networks},
author = {Davide Bacciu and Antonio Carta and Daniele Di Sarli and Claudio Gallicchio and Vincenzo Lomonaco and Salvatore Petroni},
url = {https://aiforpeople.org/conference/assets/papers/CAIP21-P09.pdf, Open Access PDF},
year = {2021},
date = {2021-11-20},
booktitle = {Proceedings of the International Conference on AI for People (CAIP 2021)},
abstract = {Deploying Autonomous Driving systems requires facing some novel challenges for the Automotive industry. One of the most critical aspects that can severely compromise their deployment is Functional Safety. The ISO 26262 standard provides guidelines to ensure Functional Safety of road vehicles. However, this standard is not suitable to develop Artificial Intelligence-based systems such as systems based on Recurrent Neural Networks (RNNs). To address this issue, in this paper we propose a new methodology, composed of three steps. The first step is the robustness evaluation of the RNN against input perturbations. Then, a proper set of safety measures must be defined according to the model's robustness, where less robust models will require stronger mitigation. Finally, the functionality of the entire system must be extensively tested according to Safety Of The Intended Functionality (SOTIF) guidelines, providing quantitative results about the occurrence of unsafe scenarios, and by evaluating appropriate Safety Performance Indicators.},
keywords = {distributed learning, humanistic intelligence, internet of things, recurrent neural network, trustworthy AI},
pubstate = {published},
tppubtype = {conference}
}
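The first step of the methodology, robustness evaluation against input perturbations, can be sketched as a simple noise-injection test; the Gaussian perturbation model and the accuracy-drop metric are assumptions for illustration, not the paper's prescribed procedure.

import torch

def robustness_drop(model, x, y, sigmas=(0.01, 0.05, 0.1)):
    # Accuracy drop of a sequence classifier under Gaussian input noise.
    def accuracy(inputs):
        with torch.no_grad():
            return (model(inputs).argmax(dim=-1) == y).float().mean().item()
    clean = accuracy(x)
    return {s: clean - accuracy(x + s * torch.randn_like(x)) for s in sigmas}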
Carta, Antonio; Sperduti, Alessandro; Bacciu, Davide
Encoding-based Memory for Recurrent Neural Networks Journal Article
In: Neurocomputing, 456, pp. 407-420, 2021.
Abstract | Links | BibTeX | Tags: autoencoder, deep learning, memory networks, recurrent neural network, Sequential data
@article{Carta2021b,
title = {Encoding-based Memory for Recurrent Neural Networks},
author = {Antonio Carta and Alessandro Sperduti and Davide Bacciu},
url = {https://arxiv.org/abs/2001.11771, Arxiv},
doi = {10.1016/j.neucom.2021.04.051},
year = {2021},
date = {2021-10-07},
urldate = {2021-04-20},
journal = {Neurocomputing},
volume = {456},
pages = {407-420},
publisher = {Elsevier},
abstract = {Learning to solve sequential tasks with recurrent models requires the ability to memorize long sequences and to extract task-relevant features from them. In this paper, we study the memorization subtask from the point of view of the design and training of recurrent neural networks. We propose a new model, the Linear Memory Network, which features an encoding-based memorization component built with a linear autoencoder for sequences. We extend the memorization component with a modular memory that encodes the hidden state sequence at different sampling frequencies. Additionally, we provide a specialized training algorithm that initializes the memory to efficiently encode the hidden activations of the network. The experimental results on synthetic and real-world datasets show that specializing the training algorithm to train the memorization component always improves the final performance whenever the memorization of long sequences is necessary to solve the problem. },
keywords = {autoencoder, deep learning, memory networks, recurrent neural network, Sequential data},
pubstate = {published},
tppubtype = {article}
}
Learning to solve sequential tasks with recurrent models requires the ability to memorize long sequences and to extract task-relevant features from them. In this paper, we study the memorization subtask from the point of view of the design and training of recurrent neural networks. We propose a new model, the Linear Memory Network, which features an encoding-based memorization component built with a linear autoencoder for sequences. We extend the memorization component with a modular memory that encodes the hidden state sequence at different sampling frequencies. Additionally, we provide a specialized training algorithm that initializes the memory to efficiently encode the hidden activations of the network. The experimental results on synthetic and real-world datasets show that specializing the training algorithm to train the memorization component always improves the final performance whenever the memorization of long sequences is necessary to solve the problem.
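A hedged sketch of the encoding-based memorization idea: a linear memory m_t = A x_t + B m_{t-1} trained so that a linear decoder recovers the current input and the previous state from m_t. The paper derives a specialized initialization for this component; the plain gradient-descent training and all sizes below are simplifications.

```python
import torch

# Illustrative linear autoencoder for sequences: the memory update is
# m_t = A x_t + B m_{t-1}; a linear decoder tries to recover (x_t, m_{t-1})
# from m_t, so the memory state acts as an (approximately) lossless
# encoding of the input history. Sizes and training loop are arbitrary.
n_in, n_mem, T = 4, 16, 10
A = torch.nn.Linear(n_in, n_mem, bias=False)
B = torch.nn.Linear(n_mem, n_mem, bias=False)
dec = torch.nn.Linear(n_mem, n_in + n_mem, bias=False)
opt = torch.optim.Adam([*A.parameters(), *B.parameters(), *dec.parameters()], lr=1e-2)

for step in range(200):
    x = torch.randn(32, T, n_in)
    m = torch.zeros(32, n_mem)
    loss = 0.0
    for t in range(T):
        m_new = A(x[:, t]) + B(m)
        target = torch.cat([x[:, t], m], dim=1)  # reconstruct input and previous state
        loss = loss + ((dec(m_new) - target) ** 2).mean()
        m = m_new
    opt.zero_grad(); loss.backward(); opt.step()
```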
Averta, Giuseppe; Barontini, Federica; Valdambrini, Irene; Cheli, Paolo; Bacciu, Davide; Bianchi, Matteo
Learning to Prevent Grasp Failure with Soft Hands: From Online Prediction to Dual-Arm Grasp Recovery Journal Article
In: Advanced Intelligent Systems, 2021.
Abstract | Links | BibTeX | Tags: deep learning, machine vision, recurrent neural network, robotics, Sequential data
@article{Averta2021,
title = {Learning to Prevent Grasp Failure with Soft Hands: From Online Prediction to Dual-Arm Grasp Recovery},
author = {Giuseppe Averta and Federica Barontini and Irene Valdambrini and Paolo Cheli and Davide Bacciu and Matteo Bianchi},
doi = {10.1002/aisy.202100146},
year = {2021},
date = {2021-10-07},
journal = {Advanced Intelligent Systems},
abstract = {Thanks to their intrinsic adaptability, soft hands simplify the grasp planning needed to achieve a successful grasp. At the same time, their usage poses new challenges, related to the adoption of classical sensing techniques originally developed for rigid end effectors, which provide fundamental information such as the detection of object slippage. In this regard, model-based approaches for the processing of the gathered information are hard to use, due to the difficulties in modeling hand–object interaction when softness is involved. To overcome these limitations, in this article we propose to combine distributed tactile sensing and machine learning (a recurrent neural network) to detect sliding conditions for a soft robotic hand mounted on a robotic manipulator, targeting the prediction of the grasp failure event and the direction of sliding. The outcomes of these predictions allow for the online triggering of a compensatory action performed with a second robotic arm–hand system to prevent the failure. Although the network is trained only with spherical and cylindrical objects, we demonstrate the high generalization capabilities of our framework, achieving a correct prediction of the failure direction in 75% of cases and successful regrasps in 85% of cases, for a selection of 12 objects of common use.},
keywords = {deep learning, machine vision, recurrent neural network, robotics, Sequential data},
pubstate = {published},
tppubtype = {article}
}
Thanks to their intrinsic adaptability, soft hands simplify the grasp planning needed to achieve a successful grasp. At the same time, their usage poses new challenges, related to the adoption of classical sensing techniques originally developed for rigid end effectors, which provide fundamental information such as the detection of object slippage. In this regard, model-based approaches for the processing of the gathered information are hard to use, due to the difficulties in modeling hand–object interaction when softness is involved. To overcome these limitations, in this article we propose to combine distributed tactile sensing and machine learning (a recurrent neural network) to detect sliding conditions for a soft robotic hand mounted on a robotic manipulator, targeting the prediction of the grasp failure event and the direction of sliding. The outcomes of these predictions allow for the online triggering of a compensatory action performed with a second robotic arm–hand system to prevent the failure. Although the network is trained only with spherical and cylindrical objects, we demonstrate the high generalization capabilities of our framework, achieving a correct prediction of the failure direction in 75% of cases and successful regrasps in 85% of cases, for a selection of 12 objects of common use.
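As a rough illustration of the predictor's interface, not the paper's architecture, a recurrent classifier over tactile sequences can expose two heads: one anticipating grasp failure and one classifying the sliding direction. The taxel count and the number of direction classes are invented placeholders.

```python
import torch

# A minimal two-headed recurrent classifier over tactile sequences, in the
# spirit of the paper's predictor: one head anticipates grasp failure, the
# other classifies the sliding direction.
class SlipPredictor(torch.nn.Module):
    def __init__(self, n_taxels=64, n_hidden=128, n_directions=4):
        super().__init__()
        self.rnn = torch.nn.LSTM(n_taxels, n_hidden, batch_first=True)
        self.failure = torch.nn.Linear(n_hidden, 1)        # P(failure soon)
        self.direction = torch.nn.Linear(n_hidden, n_directions)

    def forward(self, x):                # x: (batch, time, taxels)
        h, _ = self.rnn(x)
        last = h[:, -1]
        return torch.sigmoid(self.failure(last)), self.direction(last)

p_fail, dir_logits = SlipPredictor()(torch.randn(8, 100, 64))
```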
Cossu, Andrea; Bacciu, Davide; Carta, Antonio; Gallicchio, Claudio; Lomonaco, Vincenzo
Continual Learning with Echo State Networks Conference
Proceedings of the 29th European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2021), 2021.
Abstract | Links | BibTeX | Tags: Continual learning, Echo state networks, recurrent neural network, Sequential data
@conference{Cossu2021,
title = { Continual Learning with Echo State Networks },
author = {Andrea Cossu and Davide Bacciu and Antonio Carta and Claudio Gallicchio and Vincenzo Lomonaco},
editor = {Michel Verleysen},
url = {https://arxiv.org/abs/2105.07674, Arxiv},
doi = {10.14428/esann/2021.ES2021-80},
year = {2021},
date = {2021-10-06},
urldate = {2021-10-06},
booktitle = {Proceedings of the 29th European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2021)},
pages = {275-280},
abstract = { Continual Learning (CL) refers to a learning setup where data is non-stationary and the model has to learn without forgetting existing knowledge. The study of CL for sequential patterns revolves around trained recurrent networks. In this work, instead, we introduce CL in the context of Echo State Networks (ESNs), where the recurrent component is kept fixed. We provide the first evaluation of catastrophic forgetting in ESNs and we highlight the benefits of using CL strategies which are not applicable to trained recurrent models. Our results confirm the ESN as a promising model for CL and open the way to its use in streaming scenarios. },
keywords = {Continual learning, Echo state networks, recurrent neural network, Sequential data},
pubstate = {published},
tppubtype = {conference}
}
Continual Learning (CL) refers to a learning setup where data is non-stationary and the model has to learn without forgetting existing knowledge. The study of CL for sequential patterns revolves around trained recurrent networks. In this work, instead, we introduce CL in the context of Echo State Networks (ESNs), where the recurrent component is kept fixed. We provide the first evaluation of catastrophic forgetting in ESNs and we highlight the benefits of using CL strategies which are not applicable to trained recurrent models. Our results confirm the ESN as a promising model for CL and open the way to its use in streaming scenarios.
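The property that makes ESNs appealing here fits in a few lines: the recurrent reservoir is fixed and random, and learning touches only a linear readout, fit below by ridge regression (a standard choice for ESNs; the dimensions and data are dummies).

```python
import numpy as np

rng = np.random.default_rng(0)
n_in, n_res = 3, 100

# Fixed random reservoir (never trained): only the linear readout learns,
# which is what makes ESNs attractive in a continual learning setting.
W_in = rng.uniform(-0.1, 0.1, (n_res, n_in))
W = rng.normal(size=(n_res, n_res))
W *= 0.9 / max(abs(np.linalg.eigvals(W)))   # rescale to spectral radius 0.9

def states(X):
    h, out = np.zeros(n_res), []
    for x in X:
        h = np.tanh(W_in @ x + W @ h)
        out.append(h.copy())
    return np.array(out)

def ridge_readout(H, Y, lam=1e-3):
    return np.linalg.solve(H.T @ H + lam * np.eye(n_res), H.T @ Y)

# Each "experience" of the stream retrains only W_out; the reservoir is reused.
X, Y = rng.normal(size=(200, n_in)), rng.normal(size=(200, 1))
W_out = ridge_readout(states(X), Y)
```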
Bacciu, Davide; Bianchi, Filippo Maria; Paassen, Benjamin; Alippi, Cesare
Deep learning for graphs Conference
Proceedings of the 29th European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2021), 2021.
Abstract | Links | BibTeX | Tags: deep learning, deep learning for graphs, graph data, structured data processing
@conference{Bacciu2021c,
title = { Deep learning for graphs},
author = {Davide Bacciu and Filippo Maria Bianchi and Benjamin Paassen and Cesare Alippi},
editor = {Michel Verleysen},
doi = {10.14428/esann/2021.ES2021-5},
year = {2021},
date = {2021-10-06},
urldate = {2021-10-06},
booktitle = {Proceedings of the 29th European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2021)},
pages = {89-98},
abstract = { Deep learning for graphs encompasses all those models endowed with multiple layers of abstraction, which operate on data represented as graphs. The most common building blocks of these models are graph encoding layers, which compute a vector embedding for each node in a graph based on a sum of messages received from its neighbors. However, the family also includes architectures with decoders from vectors to graphs and models that process time-varying graphs and hypergraphs. In this paper, we provide an overview of the key concepts in the field, point towards open questions, and frame the contributions of the ESANN 2021 special session into the broader context of deep learning for graphs. },
keywords = {deep learning, deep learning for graphs, graph data, structured data processing},
pubstate = {published},
tppubtype = {conference}
}
Deep learning for graphs encompasses all those models endowed with multiple layers of abstraction, which operate on data represented as graphs. The most common building blocks of these models are graph encoding layers, which compute a vector embedding for each node in a graph based on a sum of messages received from its neighbors. However, the family also includes architectures with decoders from vectors to graphs and models that process time-varying graphs and hypergraphs. In this paper, we provide an overview of the key concepts in the field, point towards open questions, and frame the contributions of the ESANN 2021 special session into the broader context of deep learning for graphs.
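The sum-of-messages building block described above can be made concrete with a single layer on a toy graph; the self-term, weights, and sizes are arbitrary choices, not a specific model from the session.

```python
import numpy as np

# One sum-of-messages graph encoding layer: each node embedding is updated
# from the sum of its neighbours' embeddings,
#   H' = relu(H W_self + A H W_neigh).
rng = np.random.default_rng(0)
A = np.array([[0, 1, 1, 0],
              [1, 0, 0, 1],
              [1, 0, 0, 1],
              [0, 1, 1, 0]], dtype=float)   # adjacency of a toy 4-node graph
H = rng.normal(size=(4, 8))                  # initial node features
W_self = rng.normal(size=(8, 8))
W_neigh = rng.normal(size=(8, 8))

H_next = np.maximum(0.0, H @ W_self + A @ H @ W_neigh)  # new node embeddings
print(H_next.shape)
```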
Dukic, Haris; Deligiorgis, Georgios; Sepe, Pierpaolo; Bacciu, Davide; Trincavelli, Marco
Inductive learning for product assortment graph completion Conference
Proceedings of the 29th European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2021), 2021.
Abstract | Links | BibTeX | Tags: deep learning for graphs, graph data, recommendation systems
@conference{Dukic2021,
title = {Inductive learning for product assortment graph completion},
author = {Haris Dukic and Georgios Deligiorgis and Pierpaolo Sepe and Davide Bacciu and Marco Trincavelli},
editor = {Michel Verleysen},
doi = {10.14428/esann/2021.ES2021-73},
year = {2021},
date = {2021-10-06},
urldate = {2021-10-06},
booktitle = {Proceedings of the 29th European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2021)},
pages = {129-134},
abstract = { Global retailers have assortments that contain hundreds of thousands of products that can be linked by several types of relationships like style compatibility, "bought together", "watched together", etc. Graphs are a natural representation for assortments, where products are nodes and relations are edges. Relations like style compatibility are often produced by a manual process and therefore do not uniformly cover the whole graph. We propose to use inductive learning to enhance a graph encoding style compatibility of a fashion assortment, leveraging rich node information comprising textual descriptions and visual data. Then, we show how the proposed graph enhancement substantially improves the performance on transductive tasks with a minor impact on graph sparsity. },
keywords = {deep learning for graphs, graph data, recommendation systems},
pubstate = {published},
tppubtype = {conference}
}
Global retailers have assortments that contain hundreds of thousands of products that can be linked by several types of relationships like style compatibility, "bought together", "watched together", etc. Graphs are a natural representation for assortments, where products are nodes and relations are edges. Relations like style compatibility are often produced by a manual process and therefore do not uniformly cover the whole graph. We propose to use inductive learning to enhance a graph encoding style compatibility of a fashion assortment, leveraging rich node information comprising textual descriptions and visual data. Then, we show how the proposed graph enhancement substantially improves the performance on transductive tasks with a minor impact on graph sparsity.
Valenti, Andrea; Berti, Stefano; Bacciu, Davide
Calliope - A Polyphonic Music Transformer Conference
Proceedings of the 29th European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2021), 2021.
Abstract | Links | BibTeX | Tags: artificial creativity, autoencoder, deep learning, generative model, music generation, transformer
@conference{Valenti2021b,
title = {Calliope - A Polyphonic Music Transformer},
author = {Andrea Valenti and Stefano Berti and Davide Bacciu},
editor = {Michel Verleysen},
abstract = { The polyphonic nature of music makes the application of deep learning to music modelling a challenging task. On the other hand, the Transformer architecture seems to be a good fit for this kind of data. In this work, we present Calliope, a novel autoencoder model based on Transformers for the efficient modelling of multi-track sequences of polyphonic music. The experiments show that our model is able to improve the state of the art on musical sequence reconstruction and generation, with remarkably good results especially on long sequences. },
doi = {10.14428/esann/2021.ES2021-63},
year = {2021},
date = {2021-10-06},
urldate = {2021-10-06},
booktitle = {Proceedings of the 29th European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2021)},
pages = {405-410},
keywords = {artificial creativity, autoencoder, deep learning, generative model, music generation, transformer},
pubstate = {published},
tppubtype = {conference}
}
Schoitsch, Erwin; Mylonas, Georgios (Ed.)
Supporting Privacy Preservation by Distributed and Federated Learning on the Edge Periodical
ERCIM News, 127, 2021.
Links | BibTeX | Tags: artificial intelligence, Continual learning, edge AI, federated learning, humanistic intelligence, reservoir computing, trustworthy AI
@periodical{Bacciu2021e,
title = {Supporting Privacy Preservation by Distributed and Federated Learning on the Edge},
author = { Davide Bacciu and Patrizio Dazzi and Alberto Gotta},
editor = {Erwin Schoitsch and Georgios Mylonas},
url = {https://ercim-news.ercim.eu/en127/r-i/supporting-privacy-preservation-by-distributed-and-federated-learning-on-the-edge},
year = {2021},
date = {2021-09-30},
issuetitle = {ERCIM News},
volume = {127},
keywords = {artificial intelligence, Continual learning, edge AI, federated learning, humanistic intelligence, reservoir computing, trustworthy AI},
pubstate = {published},
tppubtype = {periodical}
}
Bacciu, Davide; Conte, Alessio; Grossi, Roberto; Landolfi, Francesco; Marino, Andrea
K-Plex Cover Pooling for Graph Neural Networks Journal Article
In: Data Mining and Knowledge Discovery, 2021, (Also accepted as a paper at the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML-PKDD 2021)).
Abstract | Links | BibTeX | Tags: deep learning, deep learning for graphs, graph data, graph pooling, structured data processing
@article{Bacciu2021b,
title = {K-Plex Cover Pooling for Graph Neural Networks},
author = {Davide Bacciu and Alessio Conte and Roberto Grossi and Francesco Landolfi and Andrea Marino},
editor = {Annalisa Appice and Sergio Escalera and José A. Gámez and Heike Trautmann},
url = {https://link.springer.com/article/10.1007/s10618-021-00779-z, Published version},
doi = {10.1007/s10618-021-00779-z},
year = {2021},
date = {2021-09-13},
journal = {Data Mining and Knowledge Discovery},
abstract = {Graph pooling methods provide mechanisms for structure reduction that are intended to ease the diffusion of context between nodes further in the graph, and that typically leverage community discovery mechanisms or node and edge pruning heuristics. In this paper, we introduce a novel pooling technique which borrows from classical results in graph theory that is non-parametric and generalizes well to graphs of different nature and connectivity patterns. Our pooling method, named KPlexPool, builds on the concepts of graph covers and k-plexes, i.e. pseudo-cliques where each node can miss up to k links. The experimental evaluation on benchmarks on molecular and social graph classification shows that KPlexPool achieves state-of-the-art performance against both parametric and non-parametric pooling methods in the literature, despite generating pooled graphs based solely on topological information.},
note = {Also accepted as a paper at the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML-PKDD 2021)},
keywords = {deep learning, deep learning for graphs, graph data, graph pooling, structured data processing},
pubstate = {published},
tppubtype = {article}
}
Graph pooling methods provide mechanisms for structure reduction that are intended to ease the diffusion of context between nodes further in the graph, and that typically leverage community discovery mechanisms or node and edge pruning heuristics. In this paper, we introduce a novel pooling technique which borrows from classical results in graph theory that is non-parametric and generalizes well to graphs of different nature and connectivity patterns. Our pooling method, named KPlexPool, builds on the concepts of graph covers and k-plexes, i.e. pseudo-cliques where each node can miss up to k links. The experimental evaluation on benchmarks on molecular and social graph classification shows that KPlexPool achieves state-of-the-art performance against both parametric and non-parametric pooling methods in the literature, despite generating pooled graphs based solely on topological information.
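The k-plex notion at the core of KPlexPool is easy to state in code. Below is only the membership test under the usual definition (every node of a set S has at least |S| - k neighbours inside S), not the paper's covering algorithm.

```python
def is_kplex(nodes, adj, k):
    """True if every node of `nodes` has at least len(nodes) - k neighbours inside it."""
    nodes = set(nodes)
    return all(len(adj[v] & nodes) >= len(nodes) - k for v in nodes)

# Toy graph: a triangle {0, 1, 2} plus a pendant node 3 attached to node 2.
adj = {0: {1, 2}, 1: {0, 2}, 2: {0, 1, 3}, 3: {2}}
print(is_kplex({0, 1, 2}, adj, k=1))     # True: a clique is a 1-plex
print(is_kplex({0, 1, 2, 3}, adj, k=2))  # False: node 3 reaches only 1 of the other 3
```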
Macher, G.; Akarmazyan, S.; Armengaud, E.; Bacciu, D.; Calandra, C.; Danzinger, H.; Dazzi, P.; Davalas, C.; Gennaro, M. C. De; Dimitriou, A.; Dobaj, J.; Dzambic, M.; Giraudi, L.; Girbal, S.; Michail, D.; Peroglio, R.; Potenza, R.; Pourdanesh, F.; Seidl, M.; Sardianos, C.; Tserpes, K.; Valtl, J.; Varlamis, I.; Veledar, O.
Dependable Integration Concepts for Human-Centric AI-based Systems Workshop
Proceedings of the 40th International Conference on Computer Safety, Reliability and Security (SafeComp 2021), Springer, 2021, (Invited discussion paper).
BibTeX | Tags: dependable AI, humanistic intelligence, trustworthy AI
@workshop{Macher2021,
title = {Dependable Integration Concepts for Human-Centric AI-based Systems},
author = {G. Macher and S. Akarmazyan and E. Armengaud and D. Bacciu and C. Calandra and H. Danzinger and P. Dazzi and C. Davalas and M.C. De Gennaro and A. Dimitriou and J. Dobaj and M. Dzambic and L. Giraudi and S. Girbal and D. Michail and R. Peroglio and R. Potenza and F. Pourdanesh and M. Seidl and C. Sardianos and K. Tserpes and J. Valtl and I. Varlamis and O. Veledar },
year = {2021},
date = {2021-09-07},
urldate = {2021-09-07},
booktitle = {Proceedings of the 40th International Conference on Computer Safety, Reliability and Security (SafeComp 2021)},
pages = {11-23},
publisher = {Springer},
note = {Invited discussion paper},
keywords = {dependable AI, humanistic intelligence, trustworthy AI},
pubstate = {published},
tppubtype = {workshop}
}
Macher, Georg; Armengaud, Eric; Bacciu, Davide; Dobaj, Jürgen; Dzambic, Maid; Seidl, Matthias; Veledar, Omar
Dependable Integration Concepts for Human-Centric AI-based Systems Workshop
Proceedings of the 16th International Workshop on Dependable Smart Embedded Cyber-Physical Systems and Systems-of-Systems (DECSoS 2021), 2021.
Abstract | BibTeX | Tags: dependable AI, humanistic intelligence, trustworthy AI
@workshop{Macher2021b,
title = {Dependable Integration Concepts for Human-Centric AI-based Systems},
author = {Georg Macher and Eric Armengaud and Davide Bacciu and Jürgen Dobaj and Maid Dzambic and Matthias Seidl and Omar Veledar},
year = {2021},
date = {2021-09-07},
booktitle = {Proceedings of the 16th International Workshop on Dependable Smart Embedded Cyber-Physical Systems and Systems-of-Systems (DECSoS 2021)},
abstract = {The rising demand to integrate adaptive, cloud-based and/or AI-based systems is also increasing the need for associated dependability concepts. However, the practical processes and methods covering the whole life cycle still need to be instantiated. The assurance of dependability continues to be an open issue with no common solution. That is especially the case for novel AI and/or dynamical runtime-based approaches. This work focuses on engineering methods and design patterns that support the development of dependable AI-based autonomous systems. The paper presents the related body of knowledge of the TEACHING project and multiple automotive domain regulation activities and industrial working groups. It also considers the dependable architectural concepts and their impactful applicability to different scenarios to ensure the dependability of AI-based Cyber-Physical Systems of Systems (CPSoS) in the automotive domain. The paper shines a light on potential paths for dependable integration of AI-based systems into the automotive domain through identified analysis methods and targets. },
keywords = {dependable AI, humanistic intelligence, trustworthy AI},
pubstate = {published},
tppubtype = {workshop}
}
The rising demand to integrate adaptive, cloud-based and/or AI-based systems is also increasing the need for associated dependability concepts. However, the practical processes and methods covering the whole life cycle still need to be instantiated. The assurance of dependability continues to be an open issue with no common solution. That is especially the case for novel AI and/or dynamical runtime-based approaches. This work focuses on engineering methods and design patterns that support the development of dependable AI-based autonomous systems. The paper presents the related body of knowledge of the TEACHING project and multiple automotive domain regulation activities and industrial working groups. It also considers the dependable architectural concepts and their impactful applicability to different scenarios to ensure the dependability of AI-based Cyber-Physical Systems of Systems (CPSoS) in the automotive domain. The paper shines a light on potential paths for dependable integration of AI-based systems into the automotive domain through identified analysis methods and targets.
Resta, Michele; Monreale, Anna; Bacciu, Davide
Occlusion-based Explanations in Deep Recurrent Models for Biomedical Signals Journal Article
In: Entropy, 23 (8), pp. 1064, 2021, (Special issue on Representation Learning).
Abstract | Links | BibTeX | Tags: biomedical data, explainable AI, recurrent neural network, Sequential data
@article{Resta2021,
title = { Occlusion-based Explanations in Deep Recurrent Models for Biomedical Signals },
author = {Michele Resta and Anna Monreale and Davide Bacciu},
editor = {Fabio Aiolli and Mirko Polato},
doi = {10.3390/e23081064},
year = {2021},
date = {2021-09-01},
urldate = {2021-09-01},
journal = {Entropy},
volume = {23},
number = {8},
pages = {1064},
abstract = { The biomedical field is characterized by an ever-increasing production of sequential data, which often come in the form of biosignals capturing the time-evolution of physiological processes, such as blood pressure and brain activity. This has motivated a large body of research dealing with the development of machine learning techniques for the predictive analysis of such biosignals. Unfortunately, in high-stakes decision making, such as clinical diagnosis, the opacity of machine learning models becomes a crucial aspect to be addressed in order to increase the trust and adoption of AI technology. In this paper we propose a model-agnostic explanation method, based on occlusion, enabling the learning of the input influence on the model predictions. We specifically target problems involving the predictive analysis of time-series data and the models which are typically used to deal with data of such nature, i.e. recurrent neural networks. Our approach is able to provide two different kinds of explanations: one suitable for technical experts, who need to verify the quality and correctness of machine learning models, and one suited to physicians, who need to understand the rationale underlying the prediction to make informed decisions. A wide experimentation on different physiological data demonstrates the effectiveness of our approach in both classification and regression tasks. },
note = {Special issue on Representation Learning},
keywords = {biomedical data, explainable AI, recurrent neural network, Sequential data},
pubstate = {published},
tppubtype = {article}
}
The biomedical field is characterized by an ever-increasing production of sequential data, which often come in the form of biosignals capturing the time-evolution of physiological processes, such as blood pressure and brain activity. This has motivated a large body of research dealing with the development of machine learning techniques for the predictive analysis of such biosignals. Unfortunately, in high-stakes decision making, such as clinical diagnosis, the opacity of machine learning models becomes a crucial aspect to be addressed in order to increase the trust and adoption of AI technology. In this paper we propose a model-agnostic explanation method, based on occlusion, enabling the learning of the input influence on the model predictions. We specifically target problems involving the predictive analysis of time-series data and the models which are typically used to deal with data of such nature, i.e. recurrent neural networks. Our approach is able to provide two different kinds of explanations: one suitable for technical experts, who need to verify the quality and correctness of machine learning models, and one suited to physicians, who need to understand the rationale underlying the prediction to make informed decisions. A wide experimentation on different physiological data demonstrates the effectiveness of our approach in both classification and regression tasks.
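The occlusion mechanism lends itself to a compact sketch: slide a window over the signal, replace the occluded span with a neutral value, and attribute to those time steps the change in the model's output. The toy predictor, window size, and fill value are assumptions, not the paper's configuration.

```python
import numpy as np

# Model-agnostic occlusion in the spirit of the paper: the `model` here is
# a stand-in function mapping a 1-d signal to a scalar prediction.
def occlusion_saliency(model, x, window=10, fill=0.0):
    base = model(x)
    saliency = np.zeros(len(x))
    for start in range(0, len(x) - window + 1):
        occluded = x.copy()
        occluded[start:start + window] = fill
        saliency[start:start + window] += abs(model(occluded) - base)
    return saliency  # high values mark influential time steps

model = lambda x: float(x[40:60].mean())   # toy "predictor"
x = np.random.default_rng(0).normal(size=100)
print(occlusion_saliency(model, x).round(2))
```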
Bacciu, Davide; Akarmazyan, Siranush; Armengaud, Eric; Bacco, Manlio; Bravos, George; Calandra, Calogero; Carlini, Emanuele; Carta, Antonio; Cassara, Pietro; Coppola, Massimo; Davalas, Charalampos; Dazzi, Patrizio; Degennaro, Maria Carmela; Sarli, Daniele Di; Dobaj, Jürgen; Gallicchio, Claudio; Girbal, Sylvain; Gotta, Alberto; Groppo, Riccardo; Lomonaco, Vincenzo; Macher, Georg; Mazzei, Daniele; Mencagli, Gabriele; Michail, Dimitrios; Micheli, Alessio; Peroglio, Roberta; Petroni, Salvatore; Potenza, Rosaria; Pourdanesh, Farank; Sardianos, Christos; Tserpes, Konstantinos; Tagliabò, Fulvio; Valtl, Jakob; Varlamis, Iraklis; Veledar, Omar (Ed.)
TEACHING - Trustworthy autonomous cyber-physical applications through human-centred intelligence Conference
Proceedings of the 2021 IEEE International Conference on Omni-Layer Intelligent Systems (COINS), 2021.
Abstract | Links | BibTeX | Tags: artificial intelligence, Continual learning, federated learning, humanistic intelligence, reservoir computing, trustworthy AI
@conference{Bacciu2021d,
title = {TEACHING - Trustworthy autonomous cyber-physical applications through human-centred intelligence},
editor = {Davide Bacciu and Siranush Akarmazyan and Eric Armengaud and Manlio Bacco and George Bravos and Calogero Calandra and Emanuele Carlini and Antonio Carta and Pietro Cassara and Massimo Coppola and Charalampos Davalas and Patrizio Dazzi and Maria Carmela Degennaro and Daniele Di Sarli and Jürgen Dobaj and Claudio Gallicchio and Sylvain Girbal and Alberto Gotta and Riccardo Groppo and Vincenzo Lomonaco and Georg Macher and Daniele Mazzei and Gabriele Mencagli and Dimitrios Michail and Alessio Micheli and Roberta Peroglio and Salvatore Petroni and Rosaria Potenza and Farank Pourdanesh and Christos Sardianos and Konstantinos Tserpes and Fulvio Tagliabò and Jakob Valtl and Iraklis Varlamis and Omar Veledar},
doi = {10.1109/COINS51742.2021.9524099},
year = {2021},
date = {2021-08-23},
urldate = {2021-08-23},
booktitle = {Proceedings of the 2021 IEEE International Conference on Omni-Layer Intelligent Systems (COINS) },
abstract = {This paper discusses the perspective of the H2020 TEACHING project on the next generation of autonomous applications running in a distributed and highly heterogeneous environment comprising both virtual and physical resources spanning the edge-cloud continuum. TEACHING puts forward a human-centred vision leveraging the physiological, emotional, and cognitive state of the users as a driver for the adaptation and optimization of the autonomous applications. It does so by building a distributed, embedded and federated learning system complemented by methods and tools to enforce its dependability, security and privacy preservation. The paper discusses the main concepts of the TEACHING approach and singles out the main AI-related research challenges associated with it. Further, we provide a discussion of the design choices for the TEACHING system to tackle the aforementioned challenges.},
keywords = {artificial intelligence, Continual learning, federated learning, humanistic intelligence, reservoir computing, trustworthy AI},
pubstate = {published},
tppubtype = {conference}
}
This paper discusses the perspective of the H2020 TEACHING project on the next generation of autonomous applications running in a distributed and highly heterogeneous environment comprising both virtual and physical resources spanning the edge-cloud continuum. TEACHING puts forward a human-centred vision leveraging the physiological, emotional, and cognitive state of the users as a driver for the adaptation and optimization of the autonomous applications. It does so by building a distributed, embedded and federated learning system complemented by methods and tools to enforce its dependability, security and privacy preservation. The paper discusses the main concepts of the TEACHING approach and singles out the main AI-related research challenges associated with it. Further, we provide a discussion of the design choices for the TEACHING system to tackle the aforementioned challenges.
Rosasco, Andrea; Carta, Antonio; Cossu, Andrea; Lomonaco, Vincenzo; Bacciu, Davide
Distilled Replay: Overcoming Forgetting through Synthetic Samples Workshop
IJCAI 2021 workshop on continual semi-supervised learning (CSSL 2021) , 2021.
Abstract | Links | BibTeX | Tags: Continual learning, dataset distillation, deep learning
@workshop{Rosasco2021,
title = {Distilled Replay: Overcoming Forgetting through Synthetic Samples},
author = {Andrea Rosasco and Antonio Carta and Andrea Cossu and Vincenzo Lomonaco and Davide Bacciu},
url = {https://arxiv.org/abs/2103.15851, Arxiv},
year = {2021},
date = {2021-08-19},
urldate = {2021-08-19},
booktitle = {IJCAI 2021 workshop on continual semi-supervised learning (CSSL 2021) },
abstract = {Replay strategies are Continual Learning techniques which mitigate catastrophic forgetting by keeping a buffer of patterns from previous experience, which are interleaved with new data during training. The amount of patterns stored in the buffer is a critical parameter which largely influences the final performance and the memory footprint of the approach. This work introduces Distilled Replay, a novel replay strategy for Continual Learning which is able to mitigate forgetting by keeping a very small buffer (up to one pattern per class) of highly informative samples. Distilled Replay builds the buffer through a distillation process which compresses a large dataset into a tiny set of informative examples. We show the effectiveness of our Distilled Replay against naive replay, which randomly samples patterns from the dataset, on four popular Continual Learning benchmarks.},
keywords = {Continual learning, dataset distillation, deep learning},
pubstate = {published},
tppubtype = {workshop}
}
Replay strategies are Continual Learning techniques which mitigate catastrophic forgetting by keeping a buffer of patterns from previous experience, which are interleaved with new data during training. The amount of patterns stored in the buffer is a critical parameter which largely influences the final performance and the memory footprint of the approach. This work introduces Distilled Replay, a novel replay strategy for Continual Learning which is able to mitigate forgetting by keeping a very small buffer (up to one pattern per class) of highly informative samples. Distilled Replay builds the buffer through a distillation process which compresses a large dataset into a tiny set of informative examples. We show the effectiveness of our Distilled Replay against naive replay, which randomly samples patterns from the dataset, on four popular Continual Learning benchmarks.
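A sketch of the replay half of the strategy, with the distillation step stubbed out by random placeholders where the paper would put samples distilled from the stream: each training batch interleaves the tiny buffer with incoming data.

```python
import torch

# Replay loop with a one-sample-per-class buffer. The buffer contents here
# are random stand-ins; Distilled Replay would fill them via distillation.
model = torch.nn.Linear(784, 10)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
loss_fn = torch.nn.CrossEntropyLoss()

buffer_x = torch.randn(10, 784)              # one synthetic sample per class
buffer_y = torch.arange(10)

def train_step(new_x, new_y):
    x = torch.cat([new_x, buffer_x])         # interleave buffer with new data
    y = torch.cat([new_y, buffer_y])
    opt.zero_grad()
    loss_fn(model(x), y).backward()
    opt.step()

train_step(torch.randn(32, 784), torch.randint(0, 10, (32,)))
```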
Atzeni, Daniele; Bacciu, Davide; Errica, Federico; Micheli, Alessio
Modeling Edge Features with Deep Bayesian Graph Networks Conference
Proceedings of the International Joint Conference on Neural Networks (IJCNN 2021), IEEE, 2021.
Abstract | Links | BibTeX | Tags: deep learning for graphs, generative model, hidden Markov models, structured data processing
@conference{Atzeni2021,
title = { Modeling Edge Features with Deep Bayesian Graph Networks},
author = {Daniele Atzeni and Davide Bacciu and Federico Errica and Alessio Micheli},
doi = {10.1109/IJCNN52387.2021.9533430},
year = {2021},
date = {2021-07-18},
urldate = {2021-07-18},
booktitle = {Proceedings of the International Joint Conference on Neural Networks (IJCNN 2021)},
publisher = {IEEE},
organization = {IEEE},
abstract = {We propose an extension of the Contextual Graph Markov Model, a deep and probabilistic machine learning model for graphs, to model the distribution of edge features. Our approach is architectural, as we introduce an additional Bayesian network mapping edge features into discrete states to be used by the original model. In doing so, we are also able to build richer graph representations even in the absence of edge features, which is confirmed by the performance improvements on standard graph classification benchmarks. Moreover, we successfully test our proposal in a graph regression scenario where edge features are of fundamental importance, and we show that the learned edge representation provides substantial performance improvements against the original model on three link prediction tasks. By keeping the computational complexity linear in the number of edges, the proposed model is amenable to large-scale graph processing.},
keywords = {deep learning for graphs, generative model, hidden Markov models, structured data processing},
pubstate = {published},
tppubtype = {conference}
}
We propose an extension of the Contextual Graph Markov Model, a deep and probabilistic machine learning model for graphs, to model the distribution of edge features. Our approach is architectural, as we introduce an additional Bayesian network mapping edge features into discrete states to be used by the original model. In doing so, we are also able to build richer graph representations even in the absence of edge features, which is confirmed by the performance improvements on standard graph classification benchmarks. Moreover, we successfully test our proposal in a graph regression scenario where edge features are of fundamental importance, and we show that the learned edge representation provides substantial performance improvements against the original model on three link prediction tasks. By keeping the computational complexity linear in the number of edges, the proposed model is amenable to large-scale graph processing.
Numeroso, Danilo; Bacciu, Davide
MEG: Generating Molecular Counterfactual Explanations for Deep Graph Networks Conference
Proceedings of the International Joint Conference on Neural Networks (IJCNN 2021), IEEE, 2021.
BibTeX | Tags: deep learning for graphs, explainable AI, graph data, structured data processing
@conference{Numeroso2021,
title = {MEG: Generating Molecular Counterfactual Explanations for Deep Graph Networks},
author = {Danilo Numeroso and Davide Bacciu},
year = {2021},
date = {2021-07-18},
urldate = {2021-07-18},
booktitle = {Proceedings of the International Joint Conference on Neural Networks (IJCNN 2021)},
organization = {IEEE},
keywords = {deep learning for graphs, explainable AI, graph data, structured data processing},
pubstate = {published},
tppubtype = {conference}
}
Bacciu, Davide; Sarli, Daniele Di; Faraji, Pouria; Gallicchio, Claudio; Micheli, Alessio
Federated Reservoir Computing Neural Networks Conference
Proceedings of the International Joint Conference on Neural Networks (IJCNN 2021), IEEE, 2021.
Abstract | Links | BibTeX | Tags: activity recognition, distributed learning, Echo state networks, federated learning, internet of things, pervasive computing, randomized networks, reservoir computing, Sequential data
@conference{BacciuIJCNN2021,
title = {Federated Reservoir Computing Neural Networks},
author = {Davide Bacciu and Daniele Di Sarli and Pouria Faraji and Claudio Gallicchio and Alessio Micheli},
doi = {10.1109/IJCNN52387.2021.9534035},
year = {2021},
date = {2021-07-18},
urldate = {2021-07-18},
booktitle = {Proceedings of the International Joint Conference on Neural Networks (IJCNN 2021)},
publisher = {IEEE},
abstract = {A critical aspect in Federated Learning is the aggregation strategy for the combination of multiple models, trained on the edge, into a single model that incorporates all the knowledge in the federation. Common Federated Learning approaches for Recurrent Neural Networks (RNNs) do not provide guarantees on the predictive performance of the aggregated model. In this paper we show how the use of Echo State Networks (ESNs), which are efficient state-of-the-art RNN models for time-series processing, enables a form of federation that is optimal in the sense that it produces models mathematically equivalent to the corresponding centralized model. Furthermore, the proposed method is compliant with privacy constraints. The proposed method, which we denote as Incremental Federated Learning, is experimentally evaluated against an averaging strategy on two datasets for human state and activity recognition.},
keywords = {activity recognition, distributed learning, Echo state networks, federated learning, internet of things, pervasive computing, randomized networks, reservoir computing, Sequential data},
pubstate = {published},
tppubtype = {conference}
}
A critical aspect in Federated Learning is the aggregation strategy for the combination of multiple models, trained on the edge, into a single model that incorporates all the knowledge in the federation. Common Federated Learning approaches for Recurrent Neural Networks (RNNs) do not provide guarantees on the predictive performance of the aggregated model. In this paper we show how the use of Echo State Networks (ESNs), which are efficient state-of-the-art RNN models for time-series processing, enables a form of federation that is optimal in the sense that it produces models mathematically equivalent to the corresponding centralized model. Furthermore, the proposed method is compliant with privacy constraints. The proposed method, which we denote as Incremental Federated Learning, is experimentally evaluated against an averaging strategy on two datasets for human state and activity recognition.
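The exactness claim can be appreciated from the algebra of the readout. Assuming the readout is fit by ridge regression, as is standard for ESNs, the solution W = (H^T H + lambda I)^(-1) H^T Y depends on the data only through H^T H and H^T Y, which are sums over samples: clients can therefore ship their local sums, and the aggregate reproduces the centralized solution without any averaging error. A sketch with dummy local state matrices:

```python
import numpy as np

# Each client holds local reservoir states H and targets Y, computed with a
# shared fixed reservoir. Only the sufficient statistics H^T H and H^T Y
# leave the device; aggregating them equals training on the pooled data.
rng = np.random.default_rng(0)
clients = [(rng.normal(size=(50, 20)), rng.normal(size=(50, 1))) for _ in range(3)]

lam = 1e-2
HtH = sum(H.T @ H for H, _ in clients)
HtY = sum(H.T @ Y for H, Y in clients)
W_fed = np.linalg.solve(HtH + lam * np.eye(20), HtY)

H_all = np.vstack([H for H, _ in clients])   # centralized baseline
Y_all = np.vstack([Y for _, Y in clients])
W_cen = np.linalg.solve(H_all.T @ H_all + lam * np.eye(20), H_all.T @ Y_all)
print(np.allclose(W_fed, W_cen))             # True
```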
Bacciu, Davide; Podda, Marco
GraphGen-Redux: a Fast and Lightweight Recurrent Model for Labeled Graph Generation Conference
Proceedings of the International Joint Conference on Neural Networks (IJCNN 2021), IEEE, 2021.
Abstract | Links | BibTeX | Tags: deep learning, deep learning for graphs, generative model, graph data
@conference{BacciuPoddaIJCNN2021,
title = {GraphGen-Redux: a Fast and Lightweight Recurrent Model for Labeled Graph Generation},
author = {Davide Bacciu and Marco Podda},
doi = {10.1109/IJCNN52387.2021.9533743},
year = {2021},
date = {2021-07-18},
urldate = {2021-07-18},
booktitle = {Proceedings of the International Joint Conference on Neural Networks (IJCNN 2021)},
organization = {IEEE},
abstract = {The problem of labeled graph generation is gaining attention in the Deep Learning community. The task is challenging due to the sparse and discrete nature of graph spaces. Several approaches have been proposed in the literature, most of which require transforming the graphs into sequences that encode their structure and labels and learning the distribution of such sequences through an auto-regressive generative model. Among this family of approaches, we focus on the Graphgen model. The preprocessing phase of Graphgen transforms graphs into unique edge sequences called Depth-First Search (DFS) codes, such that two isomorphic graphs are assigned the same DFS code. Each element of a DFS code is associated with a graph edge: specifically, it is a quintuple comprising one node identifier for each of the two endpoints, their node labels, and the edge label. Graphgen learns to generate such sequences auto-regressively and models the probability of each component of the quintuple independently. While effective, the independence assumption made by the model is too loose to capture the complex label dependencies of real-world graphs precisely. By introducing a novel graph preprocessing approach, we are able to process the labeling information of both nodes and edges jointly. The corresponding model, which we term Graphgen-redux, improves upon the generative performance of Graphgen in a wide range of datasets of chemical and social graphs. In addition, it uses approximately 78% fewer parameters than the vanilla variant and requires 50% fewer epochs of training on average.},
keywords = {deep learning, deep learning for graphs, generative model, graph data},
pubstate = {published},
tppubtype = {conference}
}
The problem of labeled graph generation is gaining attention in the Deep Learning community. The task is challenging due to the sparse and discrete nature of graph spaces. Several approaches have been proposed in the literature, most of which require transforming the graphs into sequences that encode their structure and labels and learning the distribution of such sequences through an auto-regressive generative model. Among this family of approaches, we focus on the Graphgen model. The preprocessing phase of Graphgen transforms graphs into unique edge sequences called Depth-First Search (DFS) codes, such that two isomorphic graphs are assigned the same DFS code. Each element of a DFS code is associated with a graph edge: specifically, it is a quintuple comprising one node identifier for each of the two endpoints, their node labels, and the edge label. Graphgen learns to generate such sequences auto-regressively and models the probability of each component of the quintuple independently. While effective, the independence assumption made by the model is too loose to capture the complex label dependencies of real-world graphs precisely. By introducing a novel graph preprocessing approach, we are able to process the labeling information of both nodes and edges jointly. The corresponding model, which we term Graphgen-redux, improves upon the generative performance of Graphgen in a wide range of datasets of chemical and social graphs. In addition, it uses approximately 78% fewer parameters than the vanilla variant and requires 50% fewer epochs of training on average.
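The quintuple encoding can be made concrete on a toy labelled graph. The sketch below emits each edge once as (t_u, t_v, L_u, L_e, L_v) along one arbitrary depth-first traversal; Graphgen additionally canonicalizes the ordering into minimum DFS codes, and Graphgen-redux models the three labels of each quintuple jointly rather than independently.

```python
# Emit each edge of a labelled graph once as (t_u, t_v, L_u, L_e, L_v),
# where t_* are DFS discovery times. This is one arbitrary DFS order,
# not the canonical minimum DFS code used by Graphgen.
def dfs_code(adj, node_labels, edge_labels, u=0, times=None, seen=None, code=None):
    if times is None:
        times, seen, code = {u: 0}, set(), []
    for v in sorted(adj[u]):
        e = frozenset((u, v))
        if e in seen:
            continue
        seen.add(e)
        new = v not in times
        if new:
            times[v] = len(times)
        code.append((times[u], times[v], node_labels[u], edge_labels[e], node_labels[v]))
        if new:                      # tree edge: continue the traversal from v
            dfs_code(adj, node_labels, edge_labels, v, times, seen, code)
    return code

adj = {0: {1, 2}, 1: {0, 2}, 2: {0, 1}}      # a labelled triangle
atoms = {0: "C", 1: "O", 2: "N"}
bonds = {frozenset((0, 1)): "single", frozenset((1, 2)): "double", frozenset((0, 2)): "single"}
print(dfs_code(adj, atoms, bonds))
# [(0, 1, 'C', 'single', 'O'), (1, 2, 'O', 'double', 'N'), (2, 0, 'N', 'single', 'C')]
```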
Errica, Federico; Bacciu, Davide; Micheli, Alessio
Graph Mixture Density Networks Conference
Proceedings of the 38th International Conference on Machine Learning (ICML 2021), PMLR, 2021.
Links | BibTeX | Tags: deep learning for graphs, generative model, graph data, structured data processing
@conference{Errica2021,
title = {Graph Mixture Density Networks},
author = {Federico Errica and Davide Bacciu and Alessio Micheli},
url = {https://proceedings.mlr.press/v139/errica21a.html, PDF},
year = {2021},
date = {2021-07-18},
urldate = {2021-07-18},
booktitle = {Proceedings of the 38th International Conference on Machine Learning (ICML 2021)},
pages = {3025-3035},
publisher = {PMLR},
keywords = {deep learning for graphs, generative model, graph data, structured data processing},
pubstate = {published},
tppubtype = {conference}
}
Lomonaco, Vincenzo; Pellegrini, Lorenzo; Cossu, Andrea; Carta, Antonio; Graffieti, Gabriele; Hayes, Tyler L; Lange, Matthias De; Masana, Marc; Pomponi, Jary; van de Ven, Gido; Mundt, Martin; She, Qi; Cooper, Keiland; Forest, Jeremy; Belouadah, Eden; Calderara, Simone; Parisi, German I; Cuzzolin, Fabio; Tolias, Andreas; Scardapane, Simone; Antiga, Luca; Ahmad, Subutai; Popescu, Adrian; Kanan, Christopher; van de Weijer, Joost; Tuytelaars, Tinne; Bacciu, Davide; Maltoni, Davide
Avalanche: an End-to-End Library for Continual Learning Workshop
Proceedings of the CVPR 2021 Workshop on Continual Learning, IEEE, 2021.
Links | BibTeX | Tags: Continual learning, deep learning, software
@workshop{lomonaco2021avalanche,
title = {Avalanche: an End-to-End Library for Continual Learning},
author = {Vincenzo Lomonaco and Lorenzo Pellegrini and Andrea Cossu and Antonio Carta and Gabriele Graffieti and Tyler L Hayes and Matthias De Lange and Marc Masana and Jary Pomponi and Gido van de Ven and Martin Mundt and Qi She and Keiland Cooper and Jeremy Forest and Eden Belouadah and Simone Calderara and German I Parisi and Fabio Cuzzolin and Andreas Tolias and Simone Scardapane and Luca Antiga and Subutai Ahmad and Adrian Popescu and Christopher Kanan and Joost van de Weijer and Tinne Tuytelaars and Davide Bacciu and Davide Maltoni},
url = {https://arxiv.org/abs/2104.00405, Arxiv},
year = {2021},
date = {2021-06-19},
urldate = {2021-06-19},
booktitle = {Proceedings of the CVPR 2021 Workshop on Continual Learning },
pages = {3600-3610},
publisher = {IEEE},
keywords = {Continual learning, deep learning, software},
pubstate = {published},
tppubtype = {workshop}
}
Sattar, Asma; Bacciu, Davide
Context-aware Graph Convolutional Autoencoder Conference
Proceedings of the 16th International Work Conference on Artificial Neural Networks (IWANN 2021), 12862, LNCS, Springer, 2021.
Abstract | Links | BibTeX | Tags: deep learning for graphs, graph data, recommendation systems
@conference{Sattar2021,
title = {Context-aware Graph Convolutional Autoencoder},
author = {Asma Sattar and Davide Bacciu},
doi = {10.1007/978-3-030-85030-2_23},
year = {2021},
date = {2021-06-16},
urldate = {2021-06-16},
booktitle = {Proceedings of the 16th International Work Conference on Artificial Neural Networks (IWANN 2021)},
volume = {12862},
pages = { 279-290},
publisher = {Springer},
series = {LNCS},
abstract = {Recommendation problems can be addressed as link prediction tasks in a bipartite graph between user and item nodes, labelled with ratings on the edges. Existing matrix completion approaches model the user’s opinion on items while ignoring context information that can instead be associated with the edges of the bipartite graph. Context is an important factor to be considered as it heavily affects opinions and preferences. Following this line of research, this paper proposes a graph convolutional auto-encoder approach which considers users’ opinion on items as well as the static node features and context information on edges. Our graph encoder produces a representation of users and items from the perspective of context, static features, and rating opinion. The empirical analysis on three real-world datasets shows that the proposed approach outperforms recent state-of-the-art recommendation systems.},
keywords = {deep learning for graphs, graph data, recommendation systems},
pubstate = {published},
tppubtype = {conference}
}
Recommendation problems can be addressed as link prediction tasks in a bipartite graph between user and item nodes, labelled with ratings on the edges. Existing matrix completion approaches model the user’s opinion on items while ignoring context information that can instead be associated with the edges of the bipartite graph. Context is an important factor to be considered as it heavily affects opinions and preferences. Following this line of research, this paper proposes a graph convolutional auto-encoder approach which considers users’ opinion on items as well as the static node features and context information on edges. Our graph encoder produces a representation of users and items from the perspective of context, static features, and rating opinion. The empirical analysis on three real-world datasets shows that the proposed approach outperforms recent state-of-the-art recommendation systems.
Bacciu, Davide; Sarli, Daniele Di; Gallicchio, Claudio; Micheli, Alessio; Puccinelli, Niccolo
Benchmarking Reservoir and Recurrent Neural Networks for Human State and Activity Recognition Conference
Proceedings of the 16th International Work Conference on Artificial Neural Networks (IWANN 2021), 12862, Springer, 2021.
Abstract | Links | BibTeX | Tags: activity recognition, Echo state networks, recurrent neural network, reservoir computing
@conference{Bacciu2021,
title = {Benchmarking Reservoir and Recurrent Neural Networks for Human State and Activity Recognition},
author = {Davide Bacciu and Daniele Di Sarli and Claudio Gallicchio and Alessio Micheli and Niccolo Puccinelli},
doi = {10.1007/978-3-030-85099-9_14},
year = {2021},
date = {2021-06-16},
urldate = {2021-06-16},
booktitle = {Proceedings of the 16th International Work Conference on Artificial Neural Networks (IWANN 2021)},
volume = {12862},
pages = {168-179},
publisher = {Springer},
abstract = {Monitoring of human states from streams of sensor data is an appealing application area for Recurrent Neural Network (RNN) models. In such a scenario, Echo State Network (ESN) models from the Reservoir Computing paradigm can represent good candidates due to the efficient training algorithms, which, compared to fully trainable RNNs, definitely ease embedding on edge devices.
In this paper, we provide an experimental analysis aimed at assessing the performance of ESNs on tasks of human state and activity recognition, in both shallow and deep setups. Our analysis is conducted in comparison with vanilla RNNs, Long Short-Term Memory, Gated Recurrent Units, and their deep variations. Our empirical results on several datasets clearly indicate that, despite their simplicity, ESNs are able to achieve a level of accuracy that is competitive with those models that require full adaptation of the parameters. From a broader perspective, our analysis also points out that recurrent networks can be a first choice for the class of tasks under consideration, in particular in their deep and gated variants.},
keywords = {activity recognition, Echo state networks, recurrent neural network, reservoir computing},
pubstate = {published},
tppubtype = {conference}
}
Monitoring of human states from streams of sensor data is an appealing application area for Recurrent Neural Network (RNN) models. In such a scenario, Echo State Network (ESN) models from the Reservoir Computing paradigm can represent good candidates due to the efficient training algorithms, which, compared to fully trainable RNNs, definitely ease embedding on edge devices.
In this paper, we provide an experimental analysis aimed at assessing the performance of ESNs on tasks of human state and activity recognition, in both shallow and deep setups. Our analysis is conducted in comparison with vanilla RNNs, Long Short-Term Memory, Gated Recurrent Units, and their deep variations. Our empirical results on several datasets clearly indicate that, despite their simplicity, ESNs are able to achieve a level of accuracy that is competitive with those models that require full adaptation of the parameters. From a broader perspective, our analysis also points out that recurrent networks can be a first choice for the class of tasks under consideration, in particular in their deep and gated variants.
Ferrari, Elisa; Bacciu, Davide
Addressing Fairness, Bias and Class Imbalance in Machine Learning: the FBI-loss Unpublished
Online on Arxiv, 2021.
Abstract | Links | BibTeX | Tags: bioinformatics, biomedical data, deep learning, trustworthy AI
@unpublished{Ferrari2021,
title = {Addressing Fairness, Bias and Class Imbalance in Machine Learning: the FBI-loss},
author = {Elisa Ferrari and Davide Bacciu},
url = {https://arxiv.org/abs/2105.06345, Arxiv},
year = {2021},
date = {2021-05-13},
urldate = {2021-05-13},
abstract = {Resilience to class imbalance and confounding biases, together with the assurance of fairness guarantees, are highly desirable properties of autonomous decision-making systems with real-life impact. Many different targeted solutions have been proposed to address these three problems separately; however, a unifying perspective seems to be missing. With this work, we provide a general formalization, showing that they are different expressions of unbalance. Following this intuition, we formulate a unified loss correction to address issues related to Fairness, Biases and Imbalances (FBI-loss). The correction capabilities of the proposed approach are assessed on three real-world benchmarks, each associated with one of the issues under consideration, and on a family of synthetic data in order to better investigate the effectiveness of our loss on tasks with different complexities. The empirical results highlight that the flexible formulation of the FBI-loss also leads to competitive performance with respect to literature solutions specialised for the individual problems.},
howpublished = {Online on Arxiv},
keywords = {bioinformatics, biomedical data, deep learning, trustworthy AI},
pubstate = {published},
tppubtype = {unpublished}
}
Resilience to class imbalance and confounding biases, together with the assurance of fairness guarantees, are highly desirable properties of autonomous decision-making systems with real-life impact. Many different targeted solutions have been proposed to address these three problems separately; however, a unifying perspective seems to be missing. With this work, we provide a general formalization, showing that they are different expressions of unbalance. Following this intuition, we formulate a unified loss correction to address issues related to Fairness, Biases and Imbalances (FBI-loss). The correction capabilities of the proposed approach are assessed on three real-world benchmarks, each associated with one of the issues under consideration, and on a family of synthetic data in order to better investigate the effectiveness of our loss on tasks with different complexities. The empirical results highlight that the flexible formulation of the FBI-loss also leads to competitive performance with respect to literature solutions specialised for the individual problems.
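The loss-correction idea can be illustrated generically. The exact FBI-loss formulation is given in the paper; the sketch below shows only the inverse-frequency reweighting that such corrections generalize.

```python
import torch

# Per-class weights inversely proportional to class frequency, so that
# under-represented groups weigh more in the objective. This is a generic
# illustration of loss correction for unbalance, not the FBI-loss itself.
y = torch.tensor([0, 0, 0, 0, 0, 0, 1, 1])          # imbalanced labels
counts = torch.bincount(y).float()
weights = counts.sum() / (len(counts) * counts)      # inverse-frequency weights

logits = torch.randn(8, 2)
loss = torch.nn.functional.cross_entropy(logits, y, weight=weights)
```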
Carta, Antonio; Cossu, Andrea; Errica, Federico; Bacciu, Davide
Catastrophic Forgetting in Deep Graph Networks: an Introductory Benchmark for Graph Classification Workshop
The Web Conference 2021 Workshop on Graph Learning Benchmarks (GLB21), 2021.
Abstract | BibTeX | Tags: Continual learning, deep learning for graphs, structured data processing
@workshop{Carta2021,
title = { Catastrophic Forgetting in Deep Graph Networks: an Introductory Benchmark for Graph Classification },
author = {Antonio Carta and Andrea Cossu and Federico Errica and Davide Bacciu},
year = {2021},
date = {2021-04-12},
booktitle = {The Web Conference 2021 Workshop on Graph Learning Benchmarks (GLB21)},
abstract = {In this work, we study the phenomenon of catastrophic forgetting in the graph representation learning scenario. The primary objective of the analysis is to understand whether classical continual learning techniques for flat and sequential data have a tangible impact on performance when applied to graph data. To do so, we experiment with a structure-agnostic model and a deep graph network in a robust and controlled environment on three different datasets. The benchmark is complemented by an investigation on the effect of structure-preserving regularization techniques on catastrophic forgetting. We find that replay is the most effective strategy so far, and it also benefits the most from the use of regularization. Our findings suggest interesting future research at the intersection of the continual and graph representation learning fields. Finally, we provide researchers with a flexible software framework to reproduce our results and carry out further experiments.},
keywords = {Continual learning, deep learning for graphs, structured data processing},
pubstate = {published},
tppubtype = {workshop}
}
In this work, we study the phenomenon of catastrophic forgetting in the graph representation learning scenario. The primary objective of the analysis is to understand whether classical continual learning techniques for flat and sequential data have a tangible impact on performance when applied to graph data. To do so, we experiment with a structure-agnostic model and a deep graph network in a robust and controlled environment on three different datasets. The benchmark is complemented by an investigation on the effect of structure-preserving regularization techniques on catastrophic forgetting. We find that replay is the most effective strategy so far, and it also benefits the most from the use of regularization. Our findings suggest interesting future research at the intersection of the continual and graph representation learning fields. Finally, we provide researchers with a flexible software framework to reproduce our results and carry out further experiments.
Errica, Federico; Giulini, Marco; Bacciu, Davide; Menichetti, Roberto; Micheli, Alessio; Potestio, Raffaello
Errica, Federico; Giulini, Marco; Bacciu, Davide; Menichetti, Roberto; Micheli, Alessio; Potestio, Raffaello
A deep graph network-enhanced sampling approach to efficiently explore the space of reduced representations of proteins Journal Article
In: Frontiers in Molecular Biosciences, 8, pp. 136, 2021.
Links | BibTeX | Tags: deep learning, deep learning for graphs, graph data, structured data processing
@article{errica_deep_2021,
title = {A deep graph network-enhanced sampling approach to efficiently explore the space of reduced representations of proteins},
author = {Federico Errica and Marco Giulini and Davide Bacciu and Roberto Menichetti and Alessio Micheli and Raffaello Potestio},
doi = {10.3389/fmolb.2021.637396},
year = {2021},
date = {2021-02-28},
journal = {Frontiers in Molecular Biosciences},
volume = {8},
pages = {136},
publisher = {Frontiers},
keywords = {deep learning, deep learning for graphs, graph data, structured data processing},
pubstate = {published},
tppubtype = {article}
}
Bontempi, Gianluca; Chavarriaga, Ricardo; De Canck, Hans; Girardi, Emanuela; Hoos, Holger; Kilbane-Dawe, Iarla; Ball, Tonio; Nowé, Ann; Sousa, Jose; Bacciu, Davide; Aldinucci, Marco; De Domenico, Manlio; Saffiotti, Alessandro; Maratea, Marco
The CLAIRE COVID-19 initiative: approach, experiences and recommendations Journal Article
In: Ethics and Information Technology, 2021.
Links | BibTeX | Tags: artificial intelligence, bioinformatics, biomedical data
@article{Bontempi2021,
title = {The CLAIRE COVID-19 initiative: approach, experiences and recommendations},
author = {Gianluca Bontempi and Ricardo Chavarriaga and Hans De Canck and Emanuela Girardi and Holger Hoos and Iarla Kilbane-Dawe and Tonio Ball and Ann Nowé and Jose Sousa and Davide Bacciu and Marco Aldinucci and Manlio De Domenico and Alessandro Saffiotti and Marco Maratea},
doi = {10.1007/s10676-020-09567-7},
year = {2021},
date = {2021-02-09},
journal = {Ethics and Information Technology},
keywords = {artificial intelligence, bioinformatics, biomedical data},
pubstate = {published},
tppubtype = {article}
}
Valenti, Andrea; Barsotti, Michele; Bacciu, Davide; Ascari, Luca
A Deep Classifier for Upper-Limbs Motor Anticipation Tasks in an Online BCI Setting Journal Article
In: Bioengineering, 2021.
Links | BibTeX | Tags: autoencoder, biomedical data, deep learning, Sequential data
@article{Valenti2021,
title = {A Deep Classifier for Upper-Limbs Motor Anticipation Tasks in an Online BCI Setting},
author = {Andrea Valenti and Michele Barsotti and Davide Bacciu and Luca Ascari},
url = {https://www.mdpi.com/2306-5354/8/2/21, Open Access},
doi = {10.3390/bioengineering8020021},
year = {2021},
date = {2021-02-05},
journal = {Bioengineering},
keywords = {autoencoder, biomedical data, deep learning, Sequential data},
pubstate = {published},
tppubtype = {article}
}
Bacciu, Davide; Bertoncini, Gioele; Morelli, Davide
Topographic mapping for quality inspection and intelligent filtering of smart-bracelet data Journal Article
In: Neural Computing and Applications, 2021.
Links | BibTeX | Tags: biomedical data, data visualization, explainable AI, internet of things, multivariate time-series, self-organizing map
@article{BacciuNCA2020,
title = {Topographic mapping for quality inspection and intelligent filtering of smart-bracelet data},
author = {Davide Bacciu and Gioele Bertoncini and Davide Morelli},
doi = {10.1007/s00521-020-05600-4},
year = {2021},
date = {2021-01-04},
journal = {Neural Computing and Applications},
keywords = {biomedical data, data visualization, explainable AI, internet of things, multivariate time-series, self-organizing map},
pubstate = {published},
tppubtype = {article}
}
Crecchi, Francesco; Melis, Marco; Sotgiu, Angelo; Bacciu, Davide; Biggio, Battista
FADER: Fast Adversarial Example Rejection Journal Article
In: Neurocomputing, 2021, ISSN: 0925-2312.
Links | BibTeX | Tags: adversarial examples, adversarial machine learning, deep learning, detection, evasion attacks, rbf networks
@article{CRECCHI2021,
title = {FADER: Fast Adversarial Example Rejection},
author = {Francesco Crecchi and Marco Melis and Angelo Sotgiu and Davide Bacciu and Battista Biggio},
url = {https://arxiv.org/abs/2010.09119, Arxiv},
doi = {10.1016/j.neucom.2021.10.082},
issn = {0925-2312},
year = {2021},
date = {2021-01-01},
journal = {Neurocomputing},
keywords = {adversarial examples, adversarial machine learning, deep learning, detection, evasion attacks, rbf networks},
pubstate = {published},
tppubtype = {article}
}
2020
Ronchetti, Matteo; Bacciu, Davide
Generative Tomography Reconstruction Workshop
34th Conference on Neural Information Processing Systems (NeurIPS 2020), Workshop on Deep Learning and Inverse Problems, 2020.
Abstract | Links | BibTeX | Tags: adversarial learning, biomedical data, deep learning, generative model, inverse problems, machine vision
@workshop{tomographyNeurips2020,
title = {Generative Tomography Reconstruction},
author = {Matteo Ronchetti and Davide Bacciu},
url = {https://arxiv.org/pdf/2010.14933.pdf, PDF},
year = {2020},
date = {2020-12-11},
booktitle = {34th Conference on Neural Information Processing Systems (NeurIPS 2020), Workshop on Deep Learning and Inverse Problems},
abstract = {We propose an end-to-end differentiable architecture for tomography reconstruction that directly maps a noisy sinogram into a denoised reconstruction. Compared to existing approaches, our end-to-end architecture produces more accurate reconstructions while using fewer parameters and less time. We also propose a generative model that, given a noisy sinogram, can sample realistic reconstructions. This generative model can be used as a prior inside an iterative process that, by taking into consideration the physical model, can reduce artifacts and errors in the reconstructions.},
keywords = {adversarial learning, biomedical data, deep learning, generative model, inverse problems, machine vision},
pubstate = {published},
tppubtype = {workshop}
}
We propose an end-to-end differentiable architecture for tomography reconstruction that directly maps a noisy sinogram into a denoised reconstruction. Compared to existing approaches, our end-to-end architecture produces more accurate reconstructions while using fewer parameters and less time. We also propose a generative model that, given a noisy sinogram, can sample realistic reconstructions. This generative model can be used as a prior inside an iterative process that, by taking into consideration the physical model, can reduce artifacts and errors in the reconstructions.
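As a toy stand-in for the direct sinogram-to-image mapping described above (not the paper's architecture), the sketch below wires a small fully-connected network from a sinogram of assumed shape (angles x detectors) to a square image; all layer sizes are arbitrary assumptions.

import torch
import torch.nn as nn

class SinogramToImage(nn.Module):
    # Maps a (noisy) sinogram of per-sample shape (1, n_angles, n_detectors)
    # to a reconstructed image of shape (1, img_side, img_side).
    def __init__(self, n_angles=90, n_detectors=128, img_side=128):
        super().__init__()
        self.img_side = img_side
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(n_angles * n_detectors, 1024),
            nn.ReLU(),
            nn.Linear(1024, img_side * img_side),
        )

    def forward(self, sinogram):
        return self.net(sinogram).view(-1, 1, self.img_side, self.img_side)

# Training would minimize e.g. the MSE against reference reconstructions;
# an adversarial loss on top would make the mapping generative.
model = SinogramToImage()
sinogram = torch.randn(4, 1, 90, 128)    # batch of 4 noisy sinograms
reconstruction = model(sinogram)         # shape (4, 1, 128, 128)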
Bacciu, Davide; Conte, Alessio; Grossi, Roberto; Landolfi, Francesco; Marino, Andrea
K-plex Cover Pooling for Graph Neural Networks Workshop
34th Conference on Neural Information Processing Systems (NeurIPS 2020), Workshop on Learning Meets Combinatorial Algorithms, 2020.
Abstract | BibTeX | Tags: deep learning, deep learning for graphs, graph data, graph pooling, structured data processing
@workshop{kplexWS2020,
title = {K-plex Cover Pooling for Graph Neural Networks},
author = {Davide Bacciu and Alessio Conte and Roberto Grossi and Francesco Landolfi and Andrea Marino},
year = {2020},
date = {2020-12-11},
booktitle = {34th Conference on Neural Information Processing Systems (NeurIPS 2020), Workshop on Learning Meets Combinatorial Algorithms},
abstract = {We introduce a novel pooling technique, grounded in classical results from graph theory, that is non-parametric and generalizes well to graphs of different nature and connectivity patterns. Our pooling method, named KPlexPool, builds on the concepts of graph covers and $k$-plexes, i.e. pseudo-cliques where each node can miss up to $k$ links. The experimental evaluation on molecular and social graph classification shows that KPlexPool achieves state-of-the-art performance, supporting the intuition that well-founded graph-theoretic approaches can be effectively integrated into learning models for graphs.},
keywords = {deep learning, deep learning for graphs, graph data, graph pooling, structured data processing},
pubstate = {published},
tppubtype = {workshop}
}
We introduce a novel pooling technique, grounded in classical results from graph theory, that is non-parametric and generalizes well to graphs of different nature and connectivity patterns. Our pooling method, named KPlexPool, builds on the concepts of graph covers and $k$-plexes, i.e. pseudo-cliques where each node can miss up to $k$ links. The experimental evaluation on molecular and social graph classification shows that KPlexPool achieves state-of-the-art performance, supporting the intuition that well-founded graph-theoretic approaches can be effectively integrated into learning models for graphs.
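The k-plex notion at the heart of KPlexPool is easy to state in code: a node set S is a k-plex if every member has at least |S| - k neighbours inside S (a clique is a 1-plex). The sketch below pairs that test with a naive greedy cover and a super-node collapse; it is a simplified illustration under these assumptions, not the paper's algorithm.

import networkx as nx

def is_kplex(graph, nodes, k):
    # Every member must be adjacent to at least |S| - k other members.
    nodes = set(nodes)
    return all(len(set(graph.neighbors(v)) & nodes) >= len(nodes) - k
               for v in nodes)

def greedy_kplex_cover(graph, k):
    # Grow k-plexes greedily until every node is covered.
    uncovered, cover = set(graph.nodes), []
    while uncovered:
        plex = {next(iter(uncovered))}
        for v in graph.nodes:
            if v not in plex and is_kplex(graph, plex | {v}, k):
                plex.add(v)
        cover.append(plex)
        uncovered -= plex
    return cover

def pool(graph, cover):
    # Collapse each k-plex into a super-node; super-nodes are linked
    # when any of their members were adjacent in the original graph.
    membership = {v: i for i, plex in enumerate(cover) for v in plex}
    pooled = nx.Graph()
    pooled.add_nodes_from(range(len(cover)))
    for u, v in graph.edges:
        if membership[u] != membership[v]:
            pooled.add_edge(membership[u], membership[v])
    return pooled

G = nx.karate_club_graph()
print(pool(G, greedy_kplex_cover(G, k=2)))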
Bacciu, Davide; Numeroso, Danilo
Explaining Deep Graph Networks with Molecular Counterfactuals Workshop
34th Conference on Neural Information Processing Systems (NeurIPS 2020), Workshop on Machine Learning for Molecules - Accepted as Contributed Talk (Oral), 2020.
Abstract | Links | BibTeX | Tags: deep learning for graphs, explainable AI, graph data, structured data processing
@workshop{megWS2020,
title = {Explaining Deep Graph Networks with Molecular Counterfactuals},
author = {Davide Bacciu and Danilo Numeroso},
url = {https://arxiv.org/pdf/2011.05134.pdf, Arxiv},
year = {2020},
date = {2020-12-11},
booktitle = {34th Conference on Neural Information Processing Systems (NeurIPS 2020), Workshop on Machine Learning for Molecules - Accepted as Contributed Talk (Oral)},
abstract = {We present MEG (Molecular Explanation Generator), a novel approach to tackle the explainability of deep graph networks in the context of molecule property prediction tasks. We generate informative counterfactual explanations for a specific prediction in the form of (valid) compounds with high structural similarity and different predicted properties. We discuss preliminary results showing how the model can provide non-ML experts with key insights into what the learning model focuses on in the neighborhood of a molecule.},
keywords = {deep learning for graphs, explainable AI, graph data, structured data processing},
pubstate = {published},
tppubtype = {workshop}
}
We present MEG (Molecular Explanation Generator), a novel approach to tackle the explainability of deep graph networks in the context of molecule property prediction tasks. We generate informative counterfactual explanations for a specific prediction in the form of (valid) compounds with high structural similarity and different predicted properties. We discuss preliminary results showing how the model can provide non-ML experts with key insights into what the learning model focuses on in the neighborhood of a molecule.
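Stripping away the molecular machinery, the counterfactual idea can be illustrated with a brute-force search over single-edge edits that keeps the most similar graphs whose prediction flips. The predictor and similarity below are toy stand-ins, not MEG's components.

import networkx as nx

def counterfactuals(graph, predict, similarity, keep=5):
    # Try every single-edge edit; retain the most similar graphs
    # whose predicted label differs from the original one.
    original = predict(graph)
    candidates = []
    nodes = list(graph.nodes)
    for i, u in enumerate(nodes):
        for v in nodes[i + 1:]:
            g = graph.copy()
            if g.has_edge(u, v):
                g.remove_edge(u, v)
            else:
                g.add_edge(u, v)
            if predict(g) != original:
                candidates.append((similarity(graph, g), g))
    candidates.sort(key=lambda t: t[0], reverse=True)
    return [g for _, g in candidates[:keep]]

# Toy stand-ins: a "predictor" thresholding edge count, and a
# similarity based on edge overlap (Jaccard index).
predict = lambda g: g.number_of_edges() > 20
similarity = lambda a, b: (len(set(a.edges) & set(b.edges))
                           / max(len(set(a.edges) | set(b.edges)), 1))

G = nx.gnm_random_graph(10, 21, seed=0)
for cf in counterfactuals(G, predict, similarity):
    print(cf.number_of_edges(), "edges, prediction flipped")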
Carta, Antonio; Sperduti, Alessandro; Bacciu, Davide
Short-Term Memory Optimization in Recurrent Neural Networks by Autoencoder-based Initialization Workshop
34th Conference on Neural Information Processing Systems (NeurIPS 2020), Workshop on Beyond BackPropagation: Novel Ideas for Training Neural Architectures, 2020.
Abstract | Links | BibTeX | Tags: deep learning, memory networks, recurrent neural network, Sequential data
@workshop{CartaNeuripsWS2020,
title = { Short-Term Memory Optimization in Recurrent Neural Networks by Autoencoder-based Initialization },
author = {Antonio Carta and Alessandro Sperduti and Davide Bacciu},
url = {https://arxiv.org/abs/2011.02886, Arxiv},
year = {2020},
date = {2020-12-11},
booktitle = {34th Conference on Neural Information Processing Systems (NeurIPS 2020), Workshop on Beyond BackPropagation: Novel Ideas for Training Neural Architectures},
abstract = {Training RNNs to learn long-term dependencies is difficult due to vanishing gradients. We explore an alternative solution based on explicit memorization using linear autoencoders for sequences, which makes it possible to maximize short-term memory and can be solved in closed form, without backpropagation. We introduce an initialization scheme that pretrains the weights of a recurrent neural network to approximate the linear autoencoder of the input sequences, and we show how such pretraining can better support the solution of hard classification tasks on long sequences. We test our approach on sequential and permuted MNIST. We show that the proposed approach achieves a much lower reconstruction error for long sequences and better gradient propagation during the fine-tuning phase.},
keywords = {deep learning, memory networks, recurrent neural network, Sequential data},
pubstate = {published},
tppubtype = {workshop}
}
Training RNNs to learn long-term dependencies is difficult due to vanishing gradients. We explore an alternative solution based on explicit memorization using linear autoencoders for sequences, which makes it possible to maximize short-term memory and can be solved in closed form, without backpropagation. We introduce an initialization scheme that pretrains the weights of a recurrent neural network to approximate the linear autoencoder of the input sequences, and we show how such pretraining can better support the solution of hard classification tasks on long sequences. We test our approach on sequential and permuted MNIST. We show that the proposed approach achieves a much lower reconstruction error for long sequences and better gradient propagation during the fine-tuning phase.
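The paper obtains the linear sequence autoencoder in closed form via SVD, with no backpropagation; the runnable sketch below keeps only the initialization idea, fitting the linear recurrence y_t = A x_t + B y_{t-1} together with a decoder that reconstructs (x_t, y_{t-1}), by plain gradient descent, and copying the fitted maps into a standard RNN for fine-tuning. The gradient fit is a stand-in for the closed-form solution, and all names and sizes are illustrative assumptions.

import torch
import torch.nn as nn

d, h, T, n = 8, 32, 20, 256                    # input dim, hidden dim, length, #sequences
x = torch.randn(n, T, d)                       # toy input sequences

A = nn.Parameter(0.1 * torch.randn(h, d))      # input-to-hidden map
B = nn.Parameter(0.1 * torch.randn(h, h))      # hidden-to-hidden map
C = nn.Parameter(0.1 * torch.randn(d + h, h))  # decoder: y_t -> (x_t, y_{t-1})

opt = torch.optim.Adam([A, B, C], lr=1e-2)
for step in range(200):
    y, loss = torch.zeros(n, h), 0.0
    for t in range(T):
        prev = y
        y = x[:, t] @ A.T + prev @ B.T         # linear recurrence (no nonlinearity)
        rec = y @ C.T                          # reconstruct the pair (x_t, y_{t-1})
        loss = loss + ((rec[:, :d] - x[:, t]) ** 2).mean() \
                    + ((rec[:, d:] - prev) ** 2).mean()
    opt.zero_grad()
    loss.backward()
    opt.step()

# Initialize a standard RNN with the fitted maps, then fine-tune as usual.
rnn = nn.RNN(d, h, batch_first=True)
with torch.no_grad():
    rnn.weight_ih_l0.copy_(A)
    rnn.weight_hh_l0.copy_(B)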