Here you can find a consolidated (a.k.a. slowly updated) list of my publications. A frequently updated (and possibly noisy) list of works is available on my Google Scholar profile.
Below is a short list of highlighted publications from my recent activity.
Carta, Antonio; Sperduti, Alessandro; Bacciu, Davide: Encoding-based Memory for Recurrent Neural Networks. Journal article in: Neurocomputing, vol. 456, pp. 407-420, 2021.
Carta, Antonio; Sperduti, Alessandro; Bacciu, Davide: Short-Term Memory Optimization in Recurrent Neural Networks by Autoencoder-based Initialization. Workshop paper in: 34th Conference on Neural Information Processing Systems (NeurIPS 2020), Workshop on Beyond BackPropagation: Novel Ideas for Training Neural Architectures, 2020.
Carta, Antonio; Sperduti, Alessandro; Bacciu, Davide: Incremental training of a recurrent neural network exploiting a multi-scale dynamic memory. Conference paper in: Proceedings of the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases 2020 (ECML-PKDD 2020), Springer International Publishing, 2020.
Bacciu, Davide; Carta, Antonio: Sequential Sentence Embeddings for Semantic Similarity. Conference paper in: Proceedings of the 2019 IEEE Symposium Series on Computational Intelligence (SSCI'19), IEEE, 2019.
Bacciu, Davide; Carta, Antonio; Sperduti, Alessandro: Linear Memory Networks. Conference paper in: Proceedings of the 28th International Conference on Artificial Neural Networks (ICANN 2019), vol. 11727, Lecture Notes in Computer Science, Springer-Verlag, 2019.

@article{Carta2021b,
title = {Encoding-based Memory for Recurrent Neural Networks},
author = {Antonio Carta and Alessandro Sperduti and Davide Bacciu},
url = {https://arxiv.org/abs/2001.11771},
doi = {10.1016/j.neucom.2021.04.051},
year = {2021},
date = {2021-10-07},
urldate = {2021-10-07},
journal = {Neurocomputing},
volume = {456},
pages = {407-420},
publisher = {Elsevier},
abstract = {Learning to solve sequential tasks with recurrent models requires the ability to memorize long sequences and to extract task-relevant features from them. In this paper, we study the memorization subtask from the point of view of the design and training of recurrent neural networks. We propose a new model, the Linear Memory Network, which features an encoding-based memorization component built with a linear autoencoder for sequences. We extend the memorization component with a modular memory that encodes the hidden state sequence at different sampling frequencies. Additionally, we provide a specialized training algorithm that initializes the memory to efficiently encode the hidden activations of the network. The experimental results on synthetic and real-world datasets show that specializing the training algorithm to train the memorization component always improves the final performance whenever the memorization of long sequences is necessary to solve the problem. },
keywords = {},
pubstate = {published},
tppubtype = {article}
}
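The memorization component mentioned in this abstract is a linear autoencoder for sequences, which can be fitted in closed form. The snippet below is a minimal NumPy sketch of that idea, not the code used in the paper: the function name fit_linear_autoencoder is invented for illustration, and the recovery of B by least squares stands in for the exact SVD-based construction derived in the paper.

```python
import numpy as np

def fit_linear_autoencoder(seq, p):
    """Sketch: fit a linear autoencoder for one sequence x_1..x_T in closed
    form.  The memory update is m_t = A @ x_t + B @ m_{t-1}; A and B are
    recovered from a truncated SVD of the matrix of reversed prefixes
    (hypothetical helper, not the paper's code)."""
    T, d = seq.shape
    # Row t of Xi holds the reversed prefix [x_t, x_{t-1}, ..., x_1, 0, ...].
    Xi = np.zeros((T, T * d))
    for t in range(T):
        for k in range(t + 1):
            Xi[t, k * d:(k + 1) * d] = seq[t - k]
    U, S, Vt = np.linalg.svd(Xi, full_matrices=False)
    V = Vt[:p].T                  # (T*d, p): optimal rank-p encoding subspace
    A = V[:d].T                   # (p, d): encodes the current input x_t
    # B shifts older inputs one step deeper into the memory; recovered here by
    # least squares as a simplification of the exact SVD-based construction.
    B = np.linalg.lstsq(V[:-d], V[d:], rcond=None)[0].T if T > 1 else np.zeros((p, p))
    return A, B

# Toy usage: fit on a random sequence and run the linear memory update.
rng = np.random.default_rng(0)
x = rng.standard_normal((10, 4))          # T=10 steps, d=4 features
A, B = fit_linear_autoencoder(x, p=8)     # p must not exceed the rank of Xi
m = np.zeros(8)
for x_t in x:
    m = A @ x_t + B @ m                   # encoded memory of the prefix
```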
@workshop{CartaNeuripsWS2020,
title = {Short-Term Memory Optimization in Recurrent Neural Networks by Autoencoder-based Initialization},
author = {Antonio Carta and Alessandro Sperduti and Davide Bacciu},
url = {https://arxiv.org/abs/2011.02886},
year = {2020},
date = {2020-12-11},
urldate = {2020-12-11},
booktitle = {34th Conference on Neural Information Processing Systems (NeurIPS 2020), Workshop on Beyond BackPropagation: Novel Ideas for Training Neural Architectures},
abstract = {Training RNNs to learn long-term dependencies is difficult due to vanishing gradients. We explore an alternative solution based on explicit memorization using linear autoencoders for sequences, which makes it possible to maximize the short-term memory and can be solved in closed form without backpropagation. We introduce an initialization schema that pretrains the weights of a recurrent neural network to approximate the linear autoencoder of the input sequences, and we show how such pretraining can better support solving hard classification tasks with long sequences. We test our approach on sequential and permuted MNIST. We show that the proposed approach achieves a much lower reconstruction error for long sequences and a better gradient propagation during the fine-tuning phase.},
keywords = {},
pubstate = {published},
tppubtype = {workshop}
}
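The initialization described in this abstract can be pictured with a short sketch: the encoder matrices (A, B) of a fitted linear autoencoder for sequences are used as the initial input and recurrent weights of a vanilla tanh RNN, which is then fine-tuned as usual. The snippet below is only illustrative: the function names are invented, the placeholder A and B are random rather than fitted, and the paper's actual schema approximates the autoencoder of the input sequences more carefully.

```python
import numpy as np

def init_rnn_from_autoencoder(A, B):
    """Sketch: use the encoder matrices (A, B) of a linear autoencoder for
    sequences as the initial input-to-hidden and recurrent weights of a
    vanilla tanh RNN, so that before fine-tuning the hidden state roughly
    follows the linear memory m_t = A x_t + B m_{t-1}."""
    W_in = A.copy()                       # input-to-hidden weights  <- A
    W_rec = B.copy()                      # hidden-to-hidden weights <- B
    b = np.zeros(A.shape[0])              # zero bias keeps tanh near linear
    return W_in, W_rec, b

def rnn_forward(seq, W_in, W_rec, b):
    h = np.zeros(W_rec.shape[0])
    states = []
    for x_t in seq:
        h = np.tanh(W_in @ x_t + W_rec @ h + b)
        states.append(h)
    return np.stack(states)

# Usage with placeholder (A, B); in practice they would come from the
# closed-form autoencoder fit, not from random values.
rng = np.random.default_rng(1)
A = 0.1 * rng.standard_normal((8, 4))
B = 0.9 * np.eye(8)
x = rng.standard_normal((20, 4))
H = rnn_forward(x, *init_rnn_from_autoencoder(A, B))   # (20, 8) hidden states
```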
@conference{ecml2020LMN,
title = {Incremental training of a recurrent neural network exploiting a multi-scale dynamic memory},
author = {Antonio Carta and Alessandro Sperduti and Davide Bacciu},
year = {2020},
date = {2020-06-05},
urldate = {2020-06-05},
booktitle = {Proceedings of the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases 2020 (ECML-PKDD 2020)},
publisher = {Springer International Publishing},
abstract = {The effectiveness of recurrent neural networks can be largely influenced by their ability to store into their dynamical memory information extracted from input sequences at different frequencies and timescales. Such a feature can be introduced into a neural architecture by an appropriate modularization of the dynamic memory. In this paper we propose a novel incrementally trained recurrent architecture explicitly targeting multi-scale learning. First, we show how to extend the architecture of a simple RNN by separating its hidden state into different modules, each subsampling the network hidden activations at a different frequency. Then, we discuss a training algorithm where new modules are iteratively added to the model to learn progressively longer dependencies. Each new module works at a slower frequency than the previous ones and it is initialized to encode the subsampled sequence of hidden activations. Experimental results on synthetic and real-world datasets for speech recognition and handwritten character recognition show that the modular architecture and the incremental training algorithm improve the ability of recurrent neural networks to capture long-term dependencies.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
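As a rough illustration of the multi-scale memory and the incremental procedure sketched in this abstract, the following NumPy snippet keeps one state per module and lets module k update only every 2^k steps, with new (slower) modules appended one at a time. All names, the tanh update, and the random initialization of new modules are assumptions made for illustration; in the paper each new module is instead initialized to encode the subsampled hidden activations of the previous ones.

```python
import numpy as np

def multiscale_forward(seq, modules):
    """Sketch of a multi-scale dynamic memory: module k holds its own state
    and only updates every `freq` steps, so later modules see a subsampled
    (slower) view of the hidden activations.  `modules` is a list of
    (W_in, W_rec, freq) triples; names and structure are illustrative."""
    states = [np.zeros(W_rec.shape[0]) for W_in, W_rec, freq in modules]
    for t, x_t in enumerate(seq):
        h = x_t
        for k, (W_in, W_rec, freq) in enumerate(modules):
            if t % freq == 0:                    # slower modules skip steps
                states[k] = np.tanh(W_in @ h + W_rec @ states[k])
            h = states[k]                        # feed the next (slower) module
    return np.concatenate(states)                # concatenated multi-scale memory

def add_module(modules, in_dim, hidden, rng):
    """Incremental step (sketch): append a new, slower module.  In the paper
    the new module is initialized to encode the subsampled hidden activations;
    here it is random for brevity."""
    freq = 2 ** len(modules)                     # each new module runs 2x slower
    modules.append((0.1 * rng.standard_normal((hidden, in_dim)),
                    0.1 * rng.standard_normal((hidden, hidden)),
                    freq))
    return modules

# Usage: start with one fast module, then add a slower one.
rng = np.random.default_rng(0)
modules = add_module([], in_dim=4, hidden=8, rng=rng)        # freq 1
modules = add_module(modules, in_dim=8, hidden=8, rng=rng)   # freq 2
x = rng.standard_normal((16, 4))
mem = multiscale_forward(x, modules)             # final multi-scale memory vector
```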
@conference{ssci19,
title = {Sequential Sentence Embeddings for Semantic Similarity},
author = {Davide Bacciu and Antonio Carta},
doi = {10.1109/SSCI44817.2019.9002824},
year = {2019},
date = {2019-12-06},
urldate = {2019-12-06},
booktitle = {Proceedings of the 2019 IEEE Symposium Series on Computational Intelligence (SSCI'19)},
publisher = {IEEE},
abstract = { Sentence embeddings are distributed representations of sentences intended to be general features to be effectively used as input for deep learning models across different natural language processing tasks.
State-of-the-art sentence embeddings for semantic similarity are computed with a weighted average of pretrained word embeddings, hence completely ignoring the contribution of word ordering within a sentence in defining its semantics. We propose a novel approach to compute sentence embeddings for semantic similarity that exploits a linear autoencoder for sequences. The method can be trained in closed form and it is easy to fit on unlabeled sentences. Our method provides a grounded approach to identify and subtract common discourse from a sentence and its embedding, to remove associated uninformative features. Unlike similar methods in the literature (e.g. the popular Smooth Inverse Frequency approach), our method is able to account for word order. We show that our estimate of the common discourse vector improves the results on two different semantic similarity benchmarks when compared to related approaches from the literature.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
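The common-discourse removal mentioned in this abstract can be illustrated with the standard Smooth-Inverse-Frequency-style baseline: estimate a shared direction from the sentence embeddings and project it out of each vector. The sketch below shows only that baseline step on top of plain averaged word vectors, with invented names and random toy data; the paper instead derives both the sentence representation and the common discourse estimate from a linear autoencoder for sequences, which preserves word order and is not reproduced here.

```python
import numpy as np

def remove_common_discourse(sentence_vecs):
    """Sketch of common-discourse removal (the SIF-style baseline referenced
    in the abstract): take the first singular vector of the sentence-embedding
    matrix as the shared "common discourse" direction and subtract each
    embedding's projection onto it."""
    X = np.asarray(sentence_vecs)                 # (n_sentences, dim)
    _, _, Vt = np.linalg.svd(X, full_matrices=False)
    u = Vt[0]                                     # common discourse direction
    return X - np.outer(X @ u, u)                 # remove the shared component

# Toy usage: "sentence embeddings" obtained by averaging random word vectors.
rng = np.random.default_rng(0)
word_vecs = rng.standard_normal((50, 16))         # hypothetical word embeddings
sentences = [rng.integers(0, 50, size=rng.integers(3, 9)) for _ in range(20)]
sent_vecs = [word_vecs[idx].mean(axis=0) for idx in sentences]
clean_vecs = remove_common_discourse(sent_vecs)   # (20, 16) cleaned embeddings
```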
@conference{lmnArx18,
title = {Linear Memory Networks},
author = {Davide Bacciu and Antonio Carta and Alessandro Sperduti},
url = {https://arxiv.org/pdf/1811.03356.pdf},
doi = {10.1007/978-3-030-30487-4_40},
year = {2019},
date = {2019-09-17},
urldate = {2019-09-17},
booktitle = {Proceedings of the 28th International Conference on Artificial Neural Networks (ICANN 2019)},
volume = {11727},
pages = {513-525},
publisher = {Springer-Verlag},
series = {Lecture Notes in Computer Science},
abstract = {Recurrent neural networks can learn complex transduction problems that require maintaining and actively exploiting a memory of their inputs. Such models traditionally consider memory and input-output functionalities indissolubly entangled. We introduce a novel recurrent architecture based on the conceptual separation between the functional input-output transformation and the memory mechanism, showing how they can be implemented through different neural components. By building on such conceptualization, we introduce the Linear Memory Network, a recurrent model comprising a feedforward neural network, realizing the non-linear functional transformation, and a linear autoencoder for sequences, implementing the memory component. The resulting architecture can be efficiently trained by building on closed-form solutions to linear optimization problems. Further, by exploiting equivalence results between feedforward and recurrent neural networks we devise a pretraining schema for the proposed architecture. Experiments on polyphonic music datasets show competitive results against gated recurrent networks and other state-of-the-art models.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
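A concrete, simplified picture of the separation described in this abstract: a feedforward (nonlinear) component computes the functional transformation, and a purely linear component updates the memory. The snippet below is a toy forward pass with assumed weight names and an assumed readout from the memory state; it does not include the closed-form training or the pretraining schema mentioned above.

```python
import numpy as np

class LinearMemoryNetworkSketch:
    """Sketch of the architecture described in the abstract: a nonlinear
    feedforward component computes h_t from the input and the previous memory,
    and a linear component updates the memory.  Weight names and the output
    readout are assumptions, not the paper's exact parameterization."""

    def __init__(self, d_in, d_hidden, d_mem, d_out, rng):
        s = 0.1
        self.W_xh = s * rng.standard_normal((d_hidden, d_in))
        self.W_mh = s * rng.standard_normal((d_hidden, d_mem))
        self.W_hm = s * rng.standard_normal((d_mem, d_hidden))
        self.W_mm = s * rng.standard_normal((d_mem, d_mem))
        self.W_mo = s * rng.standard_normal((d_out, d_mem))

    def forward(self, seq):
        m = np.zeros(self.W_mm.shape[0])
        outputs = []
        for x_t in seq:
            h = np.tanh(self.W_xh @ x_t + self.W_mh @ m)    # functional part
            m = self.W_hm @ h + self.W_mm @ m                # linear memory part
            outputs.append(self.W_mo @ m)                    # readout from memory
        return np.stack(outputs)

# Toy usage on a random sequence.
rng = np.random.default_rng(0)
lmn = LinearMemoryNetworkSketch(d_in=4, d_hidden=16, d_mem=8, d_out=2, rng=rng)
y = lmn.forward(rng.standard_normal((12, 4)))                # (12, 2) outputs
```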