Iranzo-Sánchez, Javier; Iranzo-Sánchez, Jorge; Giménez, Adrià; Civera, Jorge; Juan, Alfons Segmentation-Free Streaming Machine Translation Journal Article Transactions of the Association for Computational Linguistics, 12 , pp. 1104-1121, 2024, (also accepted for presentation at ACL 2024). Abstract | Links | BibTeX | Tags: segmentation-free, streaming machine translation
% Journal article (TACL vol. 12). Names use the unambiguous "Last, First" form
% joined by lowercase " and ". The `url` field carries exactly one link; the
% supplementary links are kept in `url-*` fields, which standard styles ignore.
% Only `year` is given because no exact publication day is recorded here.
@article{Juan2024,
  author             = {Iranzo-Sánchez, Javier and Iranzo-Sánchez, Jorge and Giménez, Adrià and Civera, Jorge and Juan, Alfons},
  title              = {Segmentation-Free Streaming Machine Translation},
  journal            = {Transactions of the Association for Computational Linguistics},
  volume             = {12},
  pages              = {1104--1121},
  year               = {2024},
  doi                = {10.1162/tacl_a_00691},
  url                = {https://arxiv.org/abs/2309.14823},
  url-paperswithcode = {https://paperswithcode.com/paper/segmentation-free-streaming-machine},
  url-code           = {https://github.com/jairsan/Segmentation-Free_Streaming_Machine_Translation},
  url-program        = {https://2024.aclweb.org/program/tacl_papers/},
  url-poster         = {https://www.mllp.upv.es/wp-content/uploads/2024/09/tacl_segfree_poster.pdf},
  abstract           = {Streaming Machine Translation (MT) is the task of translating an unbounded input text stream in real-time. The traditional cascade approach, which combines an Automatic Speech Recognition (ASR) and an MT system, relies on an intermediate segmentation step which splits the transcription stream into sentence-like units. However, the incorporation of a hard segmentation constrains the MT system and is a source of errors. This paper proposes a Segmentation-Free framework that enables the model to translate an unsegmented source stream by delaying the segmentation decision until the translation has been generated. Extensive experiments show how the proposed Segmentation-Free framework has better quality-latency trade-off than competing approaches that use an independent segmentation model.},
  note               = {also accepted for presentation at ACL 2024},
  keywords           = {segmentation-free, streaming machine translation},
  pubstate           = {published},
  tppubtype          = {article}
}
Streaming Machine Translation (MT) is the task of translating an unbounded input text stream in real-time. The traditional cascade approach, which combines an Automatic Speech Recognition (ASR) and an MT system, relies on an intermediate segmentation step which splits the transcription stream into sentence-like units. However, the incorporation of a hard segmentation constrains the MT system and is a source of errors. This paper proposes a Segmentation-Free framework that enables the model to translate an unsegmented source stream by delaying the segmentation decision until the translation has been generated. Extensive experiments show how the proposed Segmentation-Free framework has better quality-latency trade-off than competing approaches that use an independent segmentation model. |
Iranzo-Sánchez, Javier ; Civera, Jorge ; Juan, Alfons From Simultaneous to Streaming Machine Translation by Leveraging Streaming History Inproceedings Proc. 60th Annual Meeting of the Association for Computational Linguistics Vol. 1: Long Papers (ACL 2022), pp. 6972–6985, Dublin (Ireland), 2022. Abstract | Links | BibTeX | Tags: simultaneous machine translation, streaming machine translation
% Conference paper (ACL 2022, long papers). The `url` field carries exactly one
% link; the code repository link is kept in `url-code`, which standard styles
% ignore. Only `year` is given because no exact publication day is recorded here.
% The citation key is left as-is (non-ASCII) so existing citations keep resolving.
@inproceedings{Iranzo-Sánchez2022,
  author    = {Iranzo-Sánchez, Javier and Civera, Jorge and Juan, Alfons},
  title     = {From Simultaneous to Streaming Machine Translation by Leveraging Streaming History},
  booktitle = {Proc. 60th Annual Meeting of the Association for Computational Linguistics Vol. 1: Long Papers (ACL 2022)},
  pages     = {6972--6985},
  address   = {Dublin (Ireland)},
  year      = {2022},
  doi       = {10.18653/v1/2022.acl-long.480},
  url       = {https://arxiv.org/abs/2203.02459},
  url-code  = {https://github.com/jairsan/Speech_Translation_Segmenter},
  abstract  = {Simultaneous Machine Translation is the task of incrementally translating an input sentence before it is fully available. Currently, simultaneous translation is carried out by translating each sentence independently of the previously translated text. More generally, Streaming MT can be understood as an extension of Simultaneous MT to the incremental translation of a continuous input text stream. In this work, a state-of-the-art simultaneous sentence-level MT system is extended to the streaming setup by leveraging the streaming history. Extensive empirical results are reported on IWSLT Translation Tasks, showing that leveraging the streaming history leads to significant quality gains. In particular, the proposed system proves to compare favorably to the best performing systems.},
  keywords  = {simultaneous machine translation, streaming machine translation},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Simultaneous Machine Translation is the task of incrementally translating an input sentence before it is fully available. Currently, simultaneous translation is carried out by translating each sentence independently of the previously translated text. More generally, Streaming MT can be understood as an extension of Simultaneous MT to the incremental translation of a continuous input text stream. In this work, a state-of-the-art simultaneous sentence-level MT system is extended to the streaming setup by leveraging the streaming history. Extensive empirical results are reported on IWSLT Translation Tasks, showing that leveraging the streaming history leads to significant quality gains. In particular, the proposed system proves to compare favorably to the best performing systems. |