% NL2CMD journal article. The DOI is stored bare (no resolver prefix), the
% month uses the standard three-letter macro, and case-sensitive words in the
% title are brace-protected so sentence-casing styles keep them intact.
@article{1159,
  author   = {Fu, Quchen and Teng, Zhongwei and Georgaklis, Marco and White, Jules and Schmidt, Douglas},
  title    = {{NL2CMD}: An Updated Workflow for Natural Language to {Bash} Commands Translation},
  journal  = {Journal of Machine Learning Theory, Applications and Practice},
  volume   = {1},
  year     = {2023},
  month    = feb,
  doi      = {10.13052/jmltapissn.2023.002},
  url      = {https://www.journal.riverpublishers.com/index.php/JMLTAP/article/view/8},
  abstract = {
Translating natural language into Bash Commands is an emerging research field that has gained attention in recent years. Most efforts have focused on producing more accurate translation models. To the best of our knowledge, only two datasets are available, with one based on the other. Both datasets involve scraping through known data sources (through platforms like stack overflow, crowdsourcing, etc.) and hiring experts to validate and correct either the English text or Bash Commands.
This paper provides two contributions to research on synthesizing Bash Commands from scratch. First, we describe a state-of-the-art translation model used to generate Bash Commands from the corresponding English text. Second, we introduce a new NL2CMD dataset that is automatically generated, involves minimal human intervention, and is over six times larger than prior datasets. Since the generation pipeline does not rely on existing Bash Commands, the distribution and types of commands can be custom adjusted. Our empirical results show how the scale and diversity of our dataset can offer unique opportunities for semantic parsing researchers.
},
}