robert.bib

@inproceedings{kurfali-2025-conflicting,
  title = {Conflicting Needles in a Haystack: How {LLM}s behave when faced with contradictory information},
  author = {Kurfal{\i}, Murathan  and
      {\"O}stling, Robert},
  editor = {Christodoulopoulos, Christos  and
      Chakraborty, Tanmoy  and
      Rose, Carolyn  and
      Peng, Violet},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
  month = nov,
  year = {2025},
  address = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2025.emnlp-main.1742/},
  pages = {34349--34364},
  isbn = {979-8-89176-332-6},
  abstract = {Large Language Models (LLMs) have demonstrated an impressive ability to retrieve and summarize complex information, but their reliability in conflicting contexts remains poorly understood. We introduce an adversarial extension of the Needle-in-a-Haystack framework in which three mutually exclusive ``needles'' are embedded within long documents. By systematically manipulating factors such as position, repetition, layout, and domain relevance, we evaluate how LLMs handle contradictions. We find that models almost always fail to signal uncertainty and instead confidently select a single answer, exhibiting strong and consistent biases toward repetition, recency, and particular surface forms. We further analyze whether these patterns persist across model families and sizes, and we evaluate both probability-based and generation-based retrieval. Our framework highlights critical limitations in the robustness of current LLMs{---}including commercial systems{---}to contradiction. These limitations reveal potential shortcomings in RAG systems' ability to handle noisy or manipulated inputs and exposes risks for deployment in high-stakes applications.}
}

@inproceedings{ostling-etal-2025-llm,
  title = {{LLM}-based post-editing as reference-free {GEC} evaluation},
  author = {{\"O}stling, Robert  and
      Kurfal{\i}, Murathan  and
      Caines, Andrew},
  editor = {Kochmar, Ekaterina  and
      Alhafni, Bashar  and
      Bexte, Marie  and
      Burstein, Jill  and
      Horbach, Andrea  and
      Laarmann-Quante, Ronja  and
      Tack, Ana{\"i}s  and
      Yaneva, Victoria  and
      Yuan, Zheng},
  booktitle = {Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)},
  month = jul,
  year = {2025},
  address = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url = {https://aclanthology.org/2025.bea-1.16/},
  doi = {10.18653/v1/2025.bea-1.16},
  pages = {213--224},
  isbn = {979-8-89176-270-1}
}

@inproceedings{tudor-etal-2025-prompting,
  author    = {Tudor, Crina and Megyesi, Beata and {\"O}stling, Robert},
  title     = {Prompting the Past: Exploring Zero-Shot Learning for Named Entity Recognition in Historical Texts Using Prompt-Answering {LLM}s},
  editor    = {Kazantseva, Anna and Szpakowicz, Stan and Degaetano-Ortlieb, Stefania and Bizzoni, Yuri and Pagel, Janis},
  booktitle = {Proceedings of the 9th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2025)},
  year      = {2025},
  month     = may,
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  pages     = {216--226},
  isbn      = {979-8-89176-241-1},
  url       = {https://aclanthology.org/2025.latechclfl-1.19/}
}

@inproceedings{Masciolini2025multigec,
  author = {Masciolini, Arianna and Caines, Andrew and De Clercq, Orph{\'e}e and
        Kruijsbergen, Joni and Kurfal{\i}, Murathan and Mu{\~n}oz S{\'a}nchez, Ricardo and
        Volodina, Elena and {\"O}stling, Robert},
  editor = {Mu{\~n}oz S{\'a}nchez, Ricardo and Alfter, David and Kallas, Jelena and Volodina, Elena},
  title = {The {MultiGEC-2025} Shared Task on Multilingual Grammatical Error
        Correction at {NLP4CALL}},
  booktitle = {Proceedings of the 14th Workshop on Natural Language Processing
        for Computer Assisted Language Learning},
  year = {2025},
  pages = {1--33},
  address = {Tallinn, Estonia}
}

@article{Levshina2024revered,
  author = {Levshina, Natalia and Koptjevskaja-Tamm, Maria and {\"O}stling, Robert},
  title = {Revered and reviled: a sentiment analysis of female and male referents in three languages},
  journal = {Frontiers in Communication},
  volume = {9},
  year = {2024},
  url = {https://www.frontiersin.org/articles/10.3389/fcomm.2024.1266407},
  doi = {10.3389/fcomm.2024.1266407},
  issn = {2297-900X},
  abstract = {Our study contributes to the less explored domain of lexical typology, focusing on semantic prosody and connotation. Semantic derogation, or pejoration of nouns referring to women, whereby such words acquire connotations and further denotations of social pejoration, immorality and/or loose sexuality, has been a very prominent question in studies on gender and language (change). It has been argued that pejoration emerges due to the general derogatory attitudes toward female referents. However, the evidence for systematic differences in connotations of female- vs. male-related words is fragmentary and often fairly impressionistic; moreover, many researchers argue that expressed sentiments toward women (as well as men) often are ambivalent. One should also expect gender differences in connotations to have decreased in the recent years, thanks to the advances of feminism and social progress. We test these ideas in a study of positive and negative connotations of feminine and masculine term pairs such as woman - man, girl - boy, wife - husband, etc. Sentences containing these words were sampled from diachronic corpora of English, Chinese and Russian, and sentiment scores for every word were obtained using two systems for Aspect-Based Sentiment Analysis: PyABSA, and OpenAI’s large language model GPT-3.5. The Generalized Linear Mixed Models of our data provide no indications of significantly more negative sentiment toward female referents in comparison with their male counterparts. However, some of the models suggest that female referents are more infrequently associated with neutral sentiment than male ones. Neither do our data support the hypothesis of the diachronic convergence between the genders. In sum, results suggest that pejoration is unlikely to be explained simply by negative attitudes to female referents in general.}
}

% NOTE(review): classic styles require a publisher for book entries; none is recorded here -- confirm and add.
@book{Tyrefors2022sfi,
  title = {En modell f{\"o}r att m{\"a}ta och bel{\"o}na progression inom sfi ({SOU} 2022:17)},
  author = {Tyrefors, Bj{\"o}rn and Ahlstr{\"o}m, Lisa and Enb{\aa}gen, Isabella and
        Rydell, Maria and {\"O}stling, Robert},
  year = {2022},
  month = mar,
  isbn = {978-91-525-0358-4},
  url = {https://www.regeringen.se/rattsliga-dokument/statens-offentliga-utredningar/2022/03/sou-202217/}
}

@inproceedings{kurfali-ostling-2023-distantly,
  author    = {Kurfal{\i}, Murathan and {\"O}stling, Robert},
  title     = {A distantly supervised Grammatical Error Detection/Correction system for {S}wedish},
  booktitle = {Proceedings of the 12th Workshop on NLP for Computer Assisted Language Learning},
  year      = {2023},
  month     = may,
  address   = {T{\'o}rshavn, Faroe Islands},
  publisher = {LiU Electronic Press},
  pages     = {35--39},
  url       = {https://aclanthology.org/2023.nlp4call-1.4}
}

@inproceedings{Ostling2024evaluation,
  author    = {{\"O}stling, Robert and Gillholm, Katarina and Kurfal{\i}, Murathan and Mattson, Marie and Wir{\'e}n, Mats},
  title     = {Evaluation of Really Good Grammatical Error Correction},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  year      = {2024},
  month     = may,
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  pages     = {6582--6593},
  url       = {https://aclanthology.org/2024.lrec-main.584},
  abstract  = {Traditional evaluation methods for Grammatical Error Correction (GEC) fail to fully capture the full range of system capabilities and objectives. The emergence of large language models (LLMs) has further highlighted the shortcomings of these evaluation strategies, emphasizing the need for a paradigm shift in evaluation methodology. In the current study, we perform a comprehensive evaluation of various GEC systems using a recently published dataset of Swedish learner texts. The evaluation is performed using established evaluation metrics as well as human judges. We find that GPT-3 in a few-shot setting by far outperforms previous grammatical error correction systems for Swedish, a language comprising only about 0.1{\%} of its training data. We also found that current evaluation methods contain undesirable biases that a human evaluation is able to reveal. We suggest using human post-editing of GEC system outputs to analyze the amount of change required to reach native-level human performance on the task, and provide a dataset annotated with human post-edits and assessments of grammaticality, fluency and meaning preservation of GEC system outputs.}
}

@article{Ostling2023languageembeddings,
  author = {{\"O}stling, Robert and Kurfal{\i}, Murathan},
  title = {Language Embeddings Sometimes Contain Typological Generalizations},
  journal = {Computational Linguistics},
  volume = {49},
  number = {4},
  pages = {1003--1051},
  year = {2023},
  month = dec,
  abstract = {{To what extent can neural network models learn generalizations about language
                    structure, and how do we find out what they have learned? We explore these
                    questions by training neural models for a range of natural language processing
                    tasks on a massively multilingual dataset of Bible translations in 1,295
                    languages. The learned language representations are then compared to existing
                    typological databases as well as to a novel set of quantitative syntactic and
                    morphological features obtained through annotation projection. We conclude that
                    some generalizations are surprisingly close to traditional features from
                    linguistic typology, but that most of our models, as well as those of previous
                    work, do not appear to have made linguistically meaningful generalizations.
                    Careful attention to details in the evaluation turns out to be essential to
                    avoid false positives. Furthermore, to encourage continued work in this field,
                    we release several resources covering most or all of the languages in our data:
                    (1) multiple sets of language representations, (2) multilingual word embeddings,
                    (3) projected and predicted syntactic and morphological features, (4) software
                    to provide linguistically sound evaluations of language representations.}},
  issn = {0891-2017},
  doi = {10.1162/coli_a_00491},
  url = {https://doi.org/10.1162/coli_a_00491},
  eprint = {https://direct.mit.edu/coli/article-pdf/49/4/1003/2269496/coli_a_00491.pdf}
}

% NOTE(review): the spelling "parallell" in the title is kept as recorded -- verify against the published chapter.
@incollection{Ostling2016colexification,
  title = {Studying colexification through massively parallell corpora},
  author = {Robert {\"O}stling},
  booktitle = {The Lexical Typology of Semantic Shifts},
  editor = {Juvonen, P{\"a}ivi and Koptjevskaja-Tamm, Maria},
  pages = {157--176},
  publisher = {De Gruyter},
  year = {2016},
  doi = {10.1515/9783110377675-006},
  owner = {robert},
  timestamp = {2016.08.19},
  url = {http://dx.doi.org/10.1515/9783110377675-006}
}

@inproceedings{Ostling2016abayesian,
  title = {A {B}ayesian model for joint word alignment and part-of-speech transfer},
  author = {Robert {\"O}stling},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  year = {2016},
  address = {Osaka, Japan},
  month = dec,
  pages = {620--629},
  owner = {robert},
  timestamp = {2016.12.07},
  url = {http://www.robos.org/sections/research/coling2016.pdf}
}

@inproceedings{Ostling2016sigmorphon,
  author    = {{\"O}stling, Robert},
  title     = {Morphological reinflection with convolutional neural networks},
  booktitle = {Proceedings of the 14th {SIGMORPHON} Workshop on Computational Research in Phonetics, Phonology, and Morphology},
  year      = {2016},
  month     = aug,
  address   = {Berlin, Germany},
  publisher = {Association for Computational Linguistics},
  pages     = {23--26},
  doi       = {10.18653/v1/W16-2003},
  url       = {https://www.aclweb.org/anthology/W16-2003}
}

@article{Ostling2015svenskadialektkartor,
  author    = {{\"O}stling, Robert},
  title     = {Svenska dialektkartor p{\aa} sekunden},
  journal   = {Spr{\aa}kbruk},
  year      = {2015},
  volume    = {3},
  pages     = {10--13},
  owner     = {robert},
  timestamp = {2015.08.27}
}

@phdthesis{Ostling2015thesis,
  author = {{\"O}stling, Robert},
  title  = {Bayesian Models for Multilingual Word Alignment},
  school = {Stockholm University},
  year   = {2015},
  pages  = {136},
  note   = {ISBN 978-91-7649-151-5},
  url    = {http://urn.kb.se/resolve?urn=urn:nbn:se:su:diva-115541}
}

@inproceedings{Ostling2015wordorder,
  title = {Word Order Typology through Multilingual Word Alignment},
  author = {{\"O}stling, Robert},
  booktitle = {Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  year = {2015},
  address = {Beijing, China},
  month = jul,
  pages = {205--211},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/P15-2034}
}

@inproceedings{Ostling2014bayesianword,
  title = {Bayesian Word Alignment for Massively Parallel Texts},
  author = {{\"O}stling, Robert},
  booktitle = {Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, volume 2: Short Papers},
  year = {2014},
  address = {Gothenburg, Sweden},
  month = apr,
  pages = {123--127},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/E14-4024}
}

@article{Ostling2013stagger,
  title = {Stagger: {A}n Open-Source Part of Speech Tagger for {Swedish}},
  author = {Robert {\"O}stling},
  journal = {North European Journal of Language Technology},
  year = {2013},
  pages = {1--18},
  volume = {3},
  doi = {10.3384/nejlt.2000-1533.1331},
  url = {http://dx.doi.org/10.3384/nejlt.2000-1533.1331}
}

@inproceedings{Ostling2012staggera,
  title = {{S}tagger: A modern {POS} tagger for {S}wedish},
  author = {Robert {\"O}stling},
  booktitle = {Proceedings of The Fourth Swedish Language Technology Conference},
  year = {2012},
  address = {Lund, Sweden},
  month = oct,
  pages = {83--84},
  owner = {robert},
  timestamp = {2015.04.09},
  url = {http://urn.kb.se/resolve?urn=urn:nbn:se:su:diva-81093}
}

@inproceedings{Ostling2010aconstruction,
  author    = {{\"O}stling, Robert},
  title     = {A Construction Grammar Method for Disambiguating {Swedish} Compounds},
  booktitle = {{SLTC} 2010 Workshop on Compounds and Multiword Expressions},
  year      = {2010},
  url       = {http://urn.kb.se/resolve?urn=urn:nbn:se:su:diva-52761}
}

@mastersthesis{Ostling2009thesis,
  author    = {{\"O}stling, Robert},
  title     = {A Corpus-based Collocation Assistant for {S}wedish Text},
  school    = {Royal Institute of Technology (KTH)},
  year      = {2009},
  url       = {http://www.robos.org/sections/research/ostling_robert_09124.pdf},
  owner     = {robert},
  timestamp = {2015.04.10}
}

@inproceedings{Ostling2015enrichingthe,
  title = {Enriching the {S}wedish {S}ign {L}anguage {C}orpus with Part of Speech Tags Using Joint {B}ayesian Word Alignment and Annotation Transfer},
  author = {Robert {\"O}stling and Carl B{\"o}rstell and Lars Wallin},
  booktitle = {Proceedings of the 20th Nordic Conference on Computational Linguistics (NODALIDA 2015)},
  year = {2015},
  address = {Vilnius, Lithuania},
  month = may,
  pages = {263--268},
  series = {NEALT Proceedings Series},
  volume = {23},
  owner = {robert},
  timestamp = {2015.03.18},
  url = {http://www.ep.liu.se/ecp/109/ecp15109.pdf}
}

@inproceedings{Ostling2009acorpus,
  title = {A corpus-based tool for helping writers with {S}wedish collocations},
  author = {Robert {\"O}stling and Ola Knutsson},
  booktitle = {Proceedings of the Workshop on Extracting and Using Constructions in NLP},
  year = {2009},
  address = {Odense, Denmark},
  month = may,
  note = {ISSN 1100-3154},
  number = {10},
  pages = {28--33},
  series = {SICS Technical Report},
  volume = {T2009},
  owner = {robert},
  timestamp = {2015.04.09},
  url = {http://www.robos.org/sections/research/constructions_workshop.pdf}
}

@inproceedings{Ostling2013aes,
  title = {Automated Essay Scoring for {Swedish}},
  author = {{\"O}stling, Robert and Smolentzov, Andr{\'e} and Tyrefors Hinnerich, Bj{\"o}rn and H{\"o}glin, Erik},
  booktitle = {Proceedings of the Eighth Workshop on Innovative Use of {NLP} for Building Educational Applications},
  year = {2013},
  address = {Atlanta, Georgia},
  month = jun,
  pages = {42--47},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/W13-1705}
}

@article{Ostling2016efmaral,
  title = {Efficient word alignment with {M}arkov {C}hain {M}onte {C}arlo},
  author = {Robert {\"O}stling and J{\"o}rg Tiedemann},
  journal = {Prague Bulletin of Mathematical Linguistics},
  year = {2016},
  month = oct,
  pages = {125--146},
  volume = {106},
  owner = {robert},
  timestamp = {2016.08.26},
  url = {http://ufal.mff.cuni.cz/pbml/106/art-ostling-tiedemann.pdf}
}

@incollection{Ostling2013compounding,
  title = {Compounding in a {Swedish} Blog Corpus},
  author = {{\"O}stling, Robert and Wir{\'e}n, Mats},
  booktitle = {Computer mediated discourse across languages},
  publisher = {Stockholm University},
  year = {2013},
  editor = {{\'A}lvarez L{\'o}pez, Laura and Seiler Brylla, Charlotta and Shaw, Philip},
  pages = {45--63},
  series = {Stockholm Studies in Modern Philology},
  volume = {New Series 12},
  isbn = {978-91-87235-40-5},
  issn = {0585-3583},
  url = {http://urn.kb.se/resolve?urn=urn:nbn:se:su:diva-97988}
}

@inproceedings{Borstell2016visualizing,
  author    = {B{\"o}rstell, Carl and {\"O}stling, Robert},
  title     = {Visualizing Lects in a Sign Language Corpus: Mining Lexical Variation Data in Lects of {S}wedish Sign Language},
  booktitle = {Proceedings of the 7th Workshop on the Representation and Processing of Sign Languages: Corpus Mining},
  year      = {2016},
  pages     = {13--18},
  url       = {http://www.lrec-conf.org/proceedings/lrec2016/workshops/LREC2016Workshop-SignLanguage_Proceedings.pdf},
  owner     = {robert},
  timestamp = {2016.08.19}
}

@article{Borstell2016distribution,
  author    = {B{\"o}rstell, Carl and H{\"o}rberg, Thomas and {\"O}stling, Robert},
  title     = {Distribution and duration of signs and parts of speech in {Swedish Sign Language}},
  journal   = {Sign Language \& Linguistics},
  year      = {2016},
  volume    = {19},
  number    = {2},
  pages     = {143--196},
  owner     = {robert},
  timestamp = {2016.12.07}
}

@inproceedings{Berggren2015inferringthe,
  title = {Inferring the location of authors from words in their texts},
  author = {Max Berggren and Jussi Karlgren and Robert {\"O}stling and Mikael Parkvall},
  booktitle = {Proceedings of the 20th Nordic Conference on Computational Linguistics (NODALIDA 2015)},
  year = {2015},
  address = {Vilnius, Lithuania},
  month = may,
  pages = {211--218},
  series = {NEALT Proceedings Series},
  volume = {23},
  owner = {robert},
  timestamp = {2015.03.18},
  url = {http://www.ep.liu.se/ecp/109/ecp15109.pdf}
}

@inproceedings{NilssonBjorkenstam2016informativeness,
  title = {Modelling the informativeness and timing of non-verbal cues in parent-child interaction},
  author = {Nilsson Bj{\"o}rkenstam, Kristina and Wir{\'e}n, Mats and {\"O}stling, Robert},
  booktitle = {Proceedings of the 7th Workshop on Cognitive Aspects of Computational Language Learning},
  year = {2016},
  address = {Berlin},
  month = aug,
  pages = {82--90},
  publisher = {Association for Computational Linguistics},
  owner = {robert},
  timestamp = {2016.08.19},
  url = {http://aclweb.org/anthology/W/W16/W16-1907.pdf}
}

% NOTE(review): classic styles expect a publisher for incollection entries; none is recorded here -- confirm and add.
@incollection{Ostling2022borin,
  title = {Mot en m{\"a}nskligare maskin{\"o}vers{\"a}ttning},
  author = {Robert {\"O}stling},
  editor = {Volodina, Elena and Dann{\'e}lls, Dana and Berdicevskis, Aleksandrs
        and Forsberg, Markus and Virk, Shafqat},
  booktitle = {LIVE and LEARN -- Festschrift in honor of Lars Borin},
  pages = {171--173},
  isbn = {978-91-87850-83-7},
  issn = {1401-5919},
  year = {2022},
  url = {https://hdl.handle.net/2077/74254}
}

@inproceedings{Loftsson2013tagginga,
  author    = {Loftsson, Hrafn and {\"O}stling, Robert},
  title     = {Tagging a Morphologically Complex Language Using an Averaged Perceptron Tagger: The Case of {I}celandic},
  booktitle = {Proceedings of the 19th Nordic Conference on Computational Linguistics (NODALIDA 2013)},
  series    = {NEALT Proceedings Series},
  year      = {2013},
  address   = {Oslo, Norway},
  pages     = {105--119},
  url       = {http://www.robos.org/sections/research/icestagger.pdf}
}

@inproceedings{Tiedemann2016wmtsharedtask,
  title = {Phrase-Based {SMT} for {F}innish with More Data, Better Models and Alternative Alignment and Translation Tools},
  author = {Tiedemann, J{\"o}rg and Cap, Fabienne and Kanerva, Jenna and Ginter, Filip and Stymne, Sara and {\"O}stling, Robert and Weller-Di Marco, Marion},
  booktitle = {Proceedings of the First Conference on Machine Translation},
  year = {2016},
  address = {Berlin, Germany},
  month = aug,
  pages = {391--398},
  publisher = {Association for Computational Linguistics},
  url = {http://www.aclweb.org/anthology/W16-2326}
}

@inproceedings{Bjerva2017Multilingual,
  author = {Bjerva, Johannes and {\"O}stling, Robert},
  title = {Cross-lingual Learning of Semantic Textual Similarity with Multilingual Word Representations},
  booktitle = {Proceedings of the 21st Nordic Conference on Computational Linguistics},
  year = {2017},
  publisher = {Association for Computational Linguistics},
  month = may,
  pages = {211--215},
  url = {http://www.aclweb.org/anthology/W17-0224},
  address = {Gothenburg, Sweden}
}

@inproceedings{Borstell2017iconiclocations,
  author = {B{\"o}rstell, Carl and {\"O}stling, Robert},
  title = {Iconic Locations in {Swedish Sign Language}: Mapping Form to Meaning with Lexical Databases},
  booktitle = {Proceedings of the 21st Nordic Conference on Computational Linguistics},
  year = {2017},
  publisher = {Association for Computational Linguistics},
  month = may,
  pages = {221--225},
  url = {http://www.aclweb.org/anthology/W17-0226},
  address = {Gothenburg, Sweden}
}

@inproceedings{Ostling2017ud,
  author = {{\"O}stling, Robert and B{\"o}rstell, Carl and G{\"a}rdenfors, Moa and Wir{\'e}n, Mats},
  title = {{Universal Dependencies} for {Swedish Sign Language}},
  booktitle = {Proceedings of the 21st Nordic Conference on Computational Linguistics},
  year = {2017},
  publisher = {Association for Computational Linguistics},
  month = may,
  pages = {303--308},
  url = {http://www.aclweb.org/anthology/W17-0243},
  address = {Gothenburg, Sweden}
}

@inproceedings{Ostling2017multilm,
  author = {{\"O}stling, Robert and Tiedemann, J{\"o}rg},
  title = {Continuous multilinguality with language vectors},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers},
  year = {2017},
  publisher = {Association for Computational Linguistics},
  month = apr,
  pages = {644--649},
  url = {http://www.aclweb.org/anthology/E17-2102},
  abstract = {Most existing models for multilingual natural language processing (NLP)
	treat language as a discrete category, and make predictions for either
	one language or the other. In contrast, we propose using continuous vector
	representations of language. We show that these can be learned efficiently
	with a character-based neural language model, and used to improve
	inference about language varieties not seen during training.
	In experiments with 1303 Bible translations into 990 different languages,
	we empirically explore the capacity of multilingual language models,
	and also show that the language vectors capture genetic
	relationships between languages.},
  address = {Valencia, Spain}
}

@inproceedings{Ostling2017hnmt,
  author = {{\"O}stling, Robert and Scherrer, Yves and Tiedemann, J{\"o}rg and Tang, Gongbo and Nieminen, Tommi},
  title = {The {H}elsinki Neural Machine Translation System},
  booktitle = {Proceedings of the Second Conference on Machine Translation},
  year = {2017},
  publisher = {Association for Computational Linguistics},
  month = sep,
  pages = {338--347},
  url = {http://www.aclweb.org/anthology/W17-4733},
  address = {Copenhagen, Denmark}
}

@inproceedings{Bjerva2017nli,
  author = {Bjerva, Johannes and Grigonyt{\.e}, Gintar{\.e} and {\"O}stling, Robert and Plank, Barbara},
  title = {Neural Networks and Spelling Features for Native Language Identification},
  booktitle = {Proceedings of the 12th Workshop on Innovative Use of NLP for Building Educational Applications},
  year = {2017},
  publisher = {Association for Computational Linguistics},
  month = sep,
  pages = {235--239},
  url = {http://www.aclweb.org/anthology/W17-5025},
  abstract = {We present the RUG-SU team's submission at the Native Language Identification
	Shared Task 2017.
	We combine several approaches into an ensemble, based on spelling error
	features, a simple neural network using word representations, a deep residual
	network using word and character features, and a system based on a recurrent
	neural network.
	Our best system is an ensemble of neural networks, reaching an F1 score of
	0.8323.
	Although our system is not the highest ranking one, we do outperform the
	baseline by far.},
  address = {Copenhagen, Denmark}
}

@inproceedings{Ostling2017textquality,
  author = {{\"O}stling, Robert and Grigonyt{\.e}, Gintar{\.e}},
  title = {Transparent text quality assessment with convolutional neural networks},
  booktitle = {Proceedings of the 12th Workshop on Innovative Use of NLP for Building Educational Applications},
  year = {2017},
  publisher = {Association for Computational Linguistics},
  month = sep,
  pages = {282--286},
  url = {http://www.aclweb.org/anthology/W17-5031},
  abstract = {We present a very simple model for text quality assessment based on a deep
	convolutional neural network, where the only supervision required is one corpus
	of user-generated text of varying quality, and one contrasting text corpus of
	consistently high quality. Our model is able to provide local quality
	assessments in different parts of a text, which allows visual feedback about
	where potentially problematic parts of the text are located, as well as a way
	to evaluate which textual features are captured by our model. We evaluate our
	method on two corpora: a large corpus of manually graded student essays and a
	longitudinal corpus of language learner written production, and find that the
	text quality metric learned by our model is a fairly strong predictor of both
	essay grade and learner proficiency level.},
  address = {Copenhagen, Denmark}
}

@article{TKS2017clin,
  author = {Tjong Kim Sang, Erik and Bollmann, Marcel and Boschker, Remko and Casacuberta, Francisco and Dietz, Feike and Dipper, Stefanie and Domingo, Miguel and van der Goot, Rob and van Koppen, Marjo and Ljube{\v s}i{\'c}, Nikola and {\"O}stling, Robert and Petran, Florian and Pettersson, Eva and Scherrer, Yves and Schraagen, Marijn and Sevens, Leen and Tiedemann, J{\"o}rg and Vanallemeersch, Tom and Zervanou, Kalliopi},
  title = {The {CLIN27} Shared Task: Translating Historical Text to Contemporary Language for Improving Automatic Linguistic Annotation},
  journal = {Computational Linguistics in the Netherlands Journal},
  year = {2017},
  volume = {7},
  month = dec,
  pages = {53--64},
  issn = {2211-4009},
  url = {http://www.clinjournal.org/sites/clinjournal.org/files/04.clin27-shared-task.pdf},
  attachments = {http://www.clinjournal.org/sites/clinjournal.org/files/04.clin27-shared-task.pdf}
}

@inproceedings{Ostling2017sigmorphon,
  author = {{\"O}stling, Robert and Bjerva, Johannes},
  title = {{SU-RUG} at the {CoNLL-SIGMORPHON} 2017 shared task: Morphological Inflection with Attentional Sequence-to-Sequence Models},
  booktitle = {Proceedings of the CoNLL SIGMORPHON 2017 Shared Task: Universal Morphological Reinflection},
  year = {2017},
  publisher = {Association for Computational Linguistics},
  address = {Vancouver, Canada},
  pages = {110--113},
  doi = {10.18653/v1/K17-2012},
  url = {http://www.aclweb.org/anthology/K17-2012}
}

@article{Ostling2018efselab,
  author = {Robert {\"O}stling},
  title = {Part of Speech Tagging: Shallow or Deep Learning?},
  journal = {North European Journal of Language Technology},
  year = {2018},
  volume = {5},
  pages = {1--15},
  doi = {10.3384/nejlt.2000-1533.1851},
  url = {http://dx.doi.org/10.3384/nejlt.2000-1533.1851}
}

@inproceedings{Ek2018lrec,
  author = {Ek, Adam and Wir{\'e}n, Mats and {\"O}stling, Robert and Nilsson Bj{\"o}rkenstam, Kristina and Grigonyt{\.e}, Gintar{\.e} and Gustafson Capkov{\'a}, Sofia},
  title = {Identifying Speakers and Addressees in Dialogues Extracted from Literary Fiction},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)},
  year = {2018},
  publisher = {European Language Resources Association (ELRA)},
  month = may,
  url = {https://www.aclweb.org/anthology/L18-1131},
  address = {Miyazaki, Japan}
}

@article{Ostling2018visualiconicity,
  author  = {{\"O}stling, Robert and B{\"o}rstell, Carl and Courtaux, Servane},
  title   = {Visual Iconicity Across Sign Languages: Large-Scale Automated Video Analysis of Iconic Articulators and Locations},
  journal = {Frontiers in Psychology},
  volume  = {9},
  year    = {2018},
  pages   = {1--17},
  note    = {Article 725},
  issn    = {1664-1078},
  doi     = {10.3389/fpsyg.2018.00725},
  url     = {https://www.frontiersin.org/article/10.3389/fpsyg.2018.00725}
}

@article{Bjerva2019cl,
  author = {Bjerva, Johannes and {\"O}stling, Robert and Han Veiga, Maria and Tiedemann, J{\"o}rg and Augenstein, Isabelle},
  title = {What Do Language Representations Really Represent?},
  journal = {Computational Linguistics},
  year = {2019},
  volume = {45},
  number = {2},
  pages = {381--389},
  doi = {10.1162/COLI_a_00351},
  eprint = {https://doi.org/10.1162/COLI_a_00351},
  url = {https://doi.org/10.1162/COLI_a_00351},
  abstract = { A neural language model trained on a text corpus can be used to induce distributed representation of words, such that similar words end up with similar representations. If the corpus is multilingual, the same model can be used to learn distributed representations of languages, such that similar languages end up with similar representations. We show that this holds even when the multilingual corpus has been translated into English, by picking up the faint signal left by the source languages. However, just as it is a thorny problem to separate semantic from syntactic similarity in word representations, it is not obvious what type of similarity is captured by language representations.We investigate correlations and causal relationships between language representations learned from translations on one hand, and genetic, geographical, and several levels of structural similarity between languages on the other. Of these, structural similarity is found to correlate most strongly with language representation similarity, whereas genetic relationships—a convenient benchmark used for evaluation in previous work—appears to be a confounding factor. Apart from implications about translation effects, we see this more generally as a case where NLP and linguistic typology can interact and benefit one another. }
}

@inproceedings{Kurfali2019noisyparallel,
  title     = {Noisy Parallel Corpus Filtering through Projected Word Embeddings},
  author    = {Kurfal{\i}, Murathan and {\"O}stling, Robert},
  booktitle = {Proceedings of the Fourth Conference on Machine Translation (Volume 3: Shared Task Papers, Day 2)},
  month     = aug,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/W19-5438},
  pages     = {277--281},
  abstract  = {We present a very simple method for parallel text cleaning of low-resource languages, based on projection of word embeddings trained on large monolingual corpora in high-resource languages. In spite of its simplicity, we approach the strong baseline system in the downstream machine translation evaluation.},
}

@inproceedings{Kurfali2019zeroshot,
  title     = {Zero-shot transfer for implicit discourse relation classification},
  author    = {Kurfal{\i}, Murathan and {\"O}stling, Robert},
  booktitle = {Proceedings of the 20th Annual SIGdial Meeting on Discourse and Dialogue},
  month     = sep,
  year      = {2019},
  address   = {Stockholm, Sweden},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/W19-5927},
  doi       = {10.18653/v1/W19-5927},
  pages     = {226--231},
  abstract  = {Automatically classifying the relation between sentences in a discourse is a challenging task, in particular when there is no overt expression of the relation. It becomes even more challenging by the fact that annotated training data exists only for a small number of languages, such as English and Chinese. We present a new system using zero-shot transfer learning for implicit discourse relation classification, where the only resource used for the target language is unannotated parallel text. This system is evaluated on the discourse-annotated TED-MDB parallel corpus, where it obtains good results for all seven languages using only English training data.},
}

@inproceedings{Wiren2017modelling,
  author = {Wir{\'e}n, Mats and N. Bj{\"o}rkenstam, Kristina and {\"O}stling, Robert},
  title = {Modelling the Informativeness of Non-Verbal Cues in Parent--Child Interaction},
  booktitle = {Proceedings of Interspeech 2017},
  year = {2017},
  series = {Interspeech},
  isbn = {9781510848764},
  pages = {2203--2207},
  doi = {10.21437/Interspeech.2017-1143},
  abstract = {Non-verbal cues from speakers, such as eye gaze and hand positions, play an important role in word learning. This is consistent with the notion that for meaning to be reconstructed, acoustic patterns need to be linked to time-synchronous patterns from at least one other modality. In previous studies of a multimodally annotated corpus of parent--child interaction, we have shown that parents interacting with infants at the early word-learning stage (7--9 months) display a large amount of time-synchronous patterns, but that this behaviour tails off with increasing age of the children. Furthermore, we have attempted to quantify the informativeness of the different nonverbal cues, that is, to what extent they actually help to discriminate between different possible referents, and how critical the timing of the cues is. The purpose of this paper is to generalise our earlier model by quantifying informativeness resulting from non-verbal cues occurring both before and after their associated verbal references.},
  institution = {Stockholm University, Computational Linguistics},
  keywords = {language acquisition, child-directed speech, word learning, cross-situational learning, social cues, nonverbal cues, synchrony}
}

@inproceedings{Kurfali2020mwe,
  author = {Kurfal{\i}, Murathan and {\"O}stling, Robert and Sjons, Johan and Wir{\'e}n, Mats},
  title = {A Multi-word Expression Dataset for {Swedish}},
  booktitle = {Proceedings of the Twelfth International Conference on Language Resources and Evaluation ({LREC} 2020)},
  year = {2020},
  publisher = {European Language Resources Association (ELRA)},
  month = may,
  address = {Marseille, France}
}

@inproceedings{Andersson2020sentiment,
  author = {Andersson, Marta and Kurfal{\i}, Murathan and {\"O}stling, Robert},
  title = {A sentiment-annotated dataset of {English} causal connectives},
  booktitle = {Proceedings of the 14th Linguistic Annotation Workshop (LAW), COLING 2020},
  year = {2020},
  month = dec
}

@inproceedings{Kurfali2020pie,
  author = {Kurfal{\i}, Murathan and {\"O}stling, Robert},
  title = {Disambiguation of Potentially Idiomatic Expressions with Contextual Embeddings},
  booktitle = {Proceedings of the Joint Workshop on Multiword Expressions and Electronic Lexicons (MWE-LEX 2020), COLING 2020},
  year = {2020},
  month = dec
}

@inproceedings{Kurfali2021connectiveprediction,
  title     = {Let{'}s be explicit about that: Distant supervision for implicit discourse relation classification via connective prediction},
  author    = {Kurfal{\i}, Murathan and {\"O}stling, Robert},
  booktitle = {Proceedings of the 1st Workshop on Understanding Implicit and Underspecified Language},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.unimplicit-1.1},
  doi       = {10.18653/v1/2021.unimplicit-1.1},
  pages     = {1--10},
  abstract  = {In implicit discourse relation classification, we want to predict the relation between adjacent sentences in the absence of any overt discourse connectives. This is challenging even for humans, leading to shortage of annotated data, a fact that makes the task even more difficult for supervised machine learning approaches. In the current study, we perform implicit discourse relation classification without relying on any labeled implicit relation. We sidestep the lack of data through explicitation of implicit relations to reduce the task to two sub-problems: language modeling and explicit discourse relation classification, a much easier problem. Our experimental results show that this method can even marginally outperform the state-of-the-art, in spite of being much simpler than alternative models of comparable performance. Moreover, we show that the achieved performance is robust across domains as suggested by the zero-shot experiments on a completely different domain. This indicates that recent advances in language modeling have made language models sufficiently good at capturing inter-sentence relations without the help of explicit discourse markers.},
}

@inproceedings{Kurfali2021probing,
  title     = {Probing Multilingual Language Models for Discourse},
  author    = {Kurfal{\i}, Murathan and {\"O}stling, Robert},
  booktitle = {Proceedings of the 6th Workshop on Representation Learning for NLP (RepL4NLP-2021)},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.repl4nlp-1.2},
  doi       = {10.18653/v1/2021.repl4nlp-1.2},
  pages     = {8--19},
  abstract  = {Pre-trained multilingual language models have become an important building block in multilingual Natural Language Processing. In the present paper, we investigate a range of such models to find out how well they transfer discourse-level knowledge across languages. This is done with a systematic evaluation on a broader set of discourse-level tasks than has been previously been assembled. We find that the XLM-RoBERTa family of models consistently show the best performance, by simultaneously being good monolingual models and degrading relatively little in a zero-shot setting. Our results also indicate that model distillation may hurt the ability of cross-lingual transfer of sentence representations, while language dissimilarity at most has a modest effect. We hope that our test suite, covering 5 tasks with a total of 22 languages in 10 distinct families, will serve as a useful evaluation platform for multilingual performance at and beyond the sentence level.},
}

@article{Masciolini2025multigecdata,
  author = {Masciolini, Arianna and Caines, Andrew and De Clercq, Orph{\'e}e and Kruijsbergen, Joni and Kurfal{\i}, Murathan and Mu{\~n}oz S{\'a}nchez, Ricardo and Volodina, Elena and {\"O}stling, Robert and Allkivi, Kais and Arhar Holdt, {\v{S}}pela and Auzina, Ilze and Dar{\c{g}}is, Roberts and Drakonaki, Elena and Frey, Jennifer-Carmen and Gli{\v{s}}i{\'c}, Isidora and Kikilintza, Pinelopi and Nicolas, Lionel and Romanyshyn, Mariana and Rosen, Alexandr and Rozovskaya, Alla and Suluste, Kristjan and Syvokon, Oleksiy and Tantos, Alexandros and Touriki, Despoina-Ourania and Tsiotskas, Konstantinos and Tsourilla, Eleni and Varsamopoulos, Vassilis and Wisniewski, Katrin and {\v{Z}}agar, Ale{\v{s}} and Zesch, Torsten},
  title = {Towards better language representation in Natural Language Processing},
  journal = {International Journal of Learner Corpus Research},
  issn = {2215-1478},
  year = {2025},
  publisher = {John Benjamins},
  url = {https://www.jbe-platform.com/content/journals/10.1075/ijlcr.24033.mas},
  doi = {10.1075/ijlcr.24033.mas},
  keywords = {multilingual corpora, grammatical error correction, Matthew effect, MultiGEC shared task, learner corpora}
}

This file was generated by bibtex2html 1.99.