biblio.bib 71 KB

  1. % This file was created with JabRef 2.10.
  2. % Encoding: UTF-8
  3. @String { ACTESDE = {Actes de } }
  4. @String { apr = {\ifFABR avril\else April\fi} }
  5. @String { aug = {\ifFABR ao{\^u}t\else August\fi} }
  6. @String { barc = {\ifFABR Barcelone\else Barcelona\fi} }
  7. @String { bulg = {\ifFABR Bulgarie\else Bulgaria\fi} }
  8. @String { caribbean = {\ifFABR Caraïbes\else Caribbean\fi} }
  9. @String { dec = {\ifFABR décembre\else December\fi} }
  10. @String { dresden = {\ifFABR Dresde\else Dresden\fi} }
  11. @String { feb = {\ifFABR février\else February\fi} }
  12. @String { ger = {\ifFABR Allemagne\else Germany\fi} }
  13. @String { hawaii = {\ifFABR Hawa\"{i}\else Hawaii\fi} }
  14. @String { iceland = {\ifFABR Islande\else Iceland\fi} }
  15. @String { india = {\ifFABR Inde\else India\fi} }
  16. @String { ireland = {\ifFABR Irlande\else Ireland\fi} }
  17. @String { jan = {\ifFABR janvier\else January\fi} }
  18. @String { jap = {\ifFABR Japon\else Japan\fi} }
  19. @String { jul = {\ifFABR juillet\else July\fi} }
  20. @String { jun = {\ifFABR juin\else June\fi} }
  21. @String { kor = {\ifFABR Cor\'ee du Sud\else South Korea\fi} }
  22. @String { lond = {\ifFABR Londres\else London\fi} }
  23. @String { mar = {\ifFABR mars\else March\fi} }
  24. @String { may = {\ifFABR mai\else May\fi} }
  25. @String { montreal = {\ifFABR Montr{\'e}al\else Montreal\fi} }
  26. @String { nov = {\ifFABR novembre\else November\fi} }
  27. @String { nyc = {New-York, NY} }
  28. @String { oct = {\ifFABR octobre\else October\fi} }
  29. @String { philadelphia = {\ifFABR Philadelphie\else Philadelphia\fi} }
  30. @String { pol = {\ifFABR Pologne\else Poland\fi} }
  31. @String { sep = {\ifFABR septembre\else September\fi} }
  32. @String { slo = {\ifFABR Slovénie\else Slovenia\fi} }
  33. @String { spa = {\ifFABR Espagne\else Spain\fi} }
  34. @String { swed = {\ifFABR Su{\`e}de\else Sweden\fi} }
  35. @String { TALN = {Traitement automatique des langues naturelles} }
  36. @String { TALNPAREN = { (} # TALN # {)} }
  37. @String { turk = {\ifFABR Turquie\else Turkey\fi} }
  38. @String { uk = {\ifFABR Royaume-Uni\else UK\fi} }
  39. @String { usa = {\ifFABR États-Unis\else USA\fi} }
  40. @String { val = {\ifFABR Valence\else Valencia\fi} }
  41. @Book{adolf_dictionnaire_2006,
  42. Title = {Dictionnaire comparatif multilingue: fran\c{c}ais-allemand-alsacien-anglais},
  43. Author = {Adolf, Paul},
  44. Publisher = {Midgard},
  45. Year = {2006},
  46. Address = {Strasbourg, France},
  47. ISBN = {2-84512-038-9},
  48. Keywords = {Alsacien (dialecte) -- Dictionnaires multilingues, Français (langue) -- Dictionnaires multilingues},
  49. Shorttitle = {Dictionnaire comparatif multilingue}
  50. }
  51. @Article{agic2016multilingual,
  52. Title = {Multilingual projection for parsing truly low-resource languages},
  53. Author = {Agi{\'c}, {\v{Z}}eljko and Johannsen, Anders and Plank, Barbara and Mart{\'\i}nez, H{\'e}ctor Alonso and Schluter, Natalie and S{\o}gaard, Anders},
  54. Journal = {Transactions of the Association for Computational Linguistics},
  55. Year = {2016},
  56. Pages = {301--312},
  57. Volume = {4}
  58. }
  59. @InProceedings{Ahn2013,
  60. Title = {Duolingo: learn a language for free while helping to translate the web},
  61. Author = {von Ahn, Luis},
  62. Booktitle = ACTESDE # { 2013 international conference on Intelligent user interfaces (IUI '13)},
  63. Year = {2013},
  64. Address = {Santa Monica, CA, } # usa,
  65. Publisher = {ACM},
  66. Month = mar,
  67. Pages = {1--2},
  68. Doi = {},
  69. Owner = {kfort},
  70. Timestamp = {2017.12.29}
  71. }
  72. @Article{Avanzi2017,
  73. Title = {A crowdsourcing approach to the description of regional variation in French object clitic clusters},
  74. Author = {Avanzi, Mathieu and Stark, Elisabeth},
  75. Journal = {Belgian Journal of Linguistics},
  76. Year = {2017},
  77. Number = {1},
  78. Pages = {76--103},
  79. Volume = {31},
  80. Owner = {alice},
  81. Timestamp = {2018.05.27}
  82. }
  83. @InCollection{Bohmova2001,
  84. Title = {The Prague Dependency Treebank: Three-Level Annotation Scenario},
  85. Author = {B{\"o}hmov{\'a}, Alena and Haji{\v{c}}, Jan and Haji{\v{c}}ov{\'a}, Eva and Hladk{\'a}, Barbora},
  86. Booktitle = {Treebanks: Building and Using Syntactically Annotated Corpora},
  87. Publisher = {Kluwer Academic Publishers},
  88. Year = {2001},
  89. Editor = {Abeill{\'e}, Anne},
  90. File = {:/home/kfort/Dev/quaero/trunk/thesis/writing/Articles_biblio/Czech_PDT.pdf:PDF},
  91. Keywords = {corpus, annotation},
  92. Owner = {fort},
  93. Timestamp = {2010.11.08},
  94. Url = {}
  95. }
  96. @Book{institut_2004,
  97. Title = {L'enqu\^{e}te "\'{e}tude de l'histoire familiale" de 1999 - R\'{e}sultats d\'{e}taill\'{e}s},
  98. Author = {Barre, Corinne and Vanderschelden, M\'{e}lanie},
  99. Publisher = {INSEE},
  100. Year = {2004},
  101. Address = {Paris},
  102. ISBN = {978-2-11-068285-7},
  103. Language = {French}
  104. }
  105. @InProceedings{Barteld2017,
  106. Title = {Detecting spelling variants in non-standard texts},
  107. Author = {Barteld, Fabian},
  108. Booktitle = ACTESDE # { Student Research Workshop (EACL 2017)},
  109. Year = {2017},
  110. Address = val # {, } # spa,
  111. Month = may,
  112. Owner = {alice},
  113. Timestamp = {2017.05.03}
  114. }
  115. @InProceedings{Benjamin2018,
  116. Title = {{Hard Numbers: Language Exclusion in Computational Linguistics and Natural Language Processing}},
  117. Author = {Benjamin, Martin},
  118. Booktitle = ACTESDE # { 11th International Conference on Language Resources and Evaluation (LREC 2018)},
  119. Year = {2018},
  120. Address = {Miyazaki, } # jap,
  121. Month = may,
  122. Abstract = {The intersection between computer science and human language occurs largely for English and a few dozen other languages with strong economic or political support. The supermajority of the world's languages have extremely little digital presence, and little activity that can be forecast to change that status. However, such an assertion has remained impressionistic in the absence of data comparing the attention lavished on elite languages with that given to the rest of the world. This study seeks to give some numbers to the extent to which non-lucrative languages sit at the margins of language technology and computational research. Three datasets are explored that reveal current hiring and research activity at universities and corporations concerned with computational linguistics and natural language processing. The data supports the conclusion that most research activity and career opportunities focus on a few languages, while most languages have little or no current research and little possibility for the professional pursuit of their development.},
  123. File = {:home/alice/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Benjamin - Unknown - Hard Numbers Language Exclusion in Computational Linguistics and Natural Language Processing.pdf:pdf},
  124. Keywords = {NLP,computational linguistics,language technology,under-resourced languages},
  125. Url = {{\_}W26.pdf}
  126. }
  127. @InProceedings{Benzitoun2012,
  128. Title = {{TCOF-POS} : un corpus libre de fran{\c c}ais parl{\'e} annot{\'e} en morphosyntaxe ({TCOF-POS} : A Freely Available POS-Tagged Corpus of Spoken French) [in French]},
  129. Author = {Benzitoun, Christophe and Fort, Kar{\"e}n and Sagot, Beno{\^i}t},
  130. Booktitle = ACTESDE # { JEP-TALN-RECITAL 2012, volume 2: TALN},
  131. Year = {2012},
  132. Address = {Grenoble, France},
  133. Month = jun,
  134. Pages = {99--112},
  135. Publisher = {ATALA/AFCP},
  136. Owner = {alice},
  137. Timestamp = {2018.06.12},
  138. Url = {}
  139. }
  140. @PhdThesis{Berment2004,
  141. Title = {M\'ethodes pour informatiser les langues et les groupes de langues "peu dot\'ees"},
  142. Author = {Berment, Vincent},
  143. School = {{Universit{\'e} Joseph-Fourier - Grenoble I}},
  144. Year = {2004},
  145. Month = may,
  146. Type = {Th\`ese},
  147. File = {tel-00006313.pdf:https\://},
  148. Hal_id = {tel-00006313},
  149. Hal_local_reference = {theses/2004/Berment.Vincent},
  150. Hal_version = {v1},
  151. Keywords = {computerization ; word processor ; virtual keyboard ; dictionary ; language ; writing system ; mutualization ; informatisation ; traitement de textes ; clavier virtuel ; dictionnaire ; langue ; syst{\`e}me d'{\'e}criture ; segmentation ; mutualisation},
  152. Owner = {kfort},
  153. Timestamp = {2017.02.01},
  154. Url = {}
  155. }
  156. @Book{Bernabe2001,
  157. Title = {{La graphie cr{\'{e}}ole}},
  158. Author = {Bernab{\'{e}}, Jean},
  159. Publisher = {Ibis Rouge},
  160. Year = {2001},
  161. Series = {{Guides du CAPES de Cr{\'{e}}ole}},
  162. Mendeley-groups = {LRL/Creole}
  163. }
  164. @TechReport{Bernhard2016,
  165. Title = {Guide d'annotation morphosyntaxique pour les dialectes alsaciens},
  166. Author = {Bernhard, Delphine and Erhart, Pascale and Huck, Dominique and Steibl{\'e}, Lucie},
  167. Institution = {LiLPa, Universit\'e de Strasbourg},
  168. Year = {2016},
  169. Type = {Guide d'annotation},
  170. Owner = {kfort},
  171. Timestamp = {2017.02.02}
  172. }
  173. @InProceedings{bernhard_es_2013,
  174. Title = {Es esch f\`ascht wie {Ditsch}, oder net? \'Etiquetage morphosyntaxique de l'alsacien en passant par l'allemand},
  175. Author = {Bernhard, Delphine and Ligozat, Anne-Laure},
  176. Booktitle = ACTESDE # { {TALARE} 2013 : {Traitement} {Automatique} des {Langues} {R\'egionales} de {France} et d'{Europe} (TALN '2013)},
  177. Year = {2013},
  178. Address = {Les Sables d'Olonne, France},
  179. Month = jun,
  180. Pages = {209--220}
  181. }
  182. @InProceedings{Bettinson2017,
  183. Title = {Developing a suite of mobile applications for collaborative language documentation},
  184. Author = {Bettinson, Mat and Bird, Steven},
  185. Booktitle = ACTESDE # { 2nd Workshop on Computational Methods for Endangered Languages},
  186. Year = {2017},
  187. Address = {Honolulu, } # hawaii,
  188. Month = mar,
  189. Pages = {156--164},
  190. Owner = {alice},
  191. Timestamp = {2018.05.27}
  192. }
  193. @Article{Blachon2016,
  194. Title = {{Parallel Speech Collection for Under-resourced Language Studies Using the Lig-Aikuma Mobile Device App}},
  195. Author = {Blachon, David and Gauthier, Elodie and Besacier, Laurent and Kouarata, Guy-No{\"{e}}l and Adda-Decker, Martine and Rialland, Annie},
  196. Journal = {Procedia Computer Science},
  197. Year = {2016},
  198. Pages = {61--66},
  199. Volume = {81},
  200. Annote = {SLTU-2016 5th Workshop on Spoken Language Technologies for Under-resourced languages 09-12 May 2016 Yogyakarta, Indonesia},
  201. Doi = {},
  202. ISSN = {1877-0509},
  203. Keywords = { language documentation, under-resourced languages,Speech collection tool},
  204. Owner = {alice},
  205. Timestamp = {2018.05.27},
  206. Url = {}
  207. }
  208. @InProceedings{BouladeMareueil2018,
  209. Title = {A Speaking Atlas of the Regional Languages of France},
  210. Author = {Boula de Mare{\"u}il, Philippe and Rilliard, Albert and Vernier, Fr{\'e}d{\'e}ric},
  211. Booktitle = ACTESDE # { 11th International Conference on Language Resources and Evaluation (LREC 2018)},
  212. Year = {2018},
  213. Address = {Miyazaki, } # jap,
  214. Month = may,
  215. Date = {7-12},
  216. ISBN = {979-10-95546-00-9},
  217. Language = {english}
  218. }
  219. @InProceedings{Branco2018,
  220. Title = {{We Are Depleting Our Research Subject as We Are Investigating It: In Language Technology, more Replication and Diversity Are Needed}},
  221. Author = {Branco, Ant{\'{o}}nio},
  222. Booktitle = ACTESDE # { International Conference on Language Resources and Evaluation (LREC 2018)},
  223. Year = {2018},
  224. Address = {Miyazaki, } # jap,
  225. Month = may,
  226. Abstract = {In this paper, we present an analysis indicating that, in language technology, as we are investigating natural language we are contributing to deplete it in the sense that we are contributing to reduce the diversity of languages. To address this circumstance, we propose that more replication and reproduction and more language diversity need to be taken into account in our research activities.},
  227. File = {:home/alice/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Branco - Unknown - We Are Depleting Our Research Subject as We Are Investigating It In Language Technology, more Replication and Diversi.pdf:pdf},
  228. Url = {}
  229. }
  230. @InProceedings{BretonnelCohen2015,
  231. Title = {Annotateurs volontaires investis et \'ethique de l'annotation de lettres de suicid\'es},
  232. Author = {Cohen, Kevin Bretonnel and Pestian, John P. and Fort, Kar{\"e}n},
  233. Booktitle = ACTESDE # { {ETeRNAL (Ethique et Traitement Automatique des Langues) (TALN '2015)}},
  234. Year = {2015},
  235. Address = {Caen, France},
  236. Month = jun,
  237. File = {ETeRNAL_Lettres_Suicides.pdf:https\://},
  238. Hal_id = {hal-01159052},
  239. Hal_version = {v1},
  240. Keywords = {suicide notes ; ethics ; annotation ; crowdsourcing ; corpus},
  241. Owner = {kfort},
  242. Timestamp = {2017.02.02},
  243. Url = {}
  244. }
  245. @InProceedings{CallisonBurch2010,
  246. Title = {Creating speech and language data with {Amazon's Mechanical Turk}},
  247. Author = {Callison-Burch, Chris and Dredze, Mark},
  248. Booktitle = ACTESDE # { Workshop on Creating Speech and Language Data with Amazon's Mechanical Turk (CSLDAMT '10) de NAACL HLT 2010},
  249. Year = {2010},
  250. Address = {Los Angeles, CA, } # usa,
  251. Month = jun,
  252. Publisher = {Association for Computational Linguistics},
  253. Keywords = {crowdsourcing, amt},
  254. Location = {Los Angeles, California},
  255. Owner = {fort},
  256. Timestamp = {2010.12.07},
  257. Url = {}
  258. }
  259. @Book{Calvet2002,
  260. Title = {{Le march{\'{e}} aux langues: Essai de politologie linguistique sur la mondialisation}},
  261. Author = {Calvet, Louis-Jean},
  262. Publisher = {Plon},
  263. Year = {2002},
  264. Abstract = {La p. de t. porte en outre: Les effets linguistiques de la mondialisation.},
  265. ISBN = {2-259-19660-8},
  266. Keywords = {PLPL},
  267. Mendeley-groups = {LRL},
  268. Pages = {220},
  269. Url = {{\_}march{\'{e}}{\_}aux{\_}langues.html?id=XAmKQgAACAAJ{\&}redir{\_}esc=y}
  270. }
  271. @InCollection{Chamberlain2013,
  272. Title = {Using Games to Create Language Resources: Successes and Limitations of the Approach},
  273. Author = {Chamberlain, Jon and Fort, Kar\"en and Kruschwitz, Udo and Lafourcade, Mathieu and Poesio, Massimo},
  274. Booktitle = {The People's Web Meets NLP},
  275. Publisher = {Springer Berlin Heidelberg},
  276. Year = {2013},
  277. Editor = {Gurevych, Iryna and Kim, Jungi},
  278. Pages = {3--44},
  279. Series = {Theory and Applications of Natural Language Processing},
  280. Doi = {10.1007/978-3-642-35085-6_1},
  281. ISBN = {978-3-642-35084-9},
  282. Keywords = {games},
  283. Language = {English},
  284. Owner = {fortkare},
  285. Timestamp = {2013.10.11},
  286. Url = {}
  287. }
  288. @InProceedings{Chamberlain2009a,
  289. Title = {A new life for a dead parrot: Incentive structures in the Phrase Detectives game},
  290. Author = {Chamberlain, Jonathan and Poesio, Massimo and Kruschwitz, Udo},
  291. Booktitle = ACTESDE # { WWW 2009},
  292. Year = {2009},
  293. Address = {Madrid, } # spa,
  294. Month = apr,
  295. File = {:/home/kfort/Dev/quaero/trunk/thesis/writing/Articles_biblio/www2009-1.pdf:PDF},
  296. Keywords = {annotation_game},
  297. Owner = {fort},
  298. Timestamp = {2011.12.12},
  299. Url = {}
  300. }
  301. @InProceedings{K.BretonnelCohen2018,
  302. Title = {Three Dimensions of Reproducibility in Natural Language Processing},
  303. Author = {K. Bretonnel Cohen and Jingbo Xia and Pierre Zweigenbaum and Tiffany Callahan and Orin Hargraves and Foster Goss and Nancy Ide and Aur{\'e}lie N{\'e}v{\'e}ol and Cyril Grouin and Lawrence E. Hunter},
  304. Booktitle = ACTESDE # { Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  305. Year = {2018},
  306. Address = {Miyazaki, } # jap,
  307. Month = may,
  308. Date = {7-12},
  309. ISBN = {979-10-95546-00-9},
  310. Language = {english},
  311. Location = {Miyazaki, } # jap,
  312. Owner = {kfort},
  313. Timestamp = {2018.05.25}
  314. }
  315. @InCollection{colot2013,
  316. Title = {{Guadeloupean and Martinican Creole}},
  317. Author = {Colot, Serge and Ludwig, Ralph},
  318. Booktitle = {The survey of pidgin and creole languages},
  319. Publisher = {Oxford University Press},
  320. Year = {2013},
  321. Editor = {Michaelis, Susanne Maria and Maurer, Philippe and Haspelmath, Martin and Huber, Magnus},
  322. Volume = {2},
  323. Mendeley-groups = {LRL/Creole},
  324. Owner = {alice},
  325. Timestamp = {2018.05.27},
  326. Url = {}
  327. }
  328. @Article{Cox2010,
  329. Title = {{Probabilistic tagging of minority language data: a case study using Qtag.}},
  330. Author = {Cox, Christopher},
  331. Journal = {Language {\&} Computers},
  332. Year = {2010},
  333. Number = {1},
  334. Pages = {213--231},
  335. Volume = {71},
  336. Abstract = {While probabilistic methods of part-of-speech tag assignment have long received consideration in corpus and computational-linguistic research, less attention would appear to have been paid to date to the development of tagging accuracy over rounds of iterative, interactive training in applications of these methods. Understanding this aspect of probabilistic tagging is arguably of particular importance to the successful construction of minority language corpora, where financial resources for corpus development are often limited and no fixed standards for either orthography or part of speech assignment may necessarily exist. This paper therefore presents a case study in the application of pure probabilistic tagging, as represented by Qtag (Tufis and Mason, 1998), to minority-language data from Mennonite Low German (Plautdietsch). Concentrating upon the relationship of several factors (including training data size, tag set complexity, and orthographic normalization) to the development of tagging accuracy, the present study conducts computational simulations of the iterative, interactive training process to compare the interactions of these factors quantitatively over time. The study concludes with a discussion of these factors' relevance to the development of accuracy in tagging as well as of potential confounds to the application of probabilistic tagging methods to similar minority language data. [ABSTRACT FROM AUTHOR]},
  337. File = {:home/alice/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Cox - Unknown - Probabilistic tagging of minority language data a case study using Qtag.pdf:pdf},
  338. ISSN = {09215034},
  339. Keywords = {CASE studies,COMPUTER simulation,ITERATIVE methods (Mathematics),LINGUISTIC minorities,PROBABILISTIC number theory,TAGS (Metadata)},
  340. Url = {{~}cdcox/PDF/Cox2010ProbabilisticTaggingMinorityLanguageData.pdf{\&}db=afh{\&}AN=51613244{\&}lang=pt-br{\&}site=ehost-live}
  341. }
  342. @Book{Crevenat-Werner2008,
  343. Title = {Orthographe alsacienne - Bien {\'e}crire l'alsacien de Wissembourg {\`a} Ferrette},
  344. Author = {Cr{\'e}venat-Werner, Danielle and Zeidler, Edgar},
  345. Publisher = {J{\'e}r{\^o}me Do Bentzinger},
  346. Year = {2008}
  347. }
  348. @InProceedings{Dandapat2009,
  349. Title = {Complex Linguistic Annotation --- No Easy Way out!: A Case from Bangla and Hindi POS Labeling Tasks},
  350. Author = {Dandapat, Sandipan and Biswas, Priyanka and Choudhury, Monojit and Bali, Kalika},
  351. Booktitle = ACTESDE # { Linguistic Annotation Workshop},
  352. Year = {2009},
  353. Address = {Stroudsburg, PA, } # usa,
  354. Month = aug,
  355. Pages = {10--18},
  356. Series = {ACL-IJCNLP '09},
  357. Acmid = {1698383},
  358. ISBN = {978-1-932432-52-7},
  359. Location = {Suntec, Singapore},
  360. Numpages = {9},
  361. Url = {}
  362. }
  363. @InProceedings{Denis2010,
  364. Title = {{Exploitation d'une ressource lexicale pour la construction d'un {\'e}tiqueteur morphosyntaxique {\'e}tat-de-l'art du fran{\c c}ais}},
  365. Author = {Denis, Pascal and Sagot, Beno{\^\i}t},
  366. Booktitle = ACTESDE # { {Traitement Automatique des Langues Naturelles (TALN)}},
  367. Year = {2010},
  368. Address = montreal # {, Canada},
  369. Month = jul,
  370. Affiliation = {ALPAGE - INRIA Paris-Rocquencourt},
  371. Audience = {international },
  372. File = {taln10melt.pdf:http\://},
  373. Hal_id = {inria-00521231},
  374. Language = {French},
  375. Owner = {kfort},
  376. Timestamp = {2014.10.01},
  377. Url = {}
  378. }
  379. @TechReport{diki2007comment,
  380. Title = {{Comment assurer la pr{\'{e}}sence d'une langue dans le cyberespace}},
  381. Author = {Diki-Kidiri, Marcel},
  382. Institution = {UNESCO},
  383. Year = {2007},
  384. Address = {Paris, France},
  385. Note = {Retrieved December 31, 2007}
  386. }
  387. @InProceedings{Fenouillet2009,
  388. Title = {Serious games et motivation},
  389. Author = {Fenouillet, Fabien and Kaplan, Jonathan and Yennek, Nora},
  390. Booktitle = ACTESDE # { 4{\`e}me Conf{\'e}rence francophone sur les Environnements Informatiques pour l'Apprentissage Humain (EIAH'09), vol. Actes de l'Atelier "Jeux S{\'e}rieux: conception et usages"},
  391. Year = {2009},
  392. Address = {Le Mans, France},
  393. Month = jun,
  394. Pages = {41--52},
  395. Keywords = {games},
  396. Owner = {fortkare},
  397. Timestamp = {2013.12.31}
  398. }
  399. @InProceedings{Fiser2014,
  400. Title = {{sloWCrowd: a Crowdsourcing Tool for Lexicographic Tasks}},
  401. Author = {Fi{\v{s}}er, Darja and Tav{\v{c}}ar, Ale{\v{s}} and Erjavec, Toma{\v{z}}},
  402. Booktitle = ACTESDE # { 9th International Conference on Language Resources and Evaluation (LREC'14)},
  403. Year = {2014},
  404. Address = {Reykjavik, } # iceland,
  405. Month = may,
  406. Date = {26-31},
  407. ISBN = {978-2-9517408-8-4},
  408. Language = {english},
  409. Owner = {helice},
  410. Timestamp = {2018.09.03}
  411. }
  412. @Book{Fort2016,
  413. Title = {Collaborative Annotation for Reliable Natural Language Processing},
  414. Author = {Kar\"{e}n Fort},
  415. Editor = {Patrick Paroubek},
  416. Publisher = {ISTE Wiley},
  417. Year = {2016},
  418. Series = {Focus series},
  419. Owner = {kfort},
  420. Timestamp = {2016.03.29}
  421. }
  422. @Article{Fort2011,
  423. Title = {{Amazon Mechanical Turk}: Gold Mine or Coal Mine?},
  424. Author = {Fort, Kar{\"e}n and Adda, Gilles and Kevin Bretonnel Cohen},
  425. Journal = {Computational Linguistics (editorial)},
  426. Year = {2011},
  427. Month = jun,
  428. Number = {2},
  429. Pages = {413--420},
  430. Volume = {37},
  431. Doi = {10.1162/COLI_a_00057},
  432. Keywords = {crowdsourcing, amt},
  433. Owner = {fort},
  434. Timestamp = {2011.01.24},
  435. Url = {}
  436. }
  437. @InProceedings{Fort2017a,
  438. Title = {{Who wants to play Zombie? A survey of the players on ZOMBILINGO}},
  439. Author = {Fort, Kar{\"e}n and Guillaume, Bruno and Lef{\`e}bvre, Nicolas},
  440. Booktitle = ACTESDE # { {Games4NLP 2017 - Using Games and Gamification for Natural Language Processing}},
  441. Year = {2017},
  442. Address = val # {, } # spa,
  443. Month = apr,
  444. Pages = {2},
  445. Series = {Symposium Games4NLP},
  446. File = {games4nlp_zl.pdf:https\://},
  447. Hal_id = {hal-01494043},
  448. Hal_version = {v1},
  449. Keywords = { motivation ; crowdsourcing ; games with a purpose},
  450. Owner = {kfort},
  451. Timestamp = {2017.05.19},
  452. Url = {}
  453. }
  454. @InProceedings{Fort2012d,
  455. Title = {Modeling the Complexity of Manual Annotation Tasks: a Grid of Analysis},
  456. Author = {Kar\"en Fort and Adeline Nazarenko and Sophie Rosset},
  457. Booktitle = ACTESDE # { International Conference on Computational Linguistics (COLING)},
  458. Year = {2012},
  459. Address = {Mumbai, } # india,
  460. Month = dec,
  461. Pages = {895--910},
  462. Owner = {fort},
  463. Timestamp = {2012.08.13}
  464. }
  465. @InProceedings{Fort2010,
  466. Title = {Influence of Pre-annotation on {POS}-tagged Corpus Development},
  467. Author = {Fort, Kar{\"e}n and Sagot, Beno{\^i}t},
  468. Booktitle = ACTESDE # { ACL Linguistic Annotation Workshop},
  469. Year = {2010},
  470. Address = {Uppsala, } # swed,
  471. Month = jul,
  472. Pages = {56--63},
  473. Keywords = {pre-annotation},
  474. Owner = {fort},
  475. Timestamp = {2010.05.18},
  476. Url = {}
  477. }
  478. @Article{Garcia2014,
  479. Title = {{PoS-tagging the web in Portuguese. National varieties, text typologies and spelling systems}},
  480. Author = {Garcia, Marcos and Gamallo, Pablo and Gayo, Iria and Cruz, Miguel A. Pousada},
  481. Journal = {Procesamiento de Lenguaje Natural},
  482. Year = {2014},
  483. Pages = {95--101},
  484. Volume = {53},
  485. File = {:home/alice/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Garcia et al. - 2014 - PoS-tagging the Web in Portuguese. National varieties, text typologies and spelling systems Anotaci{\'{o}}n morfo.pdf:pdf},
  486. ISSN = {19897553},
  487. Keywords = {PoS-tagging,Portuguese,Spelling agreement,Web as corpus},
  488. Url = {{\_}papers/edited{\_}paper{\_}21.pdf}
  489. }
  490. @InProceedings{Garrette2013,
  491. Title = {Real-World Semi-Supervised Learning of POS-Taggers for Low-Resource Languages},
  492. Author = {Dan Garrette and Jason Mielens and Jason Baldridge },
  493. Booktitle = ACTESDE # { 51st Annual Meeting of the Association for Computational Linguistics (ACL-2013)},
  494. Year = {2013},
  495. Address = {Sofia, } # bulg,
  496. Month = aug,
  497. Pages = {583--592},
  498. Series = {ACL '13}
  499. }
  500. @InProceedings{Geiger2011,
  501. Title = {Managing the Crowd: Towards a Taxonomy of Crowdsourcing Processes},
  502. Author = {Geiger, David and Seedorf, Stefan and Schulze, Thimo and Nickerson, Robert C. and Schader, Martin},
  503. Booktitle = ACTESDE # { AMCIS 2011},
  504. Year = {2011},
  505. Address = {Detroit, MI, } # usa,
  506. Month = aug,
  507. File = {:home/kfort/Dev/quaero/trunk/thesis/writing/Articles_biblio/Geiger_et_al._-_2011_-_Managing_the_Crowd_Towards_a_Taxonomy_of_Crowdsourcing_Processes.pdf:PDF},
  508. Keywords = {crowdsourcing, taxonomy},
  509. Owner = {fortkare},
  510. Timestamp = {2013.12.23},
  511. Url = {}
  512. }
  513. @InProceedings{Gesmundo2012,
  514. Title = {Lemmatisation As a Tagging Task},
  515. Author = {Gesmundo, Andrea and Samard{\v{z}}i{\'c}, Tanja},
  516. Booktitle = ACTESDE # { 50th Annual Meeting of the Association for Computational Linguistics: Short Papers - Volume 2},
  517. Year = {2012},
  518. Address = {Jeju, } # kor,
  519. Month = jul,
  520. Pages = {368--372},
  521. Series = {ACL '12},
  522. Acmid = {2390748},
  523. Numpages = {5},
  524. Url = {}
  525. }
  526. @InProceedings{Guillaume2016a,
  527. Title = {Crowdsourcing Complex Language Resources: Playing to Annotate Dependency Syntax},
  528. Author = {Bruno Guillaume and Kar\"en Fort and Nicolas Lefebvre},
  529. Booktitle = ACTESDE # { International Conference on Computational Linguistics (COLING)},
  530. Year = {2016},
  531. Address = {Osaka, } # jap,
  532. Month = dec,
  533. Owner = {kfort},
  534. Timestamp = {2016.09.23}
  535. }
  536. @InProceedings{hana2004,
  537. Title = {{A Resource-light Approach to Russian Morphology: Tagging Russian using Czech resources}},
  538. Author = {Jiri Hana and Anna Feldman and Chris Brew},
  539. Booktitle = ACTESDE # { Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  540. Year = {2004},
  541. Address = barc # {, } # spa,
  542. Month = jul,
  543. Pages = {222--229},
  544. Publisher = {{ACL}},
  545. File = {HanaFeldmanBrew2004-RusMorphLite.pdf:http\://}
  546. }
  547. @Book{Hazael-Massieux2000,
  548. Title = {{Ecrire en cr{\'{e}}ole : Oralit{\'{e}} et {\'{e}}criture aux Antilles}},
  549. Author = {Haza{\"{e}}l-Massieux, Marie-Christine},
  550. Publisher = {L'Harmattan},
  551. Year = {2000},
  552. Mendeley-groups = {LRL/Creole}
  553. }
  554. @InProceedings{Hollenstein2014,
  555. Title = {Compilation of a Swiss German Dialect Corpus and its Application to PoS Tagging},
  556. Author = {Hollenstein, Nora and Aepli, No\"{e}mi},
  557. Booktitle = ACTESDE # { First Workshop on Applying {NLP} Tools to Similar Languages, Varieties and Dialects, VarDial@COLING 2014},
  558. Year = {2014},
  559. Address = {Dublin, } # ireland,
  560. Month = aug,
  561. Pages = {85--94},
  562. Owner = {kfort},
  563. Timestamp = {2017.02.02},
  564. Url = {}
  565. }
  566. @InProceedings{Hovy2014,
  567. Title = {Experiments with crowdsourced re-annotation of a POS tagging data set},
  568. Author = {Hovy, Dirk and Plank, Barbara and S{\o}gaard, Anders},
  569. Booktitle = ACTESDE # { 52nd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  570. Year = {2014},
  571. Address = {Baltimore, MD, } # usa,
  572. Month = jun,
  573. Pages = {377--382},
  574. Owner = {kfort},
  575. Timestamp = {2017.02.03},
  576. Url = {}
  577. }
  578. @InProceedings{Jamatia2014,
  579. Title = {Part-of-Speech Tagging System for Indian Social Media Text on Twitter},
  580. Author = {Jamatia, Anupam and Das, Amitava},
  581. Booktitle = ACTESDE # {Workshop on Language Technologies For Indian Social Media (SOCIAL-INDIA)},
  582. Year = {2014},
  583. Address = {Goa, } # india,
  584. Month = nov,
  585. Pages = {21--28},
  586. Owner = {alice},
  587. Timestamp = {2018.05.27}
  588. }
  589. @InProceedings{Klubicka2014,
  590. Title = {{Using crowdsourcing in building a morphosyntactically annotated and lemmatized silver standard corpus of Croatian}},
  591. Author = {Klubi{\v{c}}ka, Filip and Ljube{\v{s}}i{\'{c}}, Nikola},
  592. Booktitle = ACTESDE # { 9th Language Technologies Conference},
  593. Year = {2014},
  594. Address = {Ljubljana, } # slo,
  595. Month = oct,
  596. Abstract = {This paper describes the creation of a morphosyntactically tagged and lemmatized silver standard corpus by using crowdsourcing. A data set containing 50.322 tokens compiled from the Croatian web corpus hrWaC was annotated using TreeTagger and HunPos taggers trained on the SETimes.HR corpus. Tokens that the tools annotated differently were passed on to the crowd. The crowd looked through contested nouns, verbs and adjectives, while experts checked and corrected those that the crowd decided were incorrect, along with the remaining parts of speech the two taggers did not agree on. The evaluation of the crowdsourcing yielded a single worker's accuracy to be ∼90{\%}, and that of the majority answer of three workers to be ∼97{\%}. While intrinsic evaluation of the resource by calculating accuracy of morphosyntactic tags showed an improvement of 8{\%}, extrinsic evaluation of the corrected corpus on the task of morphosyntactic tagging produced an accuracy increase of little over 1{\%}. The results point to the conclusion that the use of crowdsourcing in creating and improving language resources is indeed useful, but in the case of using the improved resource for enhancing morphosyntactic tagging, given the amount of already available gold corpus data, accuracy should be improved by developing a lexicon. Uporaba mnoienja pri izdelavi oblikoskladenjsko oznaenega in lematiziranega korpusa hrvaine kot srebrnega standarda V prispevku opi{\v{s}}emo postopek izdelave oblikoskladenjsko ozna{\v{c}}enega in lematiziranega korpusa hrva{\v{s}}{\v{c}}ine z uporabo mno{\v{z}}i{\v{c}}enja. Podatkovna mno{\v{z}}ica, ki vsebuje 50.322 pojavnic, je bila vzor{\v{c}}ena iz hrva{\v{s}}kega korpusa spletnih besedil hrWaC in oznaena z ozna{\v{c}}evalnikoma TreeTagger in HunPos, ki sta se nau{\v{c}}ila modela jezika iz korpusa SETimes.HR. 
Pojavnice, ki sta jih programa ozna{\v{c}}ila razli{\v{c}}no, so bile z uporabo platforme za mno{\v{z}}i{\v{c}}enje ffzgMno{\v{s}}tvo posredovane mno{\v{z}}ici anotatorjev, ki so izmed obeh izbrali pravilno oznako. Mno{\v{z}}ica je pregledala sporne samostalnike, glagole in pridevnike, medtem ko so eksperti pregledali in popravili tiste oz-nake, za katere se je mno{\v{z}}ica odlo{\v{c}}ila, da so napa{\v{c}}ne pri obeh ozna{\v{c}}evalnikih, kot tudi preostale besedne vrste. Evalvacija mno{\v{z}}i{\v{c}}enja je pokazala, da je natan{\v{c}}nost posameznega anotatorja v povpre{\v{c}}ju ∼90{\%}, ve{\v{c}}inska odlo{\v{c}}itev treh anotatorjev pa ∼97{\%}. Medtem ko je intrinzina evalvacija vira z izraunom natan{\v{c}}nosti oblikoskladenjskih oznak pokazala izbolj{\v{s}}anje za 8{\%}, je ekstrinzi{\v{c}}na evalvacija popravl-jenega korpusa pri nalogi oblikoskladenjskega ozna{\v{c}}evanja pove{\v{c}}ala natan{\v{c}}nost ozna{\v{c}}evanja za malo ve{\v{c}} kot 1{\%}. Rezultati ka{\v{z}}ejo, da je uporaba mno{\v{z}}i{\v{c}}enja za izdelavo in izbolj{\v{s}}anje jezikovnih virov koristna, vendar pa ne za izbolj{\v{s}}anje oblikoskladenjskega ozna{\v{c}}evanja, kjer bi bilo, glede na koli{\v{c}}inokoli{\v{c}}inoˇkoli{\v{c}}ino{\v{z}}e dostopnih korpusnih podatkov kot zlatega standarda, mo{\v{c}}i bolje usmeriti v izdelavo leksikona.},
  597. File = {::},
  598. Keywords = {Croatian language,crowdsourcing,lemmatization,morphosyntactic annotation,silver standard},
  599. Owner = {helice},
  600. Timestamp = {2018.08.27},
  601. Url = {}
  602. }
  603. @article{Krumm2008,
  604. author     = {Krumm, John and Davies, Nigel and Narayanaswami, Chandra},
  605. title      = {User-Generated Content},
  606. journal    = {IEEE Pervasive Computing},
  607. volume     = {7},
  608. number     = {4},
  609. pages      = {10--11},
  610. month      = oct,
  611. year       = {2008},
  612. publisher  = {IEEE Educational Activities Department},
  613. address    = {Piscataway, NJ, } # usa,
  614. doi        = {10.1109/MPRV.2008.85},
  615. issn       = {1536-1268},
  616. acmid      = {1477242},
  617. issue_date = {October 2008},
  618. numpages   = {2},
  619. keywords   = {pervasive user-generated content},
  620. url        = {}
  621. }
  622. @inproceedings{Lafourcade2008,
  623. author    = {Mathieu Lafourcade and Alain Joubert},
  624. title     = {{JeuxDeMots} : un prototype ludique pour l'\'emergence de relations entre termes},
  625. booktitle = ACTESDE # { Journ\'ees internationales d'Analyse statistique des Donn\'ees Textuelles (JADT)},
  626. address   = {Lyon, France},
  627. month     = mar,
  628. year      = {2008},
  629. keywords  = {annotation_game},
  630. file      = {:/home/kfort/Dev/quaero/trunk/thesis/writing/Articles_biblio/lafourcade-joubert.pdf:PDF},
  631. owner     = {fort},
  632. timestamp = {2011.01.19},
  633. url       = {}
  634. }
  635. @book{Lafourcade2015,
  636. author    = {Lafourcade, Mathieu and Nathalie Lebrun and Alain Joubert},
  637. editor    = {Joseph Mariani and Patrick Paroubek},
  638. title     = {Jeux et intelligence collective: r{\'e}solution de probl{\`e}mes et acquisition de donn{\'e}es sur le web},
  639. series    = {Collection science cognitive et management des connaissances},
  640. publisher = {ISTE},
  641. year      = {2015},
  642. isbn      = {9781784050528},
  643. keywords  = {GWAP},
  644. file      = {:home/kfort/Documents/Livres/LIVRE-GWAP-MLNLBAJ-authors.pdf:PDF},
  645. owner     = {kfort},
  646. timestamp = {2015.07.20},
  647. url       = {}
  648. }
  649. @InProceedings{Leemann2015,
  650. Title = {{Voice {\"A}pp}: a mobile app for crowdsourcing {Swiss German} dialect data},
  651. Author = {Leemann, Adrian and Kolly, Marie-Jos{\'e} and Goldman, Jean-Philippe and Dellwo, Volker and Hove, Ingrid and Almajai, Ibrahim and Grimm, Sarah and Robert, Sylvain and Wanitsch, Daniel},
  652. Booktitle = ACTESDE # { INTERSPEECH 2015},
  653. Year = {2015},
  654. Address = dresden # {, } # ger,
  655. Month = sep
  656. }
  657. @InProceedings{li2012wiki,
  658. Title = {Wiki-ly Supervised Part-of-speech Tagging},
  659. Author = {Li, Shen and Gra{\c{c}}a, Jo{\~a}o V. and Taskar, Ben},
  660. Booktitle = ACTESDE # { 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning},
  661. Year = {2012},
  662. Address = {Jeju, } # kor,
  663. Month = jul,
  664. Pages = {1389--1398},
  665. Acmid = {2391106},
  666. Numpages = {10},
  667. Url = {}
  668. }
  669. @inproceedings{Liberm2016,
  670. author    = {Mark Liberman},
  671. title     = {Oral Histories: Linguistic Documentation as Social Media},
  672. booktitle = ACTESDE # { NIEUW: Novel Incentives and Engineering Unique Workflows, organized by the Linguistic Data Consortium (LDC)},
  673. address   = philadelphia # {, PA, } # usa,
  674. month     = oct,
  675. year      = {2016},
  676. owner     = {kfort},
  677. timestamp = {2017.02.03},
  678. url       = {}
  679. }
  680. @InProceedings{ljubevsic2016normalising,
  681. Title = {Normalising {Slovene} data: historical texts vs. user-generated content},
  682. Author = {Ljube{\v{s}}i{\'c}, Nikola and Zupan, Katja and Fi{\v{s}}er, Darja and Erjavec, Toma{\v{z}}},
  683. Booktitle = ACTESDE # { 13th Conference on Natural Language Processing (KONVENS 2016)},
  684. Year = {2016},
  685. Address = {Bochum, } # ger,
  686. Month = sep,
  687. Pages = {146--155}
  688. }
  689. @InCollection{Ludwig1990,
  690. Title = {Abr{\'{e}}g{\'{e}} de grammaire du cr{\'{e}}ole guadeloup{\'{e}}en},
  691. Author = {Ludwig, Ralph and Montbrand, Dani{\`{e}}le and Poullet, Hector and Telchid, Sylviane},
  692. Booktitle = {Dictionnaire cr{\'{e}}ole fran{\c{c}}ais (Guadeloupe), avec un abr{\'{e}}g{\'{e}} de grammaire cr{\'{e}}ole et un lexique fran{\c{c}}ais-cr{\'{e}}ole},
  693. Publisher = {SERVEDIT},
  694. Year = {1990},
  695. Pages = {17--38}
  696. }
  697. @book{malherbe1983langages,
  698. author    = {Malherbe, Michel},
  699. title     = {Les langages de l'humanit{\'e} (une encyclop{\'e}die des 3000 langues parl{\'e}es dans le monde)},
  700. series    = {Collection Bouquins},
  701. publisher = {Laffont},
  702. year      = {1983}
  703. }
  704. @InProceedings{Mariani2014,
  705. Title = {Rediscovering 15 Years of Discoveries in Language Resources and Evaluation: The {LREC} Anthology Analysis},
  706. Author = {Mariani, Joseph and Paroubek, Patrick and Francopoulo, Gil and Hamon, Olivier},
  707. Booktitle = ACTESDE # { 9th International Conference on Language Resources and Evaluation (LREC'14)},
  708. Year = {2014},
  709. Address = {Reykjavik, } # iceland,
  710. Month = may,
  711. Abstract = {This paper aims at analyzing the content of the LREC conferences contained in the ELRA Anthology over the past 15 years (1998-2013). It follows similar exercises that have been conducted, such as the survey on the IEEE ICASSP conference series from 1976 to 1990, which served in the launching of the ESCA Eurospeech conference, a survey of the Association of Computational Linguistics (ACL) over 50 years of existence, which was presented at the ACL conference in 2012, or a survey over the 25 years (1987-2012) of the conferences contained in the ISCA Archive, presented at Interspeech 2013. It contains first an analysis of the evolution of the number of papers and authors over time, including the study of their gender, nationality and affiliation, and of the collaboration among authors. It then studies the funding sources of the research investigations that are reported in the papers. It conducts an analysis of the evolution of the research topics within the community over time. It finally looks at reuse and plagiarism in the papers. The survey shows the present trends in the conference series and in the Language Resources and Evaluation scientific community. Conducting this survey also demonstrated the importance of a clear and unique identification of authors, papers and other sources to facilitate the analysis. This survey is preliminary, as many other aspects also deserve attention. But we hope it will help better understanding and forging our community in the global village.},
  712. File = {:home/alice/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Mariani et al. - Unknown - Rediscovering 15 Years of Discoveries in Language Resources and Evaluation The LREC Anthology Analysis.pdf:pdf},
  713. Keywords = {Bibliometrics,ELRA Anthology,ISLRN,Language Processing Systems Evaluation,Language Resources,Scientometrics,Social Networks,Text Analytics},
  714. Url = {}
  715. }
  716. @Book{McEnery2011,
  717. Title = {Corpus Linguistics: Method, Theory and Practice},
  718. Author = {McEnery, Tony and Hardie, Andrew},
  719. Publisher = {Cambridge University Press},
  720. Year = {2011},
  721. Series = {Cambridge Textbooks in Linguistics},
  722. ISBN = {9781139502443},
  723. Owner = {kfort},
  724. Timestamp = {2017.01.18},
  725. Url = {}
  726. }
  727. @InProceedings{Melero2012,
  728. Title = {Holaaa!! writin like u talk is kewl but kinda hard 4 {NLP}},
  729. Author = {Maite Melero and Marta R. Costa-Juss{\`a} and Judith Domingo and Montse Marquina and Mart{\'i} Quixal},
  730. Booktitle = ACTESDE # { 8th International Conference on Language Resources and Evaluation (LREC'12)},
  731. Year = {2012},
  732. Address = {Istanbul, } # turk,
  733. Month = may,
  734. Date = {2012-05-23/2012-05-25},
  735. ISBN = {978-2-9517408-7-7},
  736. Language = {english}
  737. }
  738. @InProceedings{Millour2017a,
  739. Title = {Why do we Need Games? {Analysis} of the Participation on a Crowdsourcing Annotation Platform},
  740. Author = {Millour, Alice and Fort, Kar{\"e}n},
  741. Booktitle = ACTESDE # { {Games4NLP 2017 - Using Games and Gamification for Natural Language Processing}},
  742. Year = {2017},
  743. Address = {\ifFABR Valence\else Valencia\fi, } # spa,
  744. Month = apr,
  745. Series = {Symposium Games4NLP},
  746. File = {games4nlp_bisame.pdf:https\://},
  747. Hal_id = {hal-01497088},
  748. Hal_version = {v1},
  749. Keywords = {crowdsourcing ; less-resourced languages ; Alsatian ; POS tagging},
  750. Owner = {kfort},
  751. Timestamp = {2017.12.18},
  752. Url = {}
  753. }
  754. @Article{Munro2013,
  755. Title = {Crowdsourcing and the Crisis-Affected Community: lessons learned and looking forward from {Mission 4636}},
  756. Author = {Munro, Robert},
  757. Journal = {Information Retrieval},
  758. Year = {2013},
  759. Number = {2},
  760. Pages = {210--266},
  761. Volume = {16},
  762. Owner = {kfort},
  763. Timestamp = {2017.02.04},
  764. Url = {}
  765. }
  766. @article{Pestian2012,
  767. author    = {Pestian, John P. and Matykiewicz, Pawel and Linn-Gust, Michelle},
  768. title     = {What's In a Note: Construction of a Suicide Note Corpus},
  769. journal   = {Biomedical Informatics Insights},
  770. volume    = {5},
  771. pages     = {1--6},
  772. year      = {2012},
  773. owner     = {kfort},
  774. timestamp = {2017.02.05},
  775. url       = {}
  776. }
  777. @InProceedings{petrov2011universal,
  778. Title = {A Universal Part-of-Speech Tagset},
  779. Author = {Slav Petrov and Dipanjan Das and Ryan McDonald},
  780. Booktitle = ACTESDE # { 8th International Conference on Language Resources and Evaluation (LREC'12)},
  781. Year = {2012},
  782. Address = {Istanbul, } # turk,
  783. Month = may,
  784. Date = {2012-05-23/2012-05-25},
  785. Language = {english}
  786. }
  787. @Book{DGLFLF_internet,
  788. Title = {{\'{E}}tude sur la place des langues de {France} sur {l'Internet}},
  789. Author = {Pimienta, Daniel and Prado, Daniel},
  790. Publisher = {DGLFLF},
  791. Year = {2014},
  792. File = {:home/alice/Documents/Work/Papr/PDF/LRL/LDF-DGLFLF/lr{\_}2014{\_}11{\_}lang-france-sur-internet.pdf:pdf},
  793. Mendeley-groups = {LRL/Creole}
  794. }
  795. @techreport{Pingali2017,
  796. author      = {Pingali, Sriharini and Mortensen, David and Littell, Patrick and Levin, Lori},
  797. title       = {Phonetically-Aware Approximate Search for Low-Resource Languages},
  798. institution = {Carnegie Mellon University},
  799. address     = {Pittsburgh, PA, } # usa,
  800. year        = {2017}
  801. }
  802. @InProceedings{Plank2016,
  803. Title = {What to do about non-standard (or non-canonical) language in {NLP}},
  804. Author = {Plank, Barbara},
  805. Booktitle = ACTESDE # { 13th Conference on Natural Language Processing (KONVENS 2016)},
  806. Year = {2016},
  807. Address = {Bochum, } # ger,
  808. Month = sep,
  809. Pages = {13--20},
  810. Abstract = {Real world data differs radically from the benchmark corpora we use in natural language processing (NLP). As soon as we apply our technologies to the real world, performance drops. The reason for this problem is obvious: NLP models are trained on samples from a limited set of canonical varieties that are considered standard, most prominently English newswire. However, there are many dimensions, e.g., socio-demographics, language, genre, sentence type, etc. on which texts can differ from the standard. The solution is not obvious: we cannot control for all factors, and it is not clear how to best go beyond the current practice of training on homogeneous data from a single domain and language. In this paper, I review the notion of canonicity, and how it shapes our community's approach to language. I argue for leveraging what I call fortuitous data, i.e., non-obvious data that is hitherto neglected, hidden in plain sight, or raw data that needs to be refined. If we embrace the variety of this heterogeneous data by combining it with proper algorithms, we will not only produce more robust models, but will also enable adaptive language technology capable of addressing natural language variation.},
  811. Archiveprefix = {arXiv},
  812. Arxivid = {1608.07836},
  813. Eprint = {1608.07836},
  814. File = {:home/alice/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Plank - 2016 - What to do about non-standard (or non-canonical) language in NLP.pdf:pdf},
  815. Journal = {CoRR},
  816. Url = {}
  817. }
  818. @Article{Poesio2013,
  819. Title = {{Phrase Detectives}: Utilizing Collective Intelligence for {Internet}-scale Language Resource Creation},
  820. Author = {Poesio, Massimo and Chamberlain, Jon and Kruschwitz, Udo and Robaldo, Livio and Ducceschi, Luca},
  821. Journal = {ACM Transactions on Interactive Intelligent Systems},
  822. Year = {2013},
  823. Month = apr,
  824. Number = {1},
  825. Pages = {3:1--3:44},
  826. Volume = {3},
  827. Acmid = {2448119},
  828. Address = {New York, NY, } # usa,
  829. Articleno = {3},
  830. Doi = {10.1145/2448116.2448119},
  831. ISSN = {2160-6455},
  832. Issue_date = {#apr# 2013},
  833. Keywords = {Web cooperation, anaphora, corpus annotation, games with a purpose, human language technology, resource creation},
  834. Numpages = {44},
  835. Owner = {kfort},
  836. Publisher = {ACM},
  837. Timestamp = {2016.06.01},
  838. Url = {}
  839. }
  840. @InCollection{Prado2012,
  841. Title = {Pr{\'e}sence des langues dans le monde r{\'e}el et le cyberespace},
  842. Author = {Prado, Daniel},
  843. Booktitle = {Net.lang R{\'{e}}ussir le cyberespace multilingue},
  844. Editor = {Vannini, Laurent and {Le Crosnier}, Herv{\'{e}}},
  845. Year = {2012},
  846. Publisher = {C{\&}F {\'{e}}ditions},
  847. Pages = {171--178},
  848. Owner = {alice},
  849. Timestamp = {2018.05.27}
  850. }
  851. @InCollection{Rivron2012,
  852. Title = {L'usage de {Facebook} chez les {{\'{E}}ton} du {Cameroun}},
  853. Author = {Rivron, Vassili},
  854. Booktitle = {Net.lang R{\'{e}}ussir le cyberespace multilingue},
  855. Editor = {Vannini, Laurent and {Le Crosnier}, Herv{\'{e}}},
  856. Year = {2012},
  857. Publisher = {C{\&}F {\'{e}}ditions},
  858. Pages = {171--178}
  859. }
  860. @InProceedings{Sanchez-Marco2011,
  861. Title = {Extending the tool, or how to annotate historical language varieties},
  862. Author = {S{\'{a}}nchez-Marco, Cristina and Boleda, Gemma and Padr{\'{o}}, Llu{\'{i}}s},
  863. Booktitle = ACTESDE # { 5th ACL-HLT workshop on language technology for cultural heritage, social sciences, and humanities (LaTeCH '11)},
  864. Year = {2011},
  865. Address = {Portland, OR, } # usa,
  866. Month = jun,
  867. Pages = {1--9},
  868. Abstract = {We present a general and simple method to adapt an existing NLP tool in order to enable it to deal with historical varieties of languages. This approach consists basically in expanding the dictionary with the old word variants and in retraining the tagger with a small training corpus. We implement this approach for Old Spanish. The results of a thorough evaluation over the extended tool show that using this method an almost state-of-the-art performance is obtained, adequate to carry out quantitative studies in the humanities: 94.5{\%} accuracy for the main part of speech and 92.6{\%} for lemma. To our knowledge, this is the first time that such a strategy is adopted to annotate historical language varieties and we believe that it could be used as well to deal with other non-standard varieties of languages.},
  869. File = {:home/alice/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/S{\'{a}}nchez-Marco, Boleda, Padr{\'{o}} - 2011 - Extending the tool, or how to annotate historical language varieties.pdf:pdf},
  870. Url = {}
  871. }
  872. @PhdThesis{Sagot2018,
  873. Title = {Informatiser le lexique},
  874. Author = {Beno{\^\i}t Sagot},
  875. School = {Institut national de recherche en informatique et en automatique (Inria)},
  876. Year = {2018},
  877. Month = jun,
  878. Type = {Habilitation {\`a} diriger des recherches en Linguistique Informatique},
  879. Owner = {helice},
  880. Timestamp = {2018.09.03}
  881. }
  882. @InProceedings{Sagot2011,
  883. Title = {Un turc m{\'e}canique pour les ressources linguistiques : critique de la myriadisation du travail parcellis{\'e}},
  884. Author = {Beno{\^\i}t Sagot and Kar{\"e}n Fort and Gilles Adda and Joseph Mariani and Bernard Lang},
  885. Booktitle = ACTESDE # { Traitement Automatique des Langues Naturelles (TALN)},
  886. Year = {2011},
  887. Address = {Montpellier, France},
  888. Month = jun,
  889. Keywords = {crowdsourcing, amt},
  890. Owner = {fort},
  891. Timestamp = {2011.09.07},
  892. Url = {}
  893. }
  894. @InProceedings{Samardzic2015,
  895. Title = {Normalising orthographic and dialectal variants for the automatic processing of {Swiss German}},
  896. Author = {Samard{\v{z}}i{\'c}, Tanja and Scherrer, Yves and Glaser, Elvira},
  897. Booktitle = ACTESDE # { 7th Language and Technology Conference},
  898. Year = {2015},
  899. Address = {Pozna{\'n}, } # pol,
  900. Month = nov
  901. }
  902. @InProceedings{scherrer2013,
  903. Title = {Lexicon induction and part-of-speech tagging of non-resourced languages without any bilingual resources},
  904. Author = {Scherrer, Yves and Sagot, Beno{\^\i}t},
  905. Booktitle = ACTESDE # { {Workshop on Adaptation of language resources and tools for closely related languages and language variants}},
  906. Year = {2013},
  907. Address = {Hissar, } # bulg,
  908. Month = sep,
  909. Publisher = {RANLP '13},
  910. File = {langvar13.pdf:https\://},
  911. Hal_id = {hal-00862693},
  912. Hal_version = {v1},
  913. Url = {}
  914. }
  915. @techreport{Schiller1995guidelines,
  916. author      = {Schiller, Anne and Teufel, Simone and Thielen, Christine},
  917. title       = {{Guidelines f{\"u}r das Tagging deutscher Textcorpora mit STTS}},
  918. institution = {{Universit{\"a}ten Stuttgart und T{\"u}bingen}},
  919. journal     = {{Universit{\"a}ten Stuttgart und T{\"u}bingen}},
  920. volume      = {4},
  921. pages       = {345},
  922. year        = {1995}
  923. }
  924. @InCollection{Schmid1997,
  925. Title = {Probabilistic part-of-speech tagging using decision trees},
  926. Author = {Helmut Schmid},
  927. Booktitle = {New Methods in Language Processing, Studies in Computational Linguistics},
  928. Editor = {Jones, D. and Somers, H.},
  929. Pages = {154--164},
  930. Publisher = {UCL Press},
  931. Year = {1997},
  932. Comment = {TreeTagger},
  933. File = {:/home/kfort/Dev/quaero/trunk/thesis/writing/Articles_biblio/tree-tagger1.pdf:PDF},
  934. Keywords = {tagger},
  935. Owner = {fort},
  936. Timestamp = {2009.04.08},
  937. Url = {}
  938. }
  939. @InProceedings{Steible2018,
  940. Title = {Pronunciation Dictionaries for the {Alsatian} Dialects to Analyze Spelling and Phonetic Variation},
  941. Author = {Steible, Lucie and Bernhard, Delphine},
  942. Booktitle = ACTESDE # { 11th International Conference on Language Resources and Evaluation (LREC'18)},
  943. Year = {2018},
  944. Address = {Miyazaki, } # jap,
  945. Month = may,
  946. Hal_id = {hal-01704814},
  947. Hal_version = {v1},
  948. Url = {}
  949. }
  950. @inproceedings{Toutanova2003,
  951. author    = {Toutanova, Kristina and Klein, Dan and Manning, Christopher D. and Singer, Yoram},
  952. title     = {Feature-rich Part-of-speech Tagging with a Cyclic Dependency Network},
  953. booktitle = ACTESDE # { Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology},
  954. pages     = {173--180},
  955. address   = {Stroudsburg, PA, } # usa,
  956. location  = {Edmonton, Canada},
  957. month     = may,
  958. year      = {2003},
  959. doi       = {10.3115/1073445.1073478},
  960. acmid     = {1073478},
  961. numpages  = {8},
  962. url       = {}
  963. }
  964. @InProceedings{Tseng2005,
  965. Title = {Morphological features help {POS} tagging of unknown words across language varieties},
  966. Author = {Tseng, Huihsin and Jurafsky, Daniel and Manning, Christopher},
  967. Booktitle = ACTESDE # { Fourth SIGHAN Workshop on Chinese Language Processing},
  968. Year = {2005},
  969. Address = {Jeju, } # kor,
  970. Month = oct,
  971. Pages = {32--39},
  972. Abstract = {Part-of-speech tagging, like any supervised statistical NLP task, is more difficult when test sets are very different from training sets, for example when tagging across genres or language varieties. We examined the problem of POS tagging of different varieties of Mandarin Chinese (PRC-Mainland, PRC-Hong Kong, and Taiwan). An analytic study first showed that unknown words were a major source of difficulty in cross-variety tagging. Unknown words in English tend to be proper nouns. By contrast, we found that Mandarin unknown words were mostly common nouns and verbs. We showed these results are caused by the high frequency of morphological compounding in Mandarin; in this sense Mandarin is more like German than English. Based on this analysis, we propose a variety of new morphological unknown-word features for POS tagging, extending earlier work by others on unknown-word tagging in English and German. Our features were implemented in a maximum entropy Markov model. Our system achieves state-of-the-art performance in Mandarin tagging, including improving unknown-word tagging performance on unseen varieties in Chinese Treebank 5.0 from 61{\%} to 80{\%} correct.},
  973. File = {:home/alice/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Tseng, Jurafsky, Manning - Unknown - Morphological features help POS tagging of unknown words across language varieties(2).pdf:pdf},
  974. Url = {}
  975. }
  976. @InProceedings{Tuite2014,
  977. Title = {{GWAPs}: Games with a Problem},
  978. Author = {Tuite, Kathleen},
  979. Booktitle = ACTESDE # { 9th International Conference on the Foundations of Digital Games},
  980. Year = {2014},
  981. Address = {Liberty of the Seas, } # caribbean,
  982. Month = apr,
  983. Owner = {kfort},
  984. Timestamp = {2016.12.06}
  985. }
  986. @InProceedings{VanRooy,
  987. Title = {An evaluation of three {POS} taggers for the tagging of the {Tswana Learner English Corpus}},
  988. Author = {{Van Rooy}, Bertus and Sch{\"{a}}fer, Lande},
  989. Booktitle = ACTESDE # { Corpus Linguistics 2003 conference},
  990. Year = {2003},
  991. Address = {Lancaster, } # uk,
  992. Month = mar,
  993. Pages = {835--844},
  994. Volume = {16},
  995. Abstract = {1. Introduction Before starting with part of speech (POS) tagging on our corpus of learner English we decided to evaluate three POS taggers to see which one gives the best results when tagging written second language English. We evaluated the taggers' performance to determine which tagger would be most suitable for linguistic analyses on a POS-tagged corpus that had not been tag-edited. Once the accuracy of the taggers had been determined, we investigated the factors that contributed to inaccuracy with a view to establish time and cost effective ways of increasing tagger accuracy without necessarily tag-editing the corpus from beginning to end. The aim of this research was to explore the possibility of selective tag editing based upon specific tokens or tags frequently associated with tagging errors. 2. The Tswana Learner English Corpus project and the POS taggers At the end of 2000 we started compiling the Tswana Learner English Corpus (TLEC). Setswana is spoken as a mother tongue in the North West and Northern Cape Provinces of South Africa and in Botswana. Most of the South African speakers of Setswana learn English as a second language in school. When it is complete, this 200 000-word corpus will form part of the International Corpus of Learner English (ICLE). Currently there is significant movement towards the fostering of a local human language technology industry in South Africa. Our aim is to collect and POS tag more corpora of South African varieties of English and of the other ten indigenous languages. As TLEC will be the first POS tagged corpus of a South African variety of English, we wanted to establish procedures for time and cost-effective POS tag-editing.},
  996. File = {:home/alice/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Van Rooy, Sch{\"{a}}fer - Unknown - An evaluation of three POS taggers for the tagging of the Tswana Learner English Corpus.pdf:pdf},
  997. Url = {}
  998. }
  999. @InProceedings{vergez2014pos,
  1000. Title = {{POS}-tagging different varieties of {Occitan} with single-dialect resources},
  1001. Author = {Vergez-Couret, Marianne and Urieli, Assaf},
  1002. Booktitle = ACTESDE # { First Workshop on Applying {NLP} Tools to Similar Languages, Varieties and Dialects, VarDial@COLING 2014},
  1003. Year = {2014},
  1004. Address = {Dublin, } # ireland,
  1005. Month = aug,
  1006. Pages = {21--29}
  1007. }
  1008. @InProceedings{Zaghouani2014,
  1009. Title = {Can Crowdsourcing be used for Effective Annotation of {Arabic}?},
  1010. Author = {Zaghouani, Wajdi and Dukes, Kais},
  1011. Booktitle = ACTESDE # { 9th International Conference on Language Resources and Evaluation (LREC'14)},
  1012. Year = {2014},
  1013. Address = {Reykjavik, } # iceland,
  1014. Month = may,
  1015. Owner = {alice},
  1016. Timestamp = {2018.05.27}
  1017. }
  1018. @InProceedings{zennaki2006,
  1019. Title = {Inducing Multilingual Text Analysis Tools Using Bidirectional Recurrent Neural Networks},
  1020. Author = {Zennaki, Othman and Semmar, Nasredine and Besacier, Laurent},
  1021. Booktitle = ACTESDE # { International Conference on Computational Linguistics (COLING)},
  1022. Year = {2016},
  1023. Address = {Osaka, } # jap,
  1024. Month = dec,
  1025. File = {coling-2016-3.pdf:https\://},
  1026. Hal_id = {hal-01374205},
  1027. Hal_version = {v1},
  1028. Keywords = {RNN ; POS tagging ; Supersense tagging ; crosslingual annotation projection},
  1029. Url = {}
  1030. }
  1031. @comment{jabref-meta: databaseType:bibtex;}