@comment{De / En — language-selector residue from the web export; not bibliographic data.}
@article{2_136421,
  author   = {Doan, Triet Ho Anh and Bingert, Sven and Yahyapour, Ramin},
  title    = {Using a Workflow Management Platform in Textual Data Management},
  journal  = {Data Intelligence},
  year     = {2022},
  month    = jan,
  doi      = {10.1162/dint_a_00139},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/136421},
  type     = {article},
  abstract = {The paper gives a brief introduction about the workflow management platform, Flowable, and how it is used for textual-data management. It is relatively new with its first release on 13 October, 2016. Despite the short time on the market, it seems to be quickly well-noticed with 4.6 thousand stars on GitHub at the moment. The focus of our project is to build a platform for text analysis on a large scale by including many different text resources. Currently, we have successfully connected to four different text resources and obtained more than one million works. Some resources are dynamic, which means that they might add more data or modify their current data. Therefore, it is necessary to keep data, both the metadata and the raw data, from our side up to date with the resources. In addition, to comply with FAIR principles, each work is assigned a persistent identifier (PID) and indexed for searching purposes. In the last step, we perform some standard analyses on the data to enhance our search engine and to generate a knowledge graph. End-users can utilize our platform to search on our data or get access to the knowledge graph. Furthermore, they can submit their code for their analyses to the system. The code will be executed on a High-Performance Cluster (HPC) and users can receive the results later on. In this case, Flowable can take advantage of PIDs for digital objects identification and management to facilitate the communication with the HPC system. As one may already notice, the whole process can be expressed as a workflow. A workflow, including error handling and notification, has been created and deployed. Workflow execution can be triggered manually or after predefined time intervals. According to our evaluation, the Flowable platform proves to be powerful and flexible. Further usage of the platform is already planned or implemented for many of our projects.},
}
@article{2_116509,
  author        = {Doan, Triet Ho Anh and Doğan, Zeki Mustafa and Panzer, Jörg-Holger and Schima-Voigt, Kristine and Wieder, Philipp},
  title         = {{OLA-HD} – Ein {OCR-D}-Langzeitarchiv für historische Drucke},
  year          = {2020},
  month         = jan,
  grolink       = {https://resolver.sub.uni-goettingen.de/purl?gro-2/116509},
  type          = {article},
  internal-note = {@article without a journal field will warn; venue not recoverable from this record — verify via grolink and add journal.},
}
@misc{2_121682,
  author        = {Baierer, Konstantin and Boenig, Matthias and Engl, Elisabeth and Neudecker, Clemens and Altenhöner, Reinhard and Geyken, Alexander and Mangei, Johannes and Stotzka, Rainer and Dengel, Andreas and Jenckel, Martin and Gehrke, Alexander and Puppe, Frank and Weil, Stefan and Sachunsky, Robert and Schiffer, Lena K. and Janicki, Maciej and Heyer, Gerhard and Fink, Florian and Schulz, Klaus U. and Weichselbaumer, Nikolaus and Limbach, Saskia and Seuret, Mathias and Dong, Rui and Burghardt, Manuel and Christlein, Vincent and Doan, Triet Ho Anh and Doğan, Zeki Mustafa and Panzer, Jörg-Holger and Schima-Voigt, Kristine and Wieder, Philipp},
  title         = {{OCR-D} kompakt: Ergebnisse und Stand der Forschung in der Förderinitiative},
  year          = {2020},
  month         = jan,
  url           = {https://publications.goettingen-research-online.de/handle/2/116509},
  grolink       = {https://resolver.sub.uni-goettingen.de/purl?gro-2/121682},
  type          = {misc},
  internal-note = {url points at handle 2/116509, which is the OLA-HD record — presumably an export error; verify. "Dogan" normalised to "Doğan" to match entry 2_116509 — confirm spelling.},
  abstract      = {Bereits seit einigen Jahren werden große Anstrengungen unternommen, um die im deutschen Sprachraum erschienenen Drucke des 16.-18. Jahrhunderts zu erfassen und zu digitalisieren. Deren Volltexttransformation konzeptionell und technisch vorzubereiten, ist das übergeordnete Ziel des DFG-Projekts OCR-D, das sich mit der Weiterentwicklung von Verfahren der Optical Character Recognition befasst. Der Beitrag beschreibt den aktuellen Entwicklungsstand der OCR-D-Software und analysiert deren erste Teststellung in ausgewählten Bibliotheken.},
}
| Topic | Professor | Type |
|---|---|---|
| Comparing performance of Remote Visualisation techniques | Prof. Julian Kunkel | BSc, MSc |
| Recommendation System for performance monitoring and analysis in HPC | Prof. Julian Kunkel | BSc, MSc |
@article{2_133248,
  author  = {Nolte, Hendrik and Spicher, Nicolai and Russel, Andrew and Ehlers, Tim and Krey, Sebastian and Krefting, Dagmar and Kunkel, Julian},
  title   = {Secure {HPC}: A workflow providing a secure partition on an {HPC} system},
  journal = {Future Generation Computer Systems},
  year    = {2023},
  month   = jan,
  doi     = {10.1016/j.future.2022.12.019},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/133248},
  type    = {article},
}
@article{2_129372,
  author        = {Nolte, Hendrik and Wieder, Philipp},
  title         = {Toward data lakes as central building blocks for data management and analysis},
  year          = {2022},
  month         = jan,
  url           = {https://publications.goettingen-research-online.de/handle/2/114449},
  grolink       = {https://resolver.sub.uni-goettingen.de/purl?gro-2/129372},
  type          = {article},
  internal-note = {Appears to duplicate entry 2_114449 (identical title/abstract; url resolves to that handle) — verify and deduplicate rather than citing both.},
  abstract      = {Data lakes are a fundamental building block for many industrial data analysis solutions and becoming increasingly popular in research. Often associated with big data use cases, data lakes are, for example, used as central data management systems of research institutions or as the core entity of machine learning pipelines. The basic underlying idea of retaining data in its native format within a data lake facilitates a large range of use cases and improves data reusability, especially when compared to the schema-on-write approach applied in data warehouses, where data is transformed prior to the actual storage to fit a predefined schema. Storing such massive amounts of raw data, however, has its very own challenges, spanning from the general data modeling, and indexing for concise querying to the integration of suitable and scalable compute capabilities. In this contribution, influential papers of the last decade have been selected to provide a comprehensive overview of developments and obtained results. The papers are analyzed with regard to the applicability of their input to data lakes that serve as central data management systems of research institutions. To achieve this, contributions to data lake architectures, metadata models, data provenance, workflow support, and FAIR principles are investigated. Last, but not least, these capabilities are mapped onto the requirements of two common research personae to identify open challenges. With that, potential research topics are determined, which have to be tackled toward the applicability of data lakes as central building blocks for research data management.},
}
@article{2_114449,
  author   = {Wieder, Philipp and Nolte, Hendrik},
  title    = {Toward data lakes as central building blocks for data management and analysis},
  journal  = {Frontiers in Big Data},
  year     = {2022},
  month    = jan,
  doi      = {10.3389/fdata.2022.945720},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/114449},
  type     = {article},
  abstract = {Data lakes are a fundamental building block for many industrial data analysis solutions and becoming increasingly popular in research. Often associated with big data use cases, data lakes are, for example, used as central data management systems of research institutions or as the core entity of machine learning pipelines. The basic underlying idea of retaining data in its native format within a data lake facilitates a large range of use cases and improves data reusability, especially when compared to the schema-on-write approach applied in data warehouses, where data is transformed prior to the actual storage to fit a predefined schema. Storing such massive amounts of raw data, however, has its very own challenges, spanning from the general data modeling, and indexing for concise querying to the integration of suitable and scalable compute capabilities. In this contribution, influential papers of the last decade have been selected to provide a comprehensive overview of developments and obtained results. The papers are analyzed with regard to the applicability of their input to data lakes that serve as central data management systems of research institutions. To achieve this, contributions to data lake architectures, metadata models, data provenance, workflow support, and FAIR principles are investigated. Last, but not least, these capabilities are mapped onto the requirements of two common research personae to identify open challenges. With that, potential research topics are determined, which have to be tackled toward the applicability of data lakes as central building blocks for research data management.},
}
@article{2_129373,
  author        = {Nolte, Hendrik and Wieder, Philipp},
  title         = {Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes},
  journal       = {Data Intelligence},
  year          = {2022},
  month         = jan,
  doi           = {10.1162/dint_a_00141},
  url           = {https://publications.goettingen-research-online.de/handle/2/121151},
  grolink       = {https://resolver.sub.uni-goettingen.de/purl?gro-2/129373},
  type          = {article},
  internal-note = {Appears to duplicate entry 2_121151 (same DOI and title; url resolves to that handle) — verify and deduplicate rather than citing both.},
}
@article{2_121151,
  author  = {Nolte, Hendrik and Wieder, Philipp},
  title   = {Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes},
  journal = {Data Intelligence},
  year    = {2022},
  month   = jan,
  doi     = {10.1162/dint_a_00141},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/121151},
  type    = {article},
}
| Topic | Professor | Type |
|---|---|---|
| Development of a provenance aware ad-hoc interface for a data lake | Prof. Julian Kunkel | BSc, MSc |
| Semantic classification of metadata attributes in a data lake using machine learning | Prof. Julian Kunkel | BSc, MSc |
| Governance for a Data Lake | Prof. Julian Kunkel | BSc, MSc |