#!/bin/bash
#
# barcode-analyse.sh - find barcodes that occur more than once in the
# Bibliotheca and Alephino exports.
#
# Provenance: added as barcode-analyse.sh in commit
# b28ab4ed0f9fc5cab3a49fe03308928d36606683 ("Analyse dubletter Barcodes",
# Felix Lohmeier, Fri, 29 Jan 2021 16:40:49 +0100).
#
# Usage: run from the directory that contains input/.
#
# Input:
#   input/*.imp            lines starting with "*I BARCO " carry a barcode
#   input/*-exemplare.txt  lines starting with "120 " carry a barcode
#
# Output (all below output/barcodes/):
#   <input file name>.txt  barcodes extracted from that input file
#   duplicates             sorted barcodes occurring more than once overall
#   duplicates.tsv         one row per duplicate: the barcode in double
#                          quotes, then one tab-separated column per .txt
#                          list holding that list's name (up to its first
#                          dot) if the list contains the barcode, and an
#                          empty cell otherwise
#
# Requires: dos2unix (besides the usual grep/cut/sort/uniq/paste/awk).

set -euo pipefail

readonly BARCODE_DIR=output/barcodes

# Run grep, treating "no line selected" (exit status 1) as success so that
# an input without matching lines yields an empty list instead of aborting
# the script under `set -e -o pipefail`. Real errors (status >= 2) are
# passed on unchanged.
# Arguments: exactly what grep takes.
grep_optional() {
  local status=0
  grep "$@" || status=$?
  if ((status > 1)); then
    return "$status"
  fi
  return 0
}

# Extract the barcode lists, determine the duplicates and write the
# duplicates.tsv overview. Returns 0 without writing any result file when
# there is no barcode list to analyse.
main() {
  local src list label
  local -a lists=() columns=()

  mkdir -p output "$BARCODE_DIR"

  # Extract Bibliotheca barcodes: the value starts at column 10, right
  # after the 9-character "*I BARCO " tag; dos2unix strips the CR of CRLF
  # line endings.
  for src in input/*.imp; do
    # An unmatched glob stays literal; skip it instead of grepping it.
    [[ -e "$src" ]] || continue
    grep_optional '^\*I BARCO ' "$src" | dos2unix | cut -c 10- \
      > "$BARCODE_DIR/${src##*/}.txt"
  done

  # Extract Alephino barcodes: everything from column 6 of "120 " lines.
  for src in input/*-exemplare.txt; do
    [[ -e "$src" ]] || continue
    grep_optional '^120 ' "$src" | cut -c 6- \
      > "$BARCODE_DIR/${src##*/}.txt"
  done

  # Every .txt list in the barcode directory takes part in the analysis.
  for list in "$BARCODE_DIR"/*.txt; do
    if [[ -e "$list" ]]; then
      lists+=("$list")
    fi
  done
  if ((${#lists[@]} == 0)); then
    printf 'barcode-analyse: no barcode lists found, nothing to do\n' >&2
    return 0
  fi

  # Determine duplicates: barcodes seen more than once over all lists.
  sort "${lists[@]}" | uniq -d > "$BARCODE_DIR/duplicates"

  # Column files left behind by an aborted earlier run must not survive.
  rm -f -- "$BARCODE_DIR"/*.tmp

  # Build one column file per list with exactly one line per duplicate, in
  # the order of the duplicates file. A whole-line membership lookup keeps
  # the columns aligned even when a list contains the same barcode several
  # times, and does not depend on sort order or on a field delimiter.
  for list in "${lists[@]}"; do
    label=${list##*/}
    label=${label%%.*}
    awk -v label="$label" '
      FILENAME == ARGV[1] { present[$0]; next }
      { print (($0 in present) ? label : "") }
    ' "$list" "$BARCODE_DIR/duplicates" > "$list.tmp"
    columns+=("$list.tmp")
  done

  # Quote the barcode by plain string concatenation; it must never be used
  # as a regular expression, since barcodes may contain characters such as
  # "+", "*" or "(".
  paste "$BARCODE_DIR/duplicates" "${columns[@]}" \
    | awk 'BEGIN { FS = OFS = "\t" } { $1 = "\"" $1 "\""; print }' \
      > "$BARCODE_DIR/duplicates.tsv"

  rm -f -- "${columns[@]}"
}

main "$@"