Korrektur Ausgabe Barcode-Analyse
This commit is contained in:
parent
27eae7ebe4
commit
1c5cda8e9b
|
@ -228,7 +228,7 @@ tasks:
|
||||||
# Bearbeitungsstand
|
# Bearbeitungsstand
|
||||||
- 'echo "Seit Juli 2019 neu hinzugekommene Dubletten: $(comm -13 input/duplicates-2019-07-10.txt output/barcodes/duplicates | wc -l)"'
|
- 'echo "Seit Juli 2019 neu hinzugekommene Dubletten: $(comm -13 input/duplicates-2019-07-10.txt output/barcodes/duplicates | wc -l)"'
|
||||||
- 'echo "Seit Juli 2019 bearbeitete Dubletten: $(comm -23 input/duplicates-2019-07-10.txt output/barcodes/duplicates | wc -l)"'
|
- 'echo "Seit Juli 2019 bearbeitete Dubletten: $(comm -23 input/duplicates-2019-07-10.txt output/barcodes/duplicates | wc -l)"'
|
||||||
- 'echo "Noch zu bearbeitende Dubletten: $(wc -l output/barcodes/duplicates)"'
|
- 'echo "Noch zu bearbeitende Dubletten: $(wc -l < output/barcodes/duplicates)"'
|
||||||
# sources:
|
# sources:
|
||||||
# - input/*
|
# - input/*
|
||||||
# generates:
|
# generates:
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
# Ermitteln von Dubletten in Barcodes
|
|
||||||
|
|
||||||
# Create the output tree; -p also creates the parent "output" directory and
# is a no-op when the directories already exist.
mkdir -p output/barcodes
|
|
||||||
|
|
||||||
# Extract Bibliotheca barcodes: keep the "*I BARCO " records of every export,
# convert the line endings with dos2unix and drop the 9-character record
# prefix. One list is written per input file to output/barcodes/<name>.txt.
for f in input/*.imp; do
  # An unmatched glob stays literal; skip it instead of passing a
  # non-existent file to grep and creating a stray, empty "*.imp.txt" list.
  [[ -e "$f" ]] || continue
  grep '^\*I BARCO ' "$f" | dos2unix | cut -c 10- > output/barcodes/"${f##*/}.txt"
done
|
|
||||||
# Extract Alephino barcodes: keep the lines starting with "120 " of every
# export and drop their first five characters. One list is written per input
# file to output/barcodes/<name>.txt.
for f in input/*-exemplare.txt; do
  # An unmatched glob stays literal; skip it instead of passing a
  # non-existent file to grep and creating a stray, empty list.
  [[ -e "$f" ]] || continue
  grep '^120 ' "$f" | cut -c 6- > output/barcodes/"${f##*/}.txt"
done
|
|
||||||
|
|
||||||
# Dubletten ermitteln
|
|
||||||
# Collect every barcode that occurs more than once across all extracted lists:
# uniq -d prints one copy of each repeated line of the sorted input.
sort output/barcodes/*.txt | uniq -d > output/barcodes/duplicates
|
|
||||||
# For every extracted barcode list, write a "<list>.tmp" column with exactly
# one line per entry in "duplicates": the list's name (text before the first
# '.') when that list contains the barcode, otherwise an empty line.
(cd output/barcodes && for f in *.txt; do
  # grep -FxH prints "<file>:<barcode>" for whole-line, fixed-string matches.
  # sort -u collapses a barcode that occurs several times in the same list;
  # otherwise join would emit one row per occurrence and the column would no
  # longer line up with "duplicates" when the columns are pasted together.
  # join -a1 keeps barcodes that are absent from this list as empty lines.
  grep -FxH -f duplicates "$f" | sort -u | join -o 2.1 -t ':' -a1 -2 2 duplicates - | cut -d '.' -f 1 > "${f}".tmp
done)
|
|
||||||
# Merge the barcode column with the per-list columns into a TSV and wrap the
# barcode (first field) in double quotes. The field is assigned directly
# instead of using sub($1, ...), because sub() would interpret the barcode as
# a regular expression and mishandle characters such as '.', '+' or '*'.
# The temporary columns are removed only if the awk stage succeeded.
paste output/barcodes/duplicates output/barcodes/*.tmp | awk 'BEGIN { FS = OFS = "\t" } { $1 = "\"" $1 "\""; print }' > output/barcodes/duplicates.tsv && rm output/barcodes/*.tmp
|
|
Loading…
Reference in New Issue