Megachile rotundata
Jump to navigation
Jump to search
Data
Original Traces
- 8 pairs of data files (paired ends)
cat trace.count | grep _1_ | sed 's/_sequence.txt//' | perl -ane 'print " ",$F[1],"\t",$F[0]/4,"\t",$F[0]/2,"\n";'
lib        insert    mates        reads        readLen  ~coverage(500M genome)  reverse  adaptors         comments
s_2_3kbp   3000      21,563,283   43,126,566   124      11                      ?        circularizarion
#s_2_5kbp  5000/300  36218589     72,437,178   35       5                       yes      ?                insert size is << 5kbp
s_2_8kbp   8000      198377       396,754      124      0.1                     ?        ?
s_3        475       35548153     71,096,306   124      18
s_4        475       35471044     70,942,088   124      18
s_5        475       35616846     71,233,692   124      18
s_6        475       35303840     70,607,680   124      18
s_7        475       34893313     69,786,626   124      18
total      .         198,594,856  397,189,712
Corrected Traces
- Mated ones
lib      insert  mates        reads
s_2_3kb  3000    4,823,235    9,646,470
s_2_8kb  8000    111,267      222,534
s_3      475     33,024,597   66,049,194
s_4      475     33,237,593   66,475,186
s_5      475     33,150,790   66,301,580
s_6      475     33,223,371   66,446,742
s_7      475     32,647,890   65,295,780
total    .       170,218,743  340,437,486
Adaptors
>circularizarion
CGTAATAACTTCGTATAGCATACATTATACGAAGTTATACGA
>circularizarion.revcomp
TCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACG
Location
/fs/szattic-asmg5/Bees/Megachile_rotundata/error_correction/large_libs/s_?_?_?kb.sequence.cor.all.txt
/fs/szattic-asmg5/Bees/Megachile_rotundata/error_free/s_?_?_sequence.cor.txt
/fs/szattic-asmg5/Bees/Megachile_rotundata/frg/ # frg files to assemble
Assemblies
- CA Version: 6.1 (09/01/2010) /fs/szdevel/dpuiu/SourceForge/wgs-6.1/Linux-amd64/bin/runCA
- SOAP version 1.04: /nfshomes/dpuiu/szdevel/SOAPdenovo_Release1.04/
CA noOBT partial
- Data : 3 libraries
LibraryName         numActiveFRG  numDeletedFRG  numMatedFRG  readLength  clearLength
GLOBAL              72995448      0              70632632     8307194830  8278360381
LegacyUnmatedReads  0             0              0            0           0
s_2_3kb             9166343       0              8736228      942501164   914798596
s_2_8kb             210266        0              199620       21669112    20742291
s_3                 63618839      0              61696784     7343024554  7342819494
- Stats
.    elem     min  q1    q2    q3     max     mean      n50    sum
utg  1437146  64   123   143   195    67048   308.78    870    443759899
ctg  37494    65   2185  3998  7706   191323  6380.39   10151  239226293
deg  1136469  64   123   143   184    5031    160.11    164    181954480
scf  20827    122  3228  6374  13700  202495  11508.95  20462  239696810
- Locations
ginkgo:/scratch1/dpuiu/Megachile_rotundata/Assembly/wgs-noOBT-partial/
Moved from mulberry:/scratch2/dpuiu/Megachile_rotundata/Assembly/wgs-noOBT-partial/
CA noOBT
Gatekeeper
- ~74X coverage
LibraryName         numActiveFRG  numDelFRG  numMatedFRG  readLength   clearLength  #repeats
GLOBAL              326,236,387   0          315518526    37451489553  37418130441
LegacyUnmatedReads  0             0          0            0            0
s_2_3kb             9107424       0          9107424      942165284    910444046    #
s_2_8kb             209336        0          209336       21814418     20787384     #
s_3                 63618839      0          61696784     7343024554   7342819494   #
s_4                 63544688      0          61255960     7291557748   7291478152   #
s_5                 63370860      0          61084368     7271218123   7271051639   #
s_6                 63780887      0          61685156     7359094156   7359012512   #
s_7                 62604353      0          60479498     7222615270   7222537214   #
Meryl
meryl -Dh -s 0-mercounts/asm-C-ms22-cm0 Found 30570218845 mers. Found 271464470 distinct mers. Found 11164787 unique mers. Largest mercount is 87984949; 1896 mers are too big for histogram. 1 11164787 0.0411 0.0004 2 9376915 0.0757 0.0010 3 3714582 0.0894 0.0013 ... 54 5344148 0.6573 0.1788 ...
fasta2tab.pl 0-mercounts/asm.nmers.ovl.fasta | sort -n -r | head -5 87,908,217 AATCATACAATCACAATCATAC 84,450,288 CAATCATACAATCACAATCATA ... 74,975,282 AATAATATGAGTTAGATTGATA
egrep -c 'AATCATACAATCACAATCATAC|GTATGATTGTGATTGTATGATT' *fastq *txt > egrep.count mulberry:/scratch2/dpuiu/Megachile_rotundata/Data/error_free/egrep.count
meryl -Dh -s 0-mercounts/asm-C-ms15-cm0 | head Found 32850820919 mers. Found 142500876 distinct mers. Found 2381895 unique mers. Largest mercount is 125816941; 2023 mers are too big for histogram. 1 2381895 0.0167 0.0001 2 2325770 0.0330 0.0002 3 708786 0.0380 0.0003 ... 54 1851586 0.4894 0.0671 ...
Overlap
- job count :
cat 1-overlapper/ovlopts.pl | grep ^\"h | wc -l 924
- Failures: 709 of the 924 overlap jobs failed; runCA 6.1 could not restart the overlap stage properly.
cat 1-overlap/overlap*out | grep "^Could not" | sort -u Could not malloc memory (1305184948 bytes)
cat 1-overlapper/*pl | grep ^\"0 | sed 's/"//' | sed 's/\",//' >! 1-overlapper/ovlopts.pl.0 cat 1-overlapper/*pl | grep ^\"h | sed 's/"//' | sed 's/\",//' >! 1-overlapper/ovlopts.pl.h cat 1-overlapper/*pl | grep ^\"-h | sed 's/"//' | sed 's/\",//' >! 1-overlapper/ovlopts.pl.-h paste 1-overlapper/ovlopts.pl.* | p 'print "overlap -M 8GB --hashload 0.8 -t 1 -h $F[3] -r $F[5] -k 22 -k \ ./0-mercounts/asm.nmers.ovl.fasta -o ./1-overlapper/$F[0]/$F[1].ovb.gz ./asm.gkpStore > \ ./1-overlapper/overlap.0$..out \n";' | tail -709 > overlap.sh
- Stats
overlapStore -d asm.ovlStore | awk '{print $1}' | uniq -c | awk '{print $1}' | count.pl | getSummary.pl -i 0 -j 1 overlapStats -G asm.gkpStore -O asm.ovlStore -o asm
Location
mulberry:/scratch2/dpuiu/Megachile_rotundata/Assembly/wgs-noOBT
SOAPdenovo (Tanja)
cat *.ContigIndex | grep -v ^E | grep -v ^i | count.pl -i 1 | getSummary.pl -j 1 -t "contigs" cat *.ContigIndex | grep -v ^E | grep -v ^i | count.pl -i 1 | getSummary.pl -j 1 -min 100 -t "contigs(>100bp)" grep "^>" *.scaf | getSummary.pl -i 2 -t scaf
- Stats
.                elem     min  q1   q2    q3     max      mean      n50     sum
contigs          9742349  31   32   33    37     114832   60.09     44      585430821
contigs(>100bp)  177327   100  131  261   1398   114832   1333.68   3897    236496823  # N50 for Bee was 7K
scaf             7863     102  903  3272  17692  2338728  37825.70  240706  297423517  # N50 for Bee was 1.17M
- Location
/fs/szattic-asmg5/Bees/Megachile_rotundata/Assembly/assembly5kbForAll
SOAPdenovo (Daniela)
- Stats
cat asm.K31.contig | grep "^>" | awk '{print $3}' | uniq -c | awk '{print $2,$1}' > asm.K31.contigLen.count
.                elem       min  q1    q2    q3     max      mean      n50    sum
contigs(all)     6,917,796  31   32    34    40     121554   70.46     73     487,401,812
contigs(>100bp)  210,666    100  124   222   1174   121554   1108.69   3138   233,563,401
scaff            25,119     351  1896  4444  10914  1102803  11041.00  26876  277,338,897
reads           340,437,486
readsOnContigs  171,212,613
- Location
mulberry:/scratch2/dpuiu/Megachile_rotundata/Assembly/SOAPdenovo-redo