Megachile rotundata: Difference between revisions
		
		
		
		Jump to navigation
		Jump to search
		
| Line 168: | Line 168: | ||
| * Stats   | * Stats   | ||
|    .                    elem       min    q1     q2     q3     max        mean       n50        sum              |    .                    elem       min    q1     q2     q3     max        mean       n50        sum              | ||
|    contigs(all)          |    contigs(all)         6,917,796  31     32     34     40     121554     70.46      73         487,401,812        | ||
|    contigs(>100bp)       |    contigs(>100bp)      210,666    100    124    222    1174   121554     1108.69    3138       233,563,401 | ||
|    scaff                 |    scaff                25,119     351    1896   4444   10914  1102803    11041.00   26876      277,338,897 | ||
|   readOnContig         171,212,613  | |||
|    cat asm.K31.contig | grep "^>" | awk '{print $3}' | uniq -c | awk '{print $2,$1}'  > asm.K31.contigLen.count |    cat asm.K31.contig | grep "^>" | awk '{print $3}' | uniq -c | awk '{print $2,$1}'  > asm.K31.contigLen.count | ||
Revision as of 15:21, 6 September 2010
Data
Original Traces
- 8 pairs of data files (paired ends)
cat trace.count | grep _1_ | sed 's/_sequence.txt//' | perl -ane 'print " ",$F[1],"\t",$F[0]/4,"\t",$F[0]/2,"\n";'
 lib        insert    mates        reads        readLen  ~coverage(500M genome)  reverse  adaptors         comments
 s_2_3kbp   3000      21,563,283   43,126,566   124      11                      ?        circularizarion
 #s_2_5kbp  5000/300  36218589     72,437,178   35       5                       yes      ?                insert size is << 5kbp
 s_2_8kbp   8000      198377       396,754      124      0.1                     ?        ?
 s_3        475       35548153     71,096,306   124      18
 s_4        475       35471044     70,942,088   124      18
 s_5        475       35616846     71,233,692   124      18
 s_6        475       35303840     70,607,680   124      18
 s_7        475       34893313     69,786,626   124      18
 total      .         198,594,856  397,189,712
Corrected Traces
- Mated ones
 lib      insert  mates        reads
 s_2_3kb  3000    4,823,235    9,646,470
 s_2_8kb  8000    111,267      222,534
 s_3      475     33,024,597   66,049,194
 s_4      475     33,237,593   66,475,186
 s_5      475     33,150,790   66,301,580
 s_6      475     33,223,371   66,446,742
 s_7      475     32,647,890   65,295,780
 total    .       170,218,743  340,437,486
Adaptors
 >circularizarion
 CGTAATAACTTCGTATAGCATACATTATACGAAGTTATACGA
 >circularizarion.revcomp
 TCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACG
Location
 /fs/szattic-asmg5/Bees/Megachile_rotundata/error_correction/large_libs/s_?_?_?kb.sequence.cor.all.txt
 ftp://ftp.cbcb.umd.edu/pub/data/assembly/Megachile_rotundata/reads/s_?_?_?kb.sequence.cor.all.txt.gz
 /fs/szattic-asmg5/Bees/Megachile_rotundata/frg      # frg files to assemble
Assemblies
- CA Version: 6.1 (09/01/2010) /fs/szdevel/dpuiu/SourceForge/wgs-6.1/Linux-amd64/bin/runCA
- SOAP version 1.04: /nfshomes/dpuiu/szdevel/SOAPdenovo_Release1.04/
CA noOBT partial
- Bog consensus : 4014 error reads in 39 unitigs
 grep FAILED 5-consensus/asm*err | wc -l                                # 4014
 grep FAILED 5-consensus/asm*err | awk '{print $3}' | sort -u | wc -l   # 39
- Locations
 mulberry:/scratch2/dpuiu/Megachile_rotundata/Assembly/wgs-noOBT-partial.failed
 mulberry:/scratch2/dpuiu/Megachile_rotundata/Assembly/wgs-noOBT-partial
CA noOBT
Gatekeeper
- ~ 74X cvg
 LOAD                  STATS          
 7                     libInput       
 7                     libLoaded      
 0                     libErrors      
 5                     libWarnings    
 
 326,236,387           frgLoaded        
 326236387             numRandom      
 326236387             numPacked       
 LibraryName           numActiveFRG  numDelFRG  numMatedFRG  readLength   clearLength    #AATCATACAATCACAATCATAC|GTATGATTGTGATTGTATGATT   #AATCATACAATCAC|GTGATTGTATGATT   #AATCATACAATCAC|GTGATTGTATGATT(1mutation)
                                                                                                                                                             
 GLOBAL                326,236,387   0          315518526    37451489553  37418130441  
 LegacyUnmatedReads    0             0          0            0            0            
 s_2_3kb               9107424       0          9107424      942165284    910444046      #1125738  12%                                    1366161
 s_2_8kb               209336        0          209336       21814418     20787384       #46762    22%                                    55736(26%)                          68729(32%)
 s_3                   63618839      0          61696784     7343024554   7342819494     #         11%
 s_4                   63544688      0          61255960     7291557748   7291478152     #6804757  11%
 s_5                   63370860      0          61084368     7271218123   7271051639     #         11%
 s_6                   63780887      0          61685156     7359094156   7359012512     #         11%
 s_7                   62604353      0          60479498     7222615270   7222537214     #         11%
Meryl
 meryl -Dh -s 0-mercounts/asm-C-ms22-cm0
 Found 30570218845 mers.
 Found 271464470 distinct mers.
 Found 11164787 unique mers.
 Largest mercount is 87984949; 1896 mers are too big for histogram.
 1 11164787 0.0411 0.0004
 2 9376915 0.0757 0.0010
 3 3714582 0.0894 0.0013
 ...
 54 5344148 0.6573 0.1788
 ...
 fasta2tab.pl 0-mercounts/asm.nmers.ovl.fasta | sort -n -r | head -5
 87,908,217 AATCATACAATCACAATCATAC
 84,450,288 CAATCATACAATCACAATCATA
 ...
 74,975,282 AATAATATGAGTTAGATTGATA
 egrep -c 'AATCATACAATCACAATCATAC|GTATGATTGTGATTGTATGATT' *fastq *txt > egrep.count
 mulberry:/scratch2/dpuiu/Megachile_rotundata/Data/error_free/egrep.count
 meryl -Dh -s 0-mercounts/asm-C-ms15-cm0 | head
 Found 32850820919 mers.
 Found 142500876 distinct mers.
 Found 2381895 unique mers.
 Largest mercount is 125816941; 2023 mers are too big for histogram.
 1 2381895 0.0167 0.0001
 2 2325770 0.0330 0.0002
 3 708786 0.0380 0.0003
 ...
 54 1851586 0.4894 0.0671
 ...
Overlap
- job count :
 cat 1-overlapper/ovlopts.pl | grep ^\"h | wc -l
 924
- Failures: 709 jobs failed; runCA 6.1 could not restart overlap properly !!!
 cat 1-overlapper/overlap*out | grep "^Could not" | sort -u
 Could not malloc memory (1305184948 bytes)
 cat 1-overlapper/*pl | grep ^\"0 | sed 's/"//' | sed 's/\",//' >! 1-overlapper/ovlopts.pl.0
 cat 1-overlapper/*pl | grep ^\"h | sed 's/"//' | sed 's/\",//' >! 1-overlapper/ovlopts.pl.h
 cat 1-overlapper/*pl | grep ^\"-h | sed 's/"//' | sed 's/\",//' >! 1-overlapper/ovlopts.pl.-h
 paste 1-overlapper/ovlopts.pl.* | p 'print "overlap -M 8GB --hashload 0.8 -t 1 -h $F[3]  -r $F[5] -k 22 -k  \
       ./0-mercounts/asm.nmers.ovl.fasta  -o ./1-overlapper/$F[0]/$F[1].ovb.gz ./asm.gkpStore > \
       ./1-overlapper/overlap.0$..out \n";' | tail -709 > overlap.sh
- Stats
 overlapStore -d asm.ovlStore | awk '{print $1}' | uniq -c | awk '{print $1}' | count.pl | getSummary.pl -i 0 -j 1
 overlapStats -G asm.gkpStore -O asm.ovlStore -o asm
Location
mulberry:/scratch2/dpuiu/Megachile_rotundata/Assembly/wgs-noOBT
SOAPdenovo (Tanja)
 cat *.ContigIndex | grep -v ^E | grep -v ^i | count.pl -i 1 | getSummary.pl -j 1 -t "contigs"
 cat *.ContigIndex | grep -v ^E | grep -v ^i | count.pl -i 1 | getSummary.pl -j 1 -min 100 -t "contigs(>100bp)"
 grep "^>" *.scaf | getSummary.pl -i 2 -t scaf
- Stats
 .                elem     min  q1   q2    q3     max      mean      n50     sum
 contigs          9742349  31   32   33    37     114832   60.09     44      585430821
 contigs(>100bp)  177327   100  131  261   1398   114832   1333.68   3897    236496823   # N50 for Bee was 7K
 scaf             7863     102  903  3272  17692  2338728  37825.70  240706  297423517   # N50 for Bee was 1.17M
- Location
/fs/szattic-asmg5/Bees/Megachile_rotundata/Assembly/assembly5kbForAll
SOAPdenovo (Daniela)
- Stats
   .                    elem         min    q1     q2     q3     max        mean       n50        sum
   contigs(all)         6,917,796    31     32     34     40     121554     70.46      73         487,401,812
   contigs(>100bp)      210,666      100    124    222    1174   121554     1108.69    3138       233,563,401
   scaff                25,119       351    1896   4444   10914  1102803    11041.00   26876      277,338,897
   readOnContig         171,212,613
 cat asm.K31.contig | grep "^>" | awk '{print $3}' | uniq -c | awk '{print $2,$1}'  > asm.K31.contigLen.count
- Location
mulberry:/scratch2/dpuiu/Megachile_rotundata/Assembly/SOAPdenovo-redo