Megachile rotundata: Difference between revisions

From Cbcb
Jump to navigation Jump to search
Line 120: Line 120:
   contigs(>100bp)      210666    100    124    222    1174  121554    1108.69    3138      233563401
   contigs(>100bp)      210666    100    124    222    1174  121554    1108.69    3138      233563401
   scaff                25119      351    1896  4444  10914  1102803    11041.00  26876      277338897
   scaff                25119      351    1896  4444  10914  1102803    11041.00  26876      277338897
  cat asm.K31.contig | grep "^>" | awk '{print $3}' | uniq -c | awk '{print $2,$1}'  > asm.K31.contigLen.count


* Location
* Location


   mulberry:/scratch2/dpuiu/Megachile_rotundata/Assembly/SOAPdenovo-redo
   mulberry:/scratch2/dpuiu/Megachile_rotundata/Assembly/SOAPdenovo-redo

Revision as of 03:01, 3 September 2010

Data

Original Traces

  • 8 pairs of data files (paired ends)
 cat trace.count | grep _1_ | sed 's/_sequence.txt//' | perl -ane 'print "  ",$F[1],"\t",$F[0]/4,"\t",$F[0]/2,"\n";'
 lib        insert   mates           reads        readLen   ~coverage(500M genome)  reverse  adaptors            comments
 s_2_1_3kbp 3000     21563283        43,126,566   124       11                      ?        circularization
 s_2_1_5kbp 5000/300 36218589        72,437,178   35        5                       yes      ?                   insert size is << 5kbp
 s_2_1_8kbp 8000     198377          396,754      124       0.1                     ?        ?
 s_3_1      475      35548153        71,096,306   124       18
 s_4_1      475      35471044        70,942,088   124       18
 s_5_1      475      35616846        71,233,692   124       18
 s_6_1      475      35303840        70,607,680   124       18
 s_7_1      475      34893313        69,786,626   124       18

Adaptors

 >circularization
 CGTAATAACTTCGTATAGCATACATTATACGAAGTTATACGA
 >circularization.revcomp
 TCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACG

Location

 /fs/szattic-asmg5/Bees/Megachile_rotundata/error_correction/large_libs/s_?_?_?kb.sequence.cor.all.txt
 ftp://ftp.cbcb.umd.edu/pub/data/assembly/Megachile_rotundata/reads/s_?_?_?kb.sequence.cor.all.txt.gz

 /fs/szattic-asmg5/Bees/Megachile_rotundata/frg  # frg files to assemble

Assemblies

  • CA Version: 6.1 (09/01/2010) /fs/szdevel/dpuiu/SourceForge/wgs-6.1/Linux-amd64/bin/runCA
  • SOAP version 1.04: /nfshomes/dpuiu/szdevel/SOAPdenovo_Release1.04/

CA noOBT

Gatekeeper

  • ~74X coverage
 LOAD                  STATS          
 7                     libInput       
 7                     libLoaded      
 0                     libErrors      
 5                     libWarnings    
 
 326,236,387           frgLoaded        
 326236387             numRandom      
 326236387             numPacked       

 LibraryName           numActiveFRG  numDelFRG  numMatedFRG  readLength   clearLength    #AATCATACAATCACAATCATAC|GTATGATTGTGATTGTATGATT
 GLOBAL                326,236,387   0          315518526    37451489553  37418130441  
 LegacyUnmatedReads    0             0          0            0            0            
 s_2_3kb               9107424       0          9107424      942165284    910444046      #1125738  12%
 s_2_8kb               209336        0          209336       21814418     20787384       #46762    22%
 s_3                   63618839      0          61696784     7343024554   7342819494     #         11%
 s_4                   63544688      0          61255960     7291557748   7291478152     #6804757  11%
 s_5                   63370860      0          61084368     7271218123   7271051639     #         11%
 s_6                   63780887      0          61685156     7359094156   7359012512     #         11%
 s_7                   62604353      0          60479498     7222615270   7222537214     #         11%

Meryl

 Found 30570218845 mers.
 Found 271464470 distinct mers.
 Found 11164787 unique mers.
 Largest mercount is 87984949; 1896 mers are too big for histogram.
 1       11164787        0.0411  0.0004
 2       9376915 0.0757  0.0010
 3       3714582 0.0894  0.0013
 ...
 54      5344148 0.6573  0.1788
 ... 
 fasta2tab.pl 0-mercounts/asm.nmers.ovl.fasta | sort -n -r | head -5
 87908217        AATCATACAATCACAATCATAC
 84450288        CAATCATACAATCACAATCATA
 82648568        ATCATACAATCACAATCATACA
 82247943        ATTGTATGATTGTGATTGTATG
 81906734        TCATACAATCACAATCATACAA
 ...
 egrep -c 'AATCATACAATCACAATCATAC|GTATGATTGTGATTGTATGATT' *fastq *txt > egrep.count
 mulberry:/scratch2/dpuiu/Megachile_rotundata/Data/error_free/egrep.count

Overlap

  • Job count:
 cat 1-overlapper/ovlopts.pl | grep ^\"h | wc -l
 924
  • Stats
 overlapStore -d asm.ovlStore | awk '{print $1}' | uniq -c | awk '{print $1}' | count.pl | getSummary.pl -i 0 -j 1
 overlapStats -G asm.gkpStore -O asm.ovlStore -o asm

Location

 mulberry:/scratch2/dpuiu/Megachile_rotundata/Assembly/wgs-noOBT

SOAPdenovo (Tanja)

 cat *.ContigIndex | grep -v ^E | grep -v ^i | count.pl -i 1 | getSummary.pl -j 1 -t "contigs"
 cat *.ContigIndex | grep -v ^E | grep -v ^i | count.pl -i 1 | getSummary.pl -j 1 -min 100 -t "contigs(>100bp)"
 grep "^>" *.scaf | getSummary.pl -i 2 -t scaf
  • Stats
 .                    elem       min    q1     q2     q3     max        mean       n50        sum            
 contigs              9742349    31     32     33     37     114832     60.09      44         585430821      
 contigs(>100bp)      177327     100    131    261    1398   114832     1333.68    3897       236496823     # N50 for Bee was 7K
 scaf                 7863       102    903    3272   17692  2338728    37825.70   240706     297423517     # N50 for Bee was 1.17M

  • Location
 /fs/szattic-asmg5/Bees/Megachile_rotundata/Assembly/assembly5kbForAll

SOAPdenovo (Daniela)

  • Stats
 .                    elem       min    q1     q2     q3     max        mean       n50        sum            
 contigs(all)         6917796    31     32     34     40     121554     70.46      73         487401812      
 contigs(>100bp)      210666     100    124    222    1174   121554     1108.69    3138       233563401
 scaff                25119      351    1896   4444   10914  1102803    11041.00   26876      277338897
 cat asm.K31.contig | grep "^>" | awk '{print $3}' | uniq -c | awk '{print $2,$1}'  > asm.K31.contigLen.count
  • Location
 mulberry:/scratch2/dpuiu/Megachile_rotundata/Assembly/SOAPdenovo-redo