#!/usr/bin/python
"""Report the length and dinucleotide composition of a genome sequence file.

The input file is read line by line, line terminators are stripped, and the
remaining text is concatenated into a single sequence.  The script then prints
the sequence length, its first 1000 characters, and the count and percentage
of every overlapping two-character window (dinucleotide) in the sequence.

The code runs unchanged on Python 2.7 and Python 3: every ``print`` call
receives exactly one pre-formatted string argument.
"""

from collections import Counter

# Default input file, read when the script is run directly.
seq_filename = "Ecoli_genome.txt"


def read_genome(path):
    """Return the contents of *path* as one string with line endings removed.

    Only trailing ``\\r`` / ``\\n`` characters are stripped from each line;
    all other characters are kept as-is.

    Raises:
        IOError / OSError: if the file cannot be opened or read.
    """
    # ``with`` guarantees the handle is closed even if reading fails, and
    # ``join`` avoids the quadratic cost of repeated string ``+=``.
    with open(path, "r") as seq_file:
        return "".join(raw_line.rstrip("\r\n") for raw_line in seq_file)


def count_dinucleotides(genome):
    """Count every overlapping two-character window in *genome*.

    Returns:
        A ``Counter`` mapping each dinucleotide to its number of occurrences.
        It is empty when *genome* has fewer than two characters.
    """
    return Counter(genome[pos:pos + 2] for pos in range(len(genome) - 1))


def report_lines(genome):
    """Build the report for *genome* as a list of output lines.

    The first two lines give the sequence length and its first 1000
    characters.  They are followed by one line per distinct dinucleotide,
    in sorted order so that the output is reproducible between runs.
    """
    total_length = len(genome)
    lines = [
        "This genome was {0} nucleotides long".format(total_length),
        "The first 1000 nucleotides are {0}".format(genome[0:1000]),
    ]

    counts = count_dinucleotides(genome)
    # A sequence of length L contains L - 1 overlapping dinucleotides, so that
    # is the denominator that makes the percentages sum to 100.  The loop body
    # only runs when at least one dinucleotide exists, so this is never zero.
    total_pairs = total_length - 1
    for dinuc in sorted(counts):
        fraction = 100.0 * counts[dinuc] / total_pairs
        lines.append(
            "The dinucleotide {0} occurs {1} times, or {2} %".format(
                dinuc, counts[dinuc], fraction
            )
        )
    return lines


def main(path=seq_filename):
    """Read the genome stored at *path* and print its dinucleotide report."""
    for line in report_lines(read_genome(path)):
        print(line)


if __name__ == "__main__":
    main()