CS50 DNA PSET6在小数据库上工作,但在大数据库上不起作用
这是我解决Python中CS50 PSET6 DNA问题的方法。它在小数据库上正常工作,但给出了
索引错误:列表索引以外的范围。
我尝试打印以查看错误在哪里。它也打印出大型数据库。不确定下一步该怎么做。
import csv
import sys
def main():
# TODO: Check for command-line usage
if len(sys.argv) != 3:
print("Usage: python dna.py database.csv sequence.txt")
sys.exit(1)
# TODO: Read database file into a variable
dna_database =[]
with open(sys.argv[1], "r") as dna_data_file:
reader = csv.DictReader(dna_data_file)
for row in reader:
dna_database.append(row)
# TODO: Read DNA sequence file into a variable
with open(sys.argv[2], "r") as load_sequence:
sequence = load_sequence.read()
# TODO: Find longest match of each STR in DNA sequence
STR = list(dna_database[0].keys())[1:]
STR_match ={}
for i in range(len(dna_database)):
# print(dna_database)
STR_match[STR[i]] = longest_match(sequence,STR[i])
# TODO: Check database for matching profiles
for i in range(len(dna_database)):
matches = 0
for j in range(len(STR)):
if int(STR_match[STR[j]]) == int(dna_database[i][STR[j]]):
matches += 1
if matches == len(STR):
print(dna_database[i]['name'])
sys.exit(0)
print("No Match")
return
def longest_match(sequence, subsequence):
"""Returns length of longest run of subsequence in sequence."""
# Initialize variables
longest_run = 0
subsequence_length = len(subsequence)
sequence_length = len(sequence)
# Check each character in sequence for most consecutive runs of subsequence
for i in range(sequence_length):
# Initialize count of consecutive runs
count = 0
# Check for a subsequence match in a "substring" (a subset of characters) within sequence
# If a match, move substring to next potential match in sequence
# Continue moving substring and checking for matches until out of consecutive matches
while True:
# Adjust substring start and end
start = i + count * subsequence_length
end = start + subsequence_length
# If there is a match in the substring
if sequence[start:end] == subsequence:
count += 1
# If there is no match in the substring
else:
break
# Update most consecutive matches found
longest_run = max(longest_run, count)
# After checking for runs at each character in seqeuence, return longest run found
return longest_run
main()
This is my solution to CS50 pset6 DNA problem in python. It works fine on small database but gives an
Index error: List Index Out of range.
I tried print to see where is the error.. It prints out large database as well. Not sure what to do next.
import csv
import sys
def main():
# TODO: Check for command-line usage
if len(sys.argv) != 3:
print("Usage: python dna.py database.csv sequence.txt")
sys.exit(1)
# TODO: Read database file into a variable
dna_database =[]
with open(sys.argv[1], "r") as dna_data_file:
reader = csv.DictReader(dna_data_file)
for row in reader:
dna_database.append(row)
# TODO: Read DNA sequence file into a variable
with open(sys.argv[2], "r") as load_sequence:
sequence = load_sequence.read()
# TODO: Find longest match of each STR in DNA sequence
STR = list(dna_database[0].keys())[1:]
STR_match ={}
for i in range(len(dna_database)):
# print(dna_database)
STR_match[STR[i]] = longest_match(sequence,STR[i])
# TODO: Check database for matching profiles
for i in range(len(dna_database)):
matches = 0
for j in range(len(STR)):
if int(STR_match[STR[j]]) == int(dna_database[i][STR[j]]):
matches += 1
if matches == len(STR):
print(dna_database[i]['name'])
sys.exit(0)
print("No Match")
return
def longest_match(sequence, subsequence):
"""Returns length of longest run of subsequence in sequence."""
# Initialize variables
longest_run = 0
subsequence_length = len(subsequence)
sequence_length = len(sequence)
# Check each character in sequence for most consecutive runs of subsequence
for i in range(sequence_length):
# Initialize count of consecutive runs
count = 0
# Check for a subsequence match in a "substring" (a subset of characters) within sequence
# If a match, move substring to next potential match in sequence
# Continue moving substring and checking for matches until out of consecutive matches
while True:
# Adjust substring start and end
start = i + count * subsequence_length
end = start + subsequence_length
# If there is a match in the substring
if sequence[start:end] == subsequence:
count += 1
# If there is no match in the substring
else:
break
# Update most consecutive matches found
longest_run = max(longest_run, count)
# After checking for runs at each character in seqeuence, return longest run found
return longest_run
main()
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论