Perl 处理两个哈希引用
我想比较两个哈希引用的值。 我的第一个哈希的数据转储器是这样的:
$VAR1 = {
'42-MG-BA' => [
{
'chromosome' => '19',
'position' => '35770059',
'genotype' => 'TC'
},
{
'chromosome' => '2',
'position' => '68019584',
'genotype' => 'G'
},
{
'chromosome' => '16',
'position' => '9561557',
'genotype' => 'G'
},
第二个哈希与此类似,但数组中有更多哈希。如果位置和染色体匹配,我想比较我的第一个和第二个哈希的基因型。
map {print "$_= $cave_snp_list->{$_}->[0]->{chromosome}\n"}sort keys %$cave_snp_list;
map {print "$_= $geno_seq_list->{$_}->[0]->{chromosome}\n"}sort keys %$geno_seq_list;
我可以对第一个哈希数组执行此操作。 您能帮我了解如何处理所有阵列吗?
这是我的完整实际代码
#!/software/bin/perl
use strict;
use warnings;
use Getopt::Long;
use Benchmark;
use Config::Config qw(Sequenom.ini);
useDatabase::Conn;
use Data::Dumper;
GetOptions("sam=s" => \my $sample);
my $geno_seq_list = getseqgenotypes($sample);
my $cave_snp_list = getcavemansnpfile($sample);
#print Dumper($geno_seq_list);
print scalar %$geno_seq_list, "\n";
foreach my $sam (keys %{$geno_seq_list}) {
my $seq_used = $geno_seq_list->{$sam};
my $cave_used = $cave_snp_list->{$sam};
print scalar(@$geno_seq_list->{$_}) if sort keys %$geno_seq_list, "\n";
print scalar(@$cave_used), "\n";
#foreach my $seq2com (@ {$seq_used } ){
# foreach my $cave2com( @ {$cave_used} ){
# print $seq2com->{chromosome},":" ,$cave2com->{chromosome},"\n";
# }
#}
map { print "$_= $cave_snp_list->{$_}->[0]->{chromosome}\n" } sort keys %$cave_snp_list;
map { print "$_= $geno_seq_list->{$_}->[0]->{chromosome}\n" } sort keys %$geno_seq_list;
}
sub getseqgenotypes {
my $snpconn;
my $gen_list = {};
$snpconn = Database::Conn->new('live');
$snpconn->addConnection(DBI->connect('dbi:Oracle:pssd.world', 'sn', 'ss', { RaiseError => 1, AutoCommit => 0 }),
'pssd');
#my $conn2 =Database::Conn->new('live');
#$conn2->addConnection(DBI->connect('dbi:Oracle:COSI.world','nst_owner','nst_owner', {RaiseError =>1 , AutoCommit=>0}),'nst');
my $id_ind = $snpconn->execute('snp::Sequenom::getIdIndforExomeSample', $sample);
my $genotype = $snpconn->executeArrRef('snp::Sequenom::getGenotypeCallsPosition', $id_ind);
foreach my $geno (@{$genotype}) {
push @{ $gen_list->{ $geno->[1] } }, {
chromosome => $geno->[2],
position => $geno->[3],
genotype => $geno->[4],
};
}
return ($gen_list);
} #end of sub getseqgenotypes
sub getcavemansnpfile {
my $nstconn;
my $caveman_list = {};
$nstconn = Database::Conn->new('live');
$nstconn->addConnection(
DBI->connect('dbi:Oracle:CANP.world', 'nst_owner', 'NST_OWNER', { RaiseError => 1, AutoCommit => 0 }), 'nst');
my $id_sample = $nstconn->execute('nst::Caveman::getSampleid', $sample);
#print "IDSample: $id_sample\n";
my $file_location = $nstconn->execute('nst::Caveman::getCaveManSNPSFile', $id_sample);
open(SNPFILE, "<$file_location") || die "Error: Cannot open the file $file_location:$!\n";
while (<SNPFILE>) {
chomp;
next if /^>/;
my @data = split;
my ($nor_geno, $tumor_geno) = split /\//, $data[5];
# array of hash
push @{ $caveman_list->{$sample} }, {
chromosome => $data[0],
position => $data[1],
genotype => $nor_geno,
};
} #end of while loop
close(SNPFILE);
return ($caveman_list);
}
I would like to compare the values of two hash references.
The data dumper of my first hash is this:
$VAR1 = {
'42-MG-BA' => [
{
'chromosome' => '19',
'position' => '35770059',
'genotype' => 'TC'
},
{
'chromosome' => '2',
'position' => '68019584',
'genotype' => 'G'
},
{
'chromosome' => '16',
'position' => '9561557',
'genotype' => 'G'
},
And the second hash is similar to this but with more hashes in the array. I would like to compare the genotype of my first and second hash if the position and the choromosome matches.
map {print "$_= $cave_snp_list->{$_}->[0]->{chromosome}\n"}sort keys %$cave_snp_list;
map {print "$_= $geno_seq_list->{$_}->[0]->{chromosome}\n"}sort keys %$geno_seq_list;
I could do that for the first array of the hashes.
Could you help me in how to work for all the arrays?
This is my actual code in full
#!/software/bin/perl
use strict;
use warnings;
use Getopt::Long;
use Benchmark;
use Config::Config qw(Sequenom.ini);
useDatabase::Conn;
use Data::Dumper;
GetOptions("sam=s" => \my $sample);
my $geno_seq_list = getseqgenotypes($sample);
my $cave_snp_list = getcavemansnpfile($sample);
#print Dumper($geno_seq_list);
print scalar %$geno_seq_list, "\n";
foreach my $sam (keys %{$geno_seq_list}) {
my $seq_used = $geno_seq_list->{$sam};
my $cave_used = $cave_snp_list->{$sam};
print scalar(@$geno_seq_list->{$_}) if sort keys %$geno_seq_list, "\n";
print scalar(@$cave_used), "\n";
#foreach my $seq2com (@ {$seq_used } ){
# foreach my $cave2com( @ {$cave_used} ){
# print $seq2com->{chromosome},":" ,$cave2com->{chromosome},"\n";
# }
#}
map { print "$_= $cave_snp_list->{$_}->[0]->{chromosome}\n" } sort keys %$cave_snp_list;
map { print "$_= $geno_seq_list->{$_}->[0]->{chromosome}\n" } sort keys %$geno_seq_list;
}
sub getseqgenotypes {
my $snpconn;
my $gen_list = {};
$snpconn = Database::Conn->new('live');
$snpconn->addConnection(DBI->connect('dbi:Oracle:pssd.world', 'sn', 'ss', { RaiseError => 1, AutoCommit => 0 }),
'pssd');
#my $conn2 =Database::Conn->new('live');
#$conn2->addConnection(DBI->connect('dbi:Oracle:COSI.world','nst_owner','nst_owner', {RaiseError =>1 , AutoCommit=>0}),'nst');
my $id_ind = $snpconn->execute('snp::Sequenom::getIdIndforExomeSample', $sample);
my $genotype = $snpconn->executeArrRef('snp::Sequenom::getGenotypeCallsPosition', $id_ind);
foreach my $geno (@{$genotype}) {
push @{ $gen_list->{ $geno->[1] } }, {
chromosome => $geno->[2],
position => $geno->[3],
genotype => $geno->[4],
};
}
return ($gen_list);
} #end of sub getseqgenotypes
sub getcavemansnpfile {
my $nstconn;
my $caveman_list = {};
$nstconn = Database::Conn->new('live');
$nstconn->addConnection(
DBI->connect('dbi:Oracle:CANP.world', 'nst_owner', 'NST_OWNER', { RaiseError => 1, AutoCommit => 0 }), 'nst');
my $id_sample = $nstconn->execute('nst::Caveman::getSampleid', $sample);
#print "IDSample: $id_sample\n";
my $file_location = $nstconn->execute('nst::Caveman::getCaveManSNPSFile', $id_sample);
open(SNPFILE, "<$file_location") || die "Error: Cannot open the file $file_location:$!\n";
while (<SNPFILE>) {
chomp;
next if /^>/;
my @data = split;
my ($nor_geno, $tumor_geno) = split /\//, $data[5];
# array of hash
push @{ $caveman_list->{$sample} }, {
chromosome => $data[0],
position => $data[1],
genotype => $nor_geno,
};
} #end of while loop
close(SNPFILE);
return ($caveman_list);
}
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
我看到的问题是,当您想要的是特定于任务的图形时,您正在构建用于通用数据存储的树。在构建记录时,您还可以构建将数据分组在一起的部分。下面仅举一例。
在主线上,您会像这样接收它们:
这种方法至少允许您找到相似的位置和染色体值。如果您打算对此做很多事情,我可能会建议采用面向对象的方法。
更新
假设您不必存储标签,我们可以将查找更改为
然后可以编写比较:
但是,如果您对较大列表的使用有限,您可以构造具体案例
并在创建较长列表时将其作为过滤器传递。
因此,无论哪个循环创建更长的列表,您都可以
The problem that I see is that you're constructing a tree for generic storage of data, when what you want is a graph, specific to the task. While you are constructing the record, you could also be constructing the part that groups data together. Below is just one example.
In the main line, you receive them like so:
This approach at least allows you to locate similar position and chromosome values. If you're going to do much with this, I might suggest an OO approach.
Update
Assuming that you wouldn't have to store the label, we could change the lookup to
And then the comparison could be written:
However, if you had limited use for the larger list, you could construct the specific case
and pass that in as a filter when creating the longer list.
Thus, in whichever loop creates the longer list, you could just go