使用 Mongo Map/Reduce 获取子字典值

发布于 2024-11-28 16:35:09 字数 2305 浏览 1 评论 0原文

我有一个 mongo 集合,我想通过给定的网站名称、时间戳和变体 ID 获取“number_of_ad_clicks”的总价值。因为我们有大量数据,所以最好使用map/reduce。有人可以给我任何建议吗?

这是我的集合 json 格式

{ "_id" : ObjectId( "4e3c280ecacbd1333b00f5ff" ),
  "timestamp" : "20110805",
  "variants" : { "94" : { "number_of_ad_clicks" : 41,
      "number_of_search_keywords" : 9,
      "total_duration" : 0,
      "os" : { "os_2" : 2,
        "os_1" : 1,
        "os_0" : 0 },
      "countries" : { "ge" : 6,
        "ca" : 1,
        "fr" : 8,
        "uk" : 4,
        "us" : 6 },
      "screen_resolutions" : { "(320, 240)" : 1,
        "(640, 480)" : 5,
        "(1024, 960)" : 5,
        "(1280, 768)" : 5 },
      "widgets" : { "widget_1" : 1,
        "widget_0" : 0 },
      "languages" : { "ua_uk" : 8,
        "ca_en" : 2,
        "ca_fr" : 2,
        "us_en" : 5 },
      "search_keywords" : { "search_keyword_8" : 8,
        "search_keyword_5" : 5,
        "search_keyword_4" : 4,
        "search_keyword_7" : 7,
        "search_keyword_6" : 6,
        "search_keyword_1" : 1,
        "search_keyword_3" : 3,
        "search_keyword_2" : 2 },
      "number_of_pageviews" : 18,
      "browsers" : { "browser_4" : 4,
        "browser_0" : 0,
        "browser_1" : 1,
        "browser_2" : 2,
        "browser_3" : 3 },
      "keywords" : { "keyword_5" : 5,
        "keyword_4" : 4,
        "keyword_1" : 1,
        "keyword_0" : 0,
        "keyword_3" : 3,
        "keyword_2" : 2 },
      "number_of_keyword_clicks" : 83,
      "number_of_visits" : 96 } },
  "site_name" : "fonter.com",
  "number_of_variants" : 1 }

这是我的尝试,但失败了。以下是我的代码:

// Map step of the asker's (failing) attempt: emits the document's `variants`
// sub-document, wrapped in an object, under the key `this.query`.
// NOTE(review): the sample document shown above has no `query` field, so the
// emitted key is presumably undefined for every document -- confirm against
// the real data.
m = function() {
    emit(this.query, {variants: this.variants});
}

// Reduce step of the asker's (failing) attempt: walks the emitted values and
// returns a click count.
// NOTE(review): three things look wrong here, assuming `vals` is the array of
// values emitted by `m`:
//   - `length` is a property, so `vals.length()` throws instead of returning
//     the element count;
//   - `clicks = ...` overwrites on every pass rather than accumulating;
//   - `m` emits `{variants: ...}`, so `number_of_ad_clicks` is not a direct
//     property of each value.
r = function(key , vals) {
    var clicks = 0 ;
    for(var i = 0; i < vals.length(); i++){
        clicks = vals[i]['number_of_ad_clicks'];
    }
    return clicks;
}
// Runs the map/reduce pair over the documents of variant_daily_collection that
// match the given site_name and timestamp, writes the output to the "myoutput"
// collection, then lists that collection.
res = db.variant_daily_collection.mapReduce(m, r, {out : "myoutput", "query":{"site_name": 'fonter.com', 'timestamp': '20110805'}})
db.myoutput.find()

有人可以提出任何建议吗?

非常感谢,我尝试了您的解决方案,但没有返回任何结果。我按如下方式调用 mapReduce,有什么问题吗?

// Follow-up invocation using the `map` and `reduce` functions from the answer;
// the output goes to the "myoutput" collection, which is then listed.
// NOTE(review): the sample document has no top-level `variant_id` field (the
// variant ids are keys under `variants`), so this query filter presumably
// matches no documents -- a likely reason nothing comes back; confirm against
// the real data.
res = db.variant_daily_collection.mapReduce(map, reduce, {out : "myoutput", "query":{"site_name": 'facee.com', 'timestamp': '20110809', 'variant_id': '305'}})
db.myoutput.find()

I have a mongo collection, and I want to get the total value of 'number_of_ad_clicks' for a given site name, timestamp and variant id. Because we have a large amount of data, it would be better to use map/reduce. Could anyone give me a suggestion?

Here is my collection json format

{ "_id" : ObjectId( "4e3c280ecacbd1333b00f5ff" ),
  "timestamp" : "20110805",
  "variants" : { "94" : { "number_of_ad_clicks" : 41,
      "number_of_search_keywords" : 9,
      "total_duration" : 0,
      "os" : { "os_2" : 2,
        "os_1" : 1,
        "os_0" : 0 },
      "countries" : { "ge" : 6,
        "ca" : 1,
        "fr" : 8,
        "uk" : 4,
        "us" : 6 },
      "screen_resolutions" : { "(320, 240)" : 1,
        "(640, 480)" : 5,
        "(1024, 960)" : 5,
        "(1280, 768)" : 5 },
      "widgets" : { "widget_1" : 1,
        "widget_0" : 0 },
      "languages" : { "ua_uk" : 8,
        "ca_en" : 2,
        "ca_fr" : 2,
        "us_en" : 5 },
      "search_keywords" : { "search_keyword_8" : 8,
        "search_keyword_5" : 5,
        "search_keyword_4" : 4,
        "search_keyword_7" : 7,
        "search_keyword_6" : 6,
        "search_keyword_1" : 1,
        "search_keyword_3" : 3,
        "search_keyword_2" : 2 },
      "number_of_pageviews" : 18,
      "browsers" : { "browser_4" : 4,
        "browser_0" : 0,
        "browser_1" : 1,
        "browser_2" : 2,
        "browser_3" : 3 },
      "keywords" : { "keyword_5" : 5,
        "keyword_4" : 4,
        "keyword_1" : 1,
        "keyword_0" : 0,
        "keyword_3" : 3,
        "keyword_2" : 2 },
      "number_of_keyword_clicks" : 83,
      "number_of_visits" : 96 } },
  "site_name" : "fonter.com",
  "number_of_variants" : 1 }

Here is my attempt, but it failed.
Here is my code:

// Map step of the asker's (failing) attempt: emits the document's `variants`
// sub-document, wrapped in an object, under the key `this.query`.
// NOTE(review): the sample document shown above has no `query` field, so the
// emitted key is presumably undefined for every document -- confirm against
// the real data.
m = function() {
    emit(this.query, {variants: this.variants});
}

// Reduce step of the asker's (failing) attempt: walks the emitted values and
// returns a click count.
// NOTE(review): three things look wrong here, assuming `vals` is the array of
// values emitted by `m`:
//   - `length` is a property, so `vals.length()` throws instead of returning
//     the element count;
//   - `clicks = ...` overwrites on every pass rather than accumulating;
//   - `m` emits `{variants: ...}`, so `number_of_ad_clicks` is not a direct
//     property of each value.
r = function(key , vals) {
    var clicks = 0 ;
    for(var i = 0; i < vals.length(); i++){
        clicks = vals[i]['number_of_ad_clicks'];
    }
    return clicks;
}
// Runs the map/reduce pair over the documents of variant_daily_collection that
// match the given site_name and timestamp, writes the output to the "myoutput"
// collection, then lists that collection.
res = db.variant_daily_collection.mapReduce(m, r, {out : "myoutput", "query":{"site_name": 'fonter.com', 'timestamp': '20110805'}})
db.myoutput.find()

Could somebody offer any suggestions?

Thank you very much. I tried your solution, but nothing is returned.
I invoke mapReduce as follows; is there anything wrong?

// Follow-up invocation using the `map` and `reduce` functions from the answer;
// the output goes to the "myoutput" collection, which is then listed.
// NOTE(review): the sample document has no top-level `variant_id` field (the
// variant ids are keys under `variants`), so this query filter presumably
// matches no documents -- a likely reason nothing comes back; confirm against
// the real data.
res = db.variant_daily_collection.mapReduce(map, reduce, {out : "myoutput", "query":{"site_name": 'facee.com', 'timestamp': '20110809', 'variant_id': '305'}})
db.myoutput.find()

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

落花浅忆 2024-12-05 16:35:09

emit 函数同时发出一个key 和一个value

如果您习惯使用 SQL,请将 key 视为您的 GROUP BY,将 value 视为您的 SUM()、AVG() 等.

在您的情况下,您想要“分组依据”:site_name、时间戳和变体 ID。看起来您可能有多个变体,因此您需要循环遍历这些变体,如下所示:

/**
 * Map step for MongoDB mapReduce: emits one (key, value) pair per variant of
 * the current document, which MongoDB binds to `this`.
 *
 * Key:   { timestamp, site_name, variant_id } -- the "group by" fields.
 * Value: { clicks } -- the variant's number_of_ad_clicks (0 when absent).
 *
 * Emits nothing when the document has no `variants` sub-document.
 */
var map = function() {
  // The variants live on the document, so they must be read through `this`;
  // a bare `variants` identifier is not defined inside the map function.
  var variants = this.variants;
  for (var id in variants) {
    // Skip anything inherited through the prototype chain.
    if (!Object.prototype.hasOwnProperty.call(variants, id)) {
      continue;
    }

    var key = {};
    key.timestamp = this.timestamp;
    key.site_name = this.site_name;
    key.variant_id = id; // that's the "94" string.

    var value = {};
    // Fall back to 0 so a variant without the counter cannot turn the sum
    // computed by the reduce step into NaN.
    value.clicks = variants[id].number_of_ad_clicks || 0;

    emit(key, value);
  }
};

reduce 函数将获取一组值,每个值如下 { clicks: 41 }。该函数需要返回一个看起来相同的对象。

因此,如果您得到 values = [ {clicks:21}, {clicks:10}, {clicks:5} ],则必须输出 {clicks:36}

因此,您可以执行以下操作:

/**
 * Reduce step for MongoDB mapReduce: sums the `clicks` of every value emitted
 * for one key.
 *
 * @param {Object} key  - the grouping key (not used by the computation).
 * @param {Array}  vals - values shaped like { clicks: <number> }.
 * @returns {Object} a single { clicks: <total> } object; it has the same
 *   shape as the inputs, so the result can itself be fed back into reduce.
 */
var reduce = function(key, vals) {
    var returnValue = { clicks: 0 }; // initializing to zero
    // `length` is a property of the array, not a method; calling it as
    // `vals.length()` throws a TypeError.
    for (var i = 0; i < vals.length; i++) {
        returnValue.clicks += vals[i].clicks;
    }
    return returnValue;
};

请注意,map 中发出的 value 与 reduce 返回的值具有相同的形状。

The emit function emits both a key and a value.

If you are used to SQL, think of the key as your GROUP BY and the value as your SUM(), AVG(), etc.

In your case you want to "group by": site_name, timestamp and variant id. It looks like you may have more than one variant, so you will need to loop through the variants, like this:

/**
 * Map step for MongoDB mapReduce: emits one (key, value) pair per variant of
 * the current document, which MongoDB binds to `this`.
 *
 * Key:   { timestamp, site_name, variant_id } -- the "group by" fields.
 * Value: { clicks } -- the variant's number_of_ad_clicks (0 when absent).
 *
 * Emits nothing when the document has no `variants` sub-document.
 */
var map = function() {
  // The variants live on the document, so they must be read through `this`;
  // a bare `variants` identifier is not defined inside the map function.
  var variants = this.variants;
  for (var id in variants) {
    // Skip anything inherited through the prototype chain.
    if (!Object.prototype.hasOwnProperty.call(variants, id)) {
      continue;
    }

    var key = {};
    key.timestamp = this.timestamp;
    key.site_name = this.site_name;
    key.variant_id = id; // that's the "94" string.

    var value = {};
    // Fall back to 0 so a variant without the counter cannot turn the sum
    // computed by the reduce step into NaN.
    value.clicks = variants[id].number_of_ad_clicks || 0;

    emit(key, value);
  }
};

The reduce function will get an array of values each one like this { clicks: 41 }. The function needs to return one object that looks the same.

So if you get values = [ {clicks:21}, {clicks:10}, {clicks:5} ] you must output {clicks:36}.

So you do something like this:

/**
 * Reduce step for MongoDB mapReduce: sums the `clicks` of every value emitted
 * for one key.
 *
 * @param {Object} key  - the grouping key (not used by the computation).
 * @param {Array}  vals - values shaped like { clicks: <number> }.
 * @returns {Object} a single { clicks: <total> } object; it has the same
 *   shape as the inputs, so the result can itself be fed back into reduce.
 */
var reduce = function(key, vals) {
    var returnValue = { clicks: 0 }; // initializing to zero
    // `length` is a property of the array, not a method; calling it as
    // `vals.length()` throws a TypeError.
    for (var i = 0; i < vals.length; i++) {
        returnValue.clicks += vals[i].clicks;
    }
    return returnValue;
};

Note that the value from map has the same shape as the return from reduce.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文