使用Python和Mechanize提交表单数据并进行身份验证

发布于 2024-10-12 09:20:15 字数 1156 浏览 4 评论 0原文

我想要登录 Reddit.com 网站，导航到页面的特定区域，然后提交评论。我不明白这段代码有什么问题，但它不起作用，因为 Reddit 网站上没有反映任何更改。

import mechanize
import cookielib


def main():

#Browser
br = mechanize.Browser()


# Cookie Jar
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)

# Browser options
br.set_handle_equiv(True)
br.set_handle_gzip(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)

# Follows refresh 0 but not hangs on refresh > 0
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

#Opens the site to be navigated
r= br.open('http://www.reddit.com')
html = r.read()

# Select the second (index one) form
br.select_form(nr=1)

# User credentials
br.form['user'] = 'DUMMYUSERNAME'
br.form['passwd'] = 'DUMMYPASSWORD'

# Login
br.submit()

#Open up comment page
r= br.open('http://www.reddit.com/r/PoopSandwiches/comments/f47f8/testing/')
html = r.read()

#Text box is the 8th form on the page (which, I believe, is the text area)
br.select_form(nr=7)

#Change 'text' value to a testing string
br.form['text']= "this is an automated test"

#Submit the information  
br.submit()

这有什么问题吗？

原文

I want to submit login to the website Reddit.com, navigate to a particular area of the page, and submit a comment. I don't see what's wrong with this code, but it is not working in that no change is reflected on the Reddit site.

import mechanize
import cookielib


def main():

#Browser
br = mechanize.Browser()


# Cookie Jar
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)

# Browser options
br.set_handle_equiv(True)
br.set_handle_gzip(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)

# Follows refresh 0 but not hangs on refresh > 0
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

#Opens the site to be navigated
r= br.open('http://www.reddit.com')
html = r.read()

# Select the second (index one) form
br.select_form(nr=1)

# User credentials
br.form['user'] = 'DUMMYUSERNAME'
br.form['passwd'] = 'DUMMYPASSWORD'

# Login
br.submit()

#Open up comment page
r= br.open('http://www.reddit.com/r/PoopSandwiches/comments/f47f8/testing/')
html = r.read()

#Text box is the 8th form on the page (which, I believe, is the text area)
br.select_form(nr=7)

#Change 'text' value to a testing string
br.form['text']= "this is an automated test"

#Submit the information  
br.submit()

What's wrong with this?

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

我很坚强 2024-10-19 09:20:15

如果可能的话，我绝对建议尝试使用 API，但这对我有用（不适用于您的示例帖子，该帖子已被删除，但适用于任何活动的帖子）：

#!/usr/bin/env python

import mechanize
import cookielib
import urllib
import logging
import sys

def main():

    br = mechanize.Browser()
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)

    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)

    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    r= br.open('http://www.reddit.com')

    # Select the second (index one) form
    br.select_form(nr=1)

    # User credentials
    br.form['user'] = 'user'
    br.form['passwd'] = 'passwd'

    # Login
    br.submit()

    # Open up comment page
    posting = 'http://www.reddit.com/r/PoopSandwiches/comments/f47f8/testing/'
    rval = 'PoopSandwiches'
    # you can get the rval in other ways, but this will work for testing

    r = br.open(posting)

    # You need the 'uh' value from the first form
    br.select_form(nr=0)
    uh = br.form['uh']

    br.select_form(nr=7)
    thing_id = br.form['thing_id']
    id = '#' + br.form.attrs['id']
    # The id that gets posted is the form id with a '#' prepended.

    data = {'uh':uh, 'thing_id':thing_id, 'id':id, 'renderstyle':'html', 'r':rval, 'text':"Your text here!"}
    new_data_dict = dict((k, urllib.quote(v).replace('%20', '+')) for k, v in data.iteritems())

    # not sure if the replace needs to happen, I did it anyway
    new_data = 'thing_id=%(thing_id)s&text=%(text)s&id=%(id)s&r=%(r)s&uh=%(uh)s&renderstyle=%(renderstyle)s' %(new_data_dict)

    # not sure which of these headers are really needed, but it works with all
    # of them, so why not just include them.
    req = mechanize.Request('http://www.reddit.com/api/comment', new_data)
    req.add_header('Referer', posting)
    req.add_header('Accept', ' application/json, text/javascript, */*')
    req.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
    req.add_header('X-Requested-With', 'XMLHttpRequest')
    cj.add_cookie_header(req)
    res = mechanize.urlopen(req)

main()

关闭 javascript 并查看 reddit 的评论会很有趣则予以处理。现在，在发布帖子时调用的 onsubmit 函数中发生了很多魔法。这是添加 uh 和 id 值的位置。

I would definitely suggest trying to use the API if possible, but this works for me (not for your example post, which has been deleted, but for any active one):

#!/usr/bin/env python

import mechanize
import cookielib
import urllib
import logging
import sys

def main():

    br = mechanize.Browser()
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)

    br.set_handle_equiv(True)
    br.set_handle_gzip(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)

    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    r= br.open('http://www.reddit.com')

    # Select the second (index one) form
    br.select_form(nr=1)

    # User credentials
    br.form['user'] = 'user'
    br.form['passwd'] = 'passwd'

    # Login
    br.submit()

    # Open up comment page
    posting = 'http://www.reddit.com/r/PoopSandwiches/comments/f47f8/testing/'
    rval = 'PoopSandwiches'
    # you can get the rval in other ways, but this will work for testing

    r = br.open(posting)

    # You need the 'uh' value from the first form
    br.select_form(nr=0)
    uh = br.form['uh']

    br.select_form(nr=7)
    thing_id = br.form['thing_id']
    id = '#' + br.form.attrs['id']
    # The id that gets posted is the form id with a '#' prepended.

    data = {'uh':uh, 'thing_id':thing_id, 'id':id, 'renderstyle':'html', 'r':rval, 'text':"Your text here!"}
    new_data_dict = dict((k, urllib.quote(v).replace('%20', '+')) for k, v in data.iteritems())

    # not sure if the replace needs to happen, I did it anyway
    new_data = 'thing_id=%(thing_id)s&text=%(text)s&id=%(id)s&r=%(r)s&uh=%(uh)s&renderstyle=%(renderstyle)s' %(new_data_dict)

    # not sure which of these headers are really needed, but it works with all
    # of them, so why not just include them.
    req = mechanize.Request('http://www.reddit.com/api/comment', new_data)
    req.add_header('Referer', posting)
    req.add_header('Accept', ' application/json, text/javascript, */*')
    req.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
    req.add_header('X-Requested-With', 'XMLHttpRequest')
    cj.add_cookie_header(req)
    res = mechanize.urlopen(req)

main()

It would be interesting to turn javascript off and see how the reddit comments are handled then. Right now there is a bunch of magic that happens in an onsubmit function called when making your post. This is where the uh and id value get added.

回复收藏 0 原文

~没有更多了~