JSoup 跳过元素 Android

发布于 2024-12-20 11:05:15 字数 2317 浏览 4 评论 0原文

JSoup 似乎跳过了我的 HTML 字符串中的一些元素。我 100% 肯定所有内容都在 HTML 字符串中，但当我选择要解析的元素时，JSoup 仅读取其中一些元素，或者根本不读取。但我知道它们存在。这是我的代码：谢谢：

public void parseDoc() {
    final HttpParams params = new BasicHttpParams();
    HttpClientParams.setRedirecting(params, true);
    HttpClient httpclient = new DefaultHttpClient();
    HttpPost httppost = new HttpPost(
            "https://secure.groupfusion.net/processlogin.php");
    String HTML = "";
    try {
        List<NameValuePair> nameValuePairs = new ArrayList<NameValuePair>(3);
        nameValuePairs.add(new BasicNameValuePair("referral_page",
                "/modules/gradebook/ui/gradebook.phtml?type=student_view"));
        nameValuePairs.add(new BasicNameValuePair("currDomain",
                "beardenhs.knoxschools.org"));
        nameValuePairs.add(new BasicNameValuePair("username", username
                .getText().toString()));
        nameValuePairs.add(new BasicNameValuePair("password", password
                .getText().toString()));
        httppost.setEntity(new UrlEncodedFormEntity(nameValuePairs));

        HttpResponse response = httpclient.execute(httppost);

        HTML = EntityUtils.toString(response.getEntity());
        Document doc = Jsoup.parse(HTML);
        Element link = doc.select("a").first();
        String linkHref = link.attr("href");
        HttpGet request = new HttpGet();
        try {
            request.setURI(new URI(linkHref));
        } catch (URISyntaxException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        response = httpclient.execute(request);
        String html = "";
        InputStream in = response.getEntity().getContent();
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(in));
        StringBuilder str = new StringBuilder();
        String line = null;
        while ((line = reader.readLine()) != null) {
            str.append(line);
        }
        in.close();
        HTML = str.toString();
        doc = Jsoup.parse(HTML);
        Elements divs = doc.select("div.yuiTop");
        for (Element d: divs) {
            sting.append(d.text());
            sting.append("\n");
        }


    } catch (ClientProtocolException e) {
    } catch (IOException e) {
    }

}

原文

JSoup seems to be skipping some elements in my HTML string. I am 100% positive everything is in the HTML String, but JSoup is only reading some of the elements when I select them to be parsed, or none at all. But I know they exist. Here is my code: Thanks:

public void parseDoc() {
    final HttpParams params = new BasicHttpParams();
    HttpClientParams.setRedirecting(params, true);
    HttpClient httpclient = new DefaultHttpClient();
    HttpPost httppost = new HttpPost(
            "https://secure.groupfusion.net/processlogin.php");
    String HTML = "";
    try {
        List<NameValuePair> nameValuePairs = new ArrayList<NameValuePair>(3);
        nameValuePairs.add(new BasicNameValuePair("referral_page",
                "/modules/gradebook/ui/gradebook.phtml?type=student_view"));
        nameValuePairs.add(new BasicNameValuePair("currDomain",
                "beardenhs.knoxschools.org"));
        nameValuePairs.add(new BasicNameValuePair("username", username
                .getText().toString()));
        nameValuePairs.add(new BasicNameValuePair("password", password
                .getText().toString()));
        httppost.setEntity(new UrlEncodedFormEntity(nameValuePairs));

        HttpResponse response = httpclient.execute(httppost);

        HTML = EntityUtils.toString(response.getEntity());
        Document doc = Jsoup.parse(HTML);
        Element link = doc.select("a").first();
        String linkHref = link.attr("href");
        HttpGet request = new HttpGet();
        try {
            request.setURI(new URI(linkHref));
        } catch (URISyntaxException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        response = httpclient.execute(request);
        String html = "";
        InputStream in = response.getEntity().getContent();
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(in));
        StringBuilder str = new StringBuilder();
        String line = null;
        while ((line = reader.readLine()) != null) {
            str.append(line);
        }
        in.close();
        HTML = str.toString();
        doc = Jsoup.parse(HTML);
        Elements divs = doc.select("div.yuiTop");
        for (Element d: divs) {
            sting.append(d.text());
            sting.append("\n");
        }


    } catch (ClientProtocolException e) {
    } catch (IOException e) {
    }

}

分享到QQ

分享到微博