如何从 Web 目录获取文件列表？

发布于 2024-08-09 02:34:20 字数 99 浏览 8 评论 0 原文

如何从 Web 目录获取文件列表？如果我访问 Web 目录的 URL，Web 浏览器会列出该目录中的所有文件。现在我只想在 C# 中获取该列表，并通过 BITS（后台智能传输服务）下载这些文件。

原文

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

一曲琵琶半遮面シ 2024-08-16 02:34:20

关于“在 C# 中获取该列表”部分：

// Collect every "*.jpg" file under the folder that Server.MapPath("/") resolves to
// (all subdirectories included), then write each path, HTML-encoded, followed by a line break.
string siteRoot = Server.MapPath("/");
string[] jpgPaths = Directory.GetFiles(siteRoot, "*.jpg", SearchOption.AllDirectories);
foreach (string jpgPath in jpgPaths)
{
    string encodedPath = Server.HtmlEncode(jpgPath);
    Response.Write(String.Format("{0}<br />", encodedPath));
}

Regarding the "get that list in C#" part:

// Collect every "*.jpg" file under the folder that Server.MapPath("/") resolves to
// (all subdirectories included), then write each path, HTML-encoded, followed by a line break.
string siteRoot = Server.MapPath("/");
string[] jpgPaths = Directory.GetFiles(siteRoot, "*.jpg", SearchOption.AllDirectories);
foreach (string jpgPath in jpgPaths)
{
    string encodedPath = Server.HtmlEncode(jpgPath);
    Response.Write(String.Format("{0}<br />", encodedPath));
}

回复收藏 0 原文

掩饰不了的爱 2024-08-16 02:34:20

这是我最近研究的一个有趣的话题。如您所知，您可以通过 COM 访问 BITS，但这里有几个项目可以使它更容易：

SharpBITS.NET
表单设计器友好的后台智能传输服务 (BITS) 包装

MSDN 上的这篇文章所涵盖的内容可能比您想了解的还要多一些。

我尝试了 CodeProject 链接中的代码，它似乎工作得相当好。 CodePlex 项目看起来确实不错，但我还没有尝试过。

回复收藏 0 原文

一念一轮回 2024-08-16 02:34:20

好吧，如果 Web 服务器允许列出相关目录中的文件，那么这件事就可以做到。

不幸的是，对于 Web 服务器如何返回列表并没有统一的标准。它通常采用 HTML 格式，但不同 Web 服务器生成的 HTML 格式并不总是相同。

如果您始终从同一 Web 服务器上的同一目录下载文件，只需在 Web 浏览器中打开该目录并执行“查看源代码”即可。然后尝试编写一个简单的正则表达式，从 HTML 源代码中提取每个文件名。

然后，您可以创建一个 WebClient，请求目录 URL，使用正则表达式解析响应以获取文件名，再使用 BITS 客户端处理这些文件。

希望这会有所帮助

回复收藏 0 原文

倥絔 2024-08-16 02:34:20

/// <summary>
/// Writes one HTML link per "*.html" file found in the calling user's debug
/// directory to the response, sorted descending by the file's full path.
/// </summary>
/// <remarks>
/// Nothing is written when the debug directory is not set or does not exist.
/// The file name is URL-escaped inside a quoted href and HTML-encoded in the
/// link text, so names containing spaces, quotes or angle brackets cannot
/// break or inject markup.
/// </remarks>
private void ListFiles()
{
    // get the user calling this page
    Gaf.Bl.User userObj = base.User;
    // get the debug directory of this user
    string strDebugDir = userObj.UserSettings.DebugDir;
    if (string.IsNullOrWhiteSpace(strDebugDir))
    {
        // the DirectoryInfo constructor throws on a null or blank path
        return;
    }

    // construct the DirectoryInfo for that directory
    DirectoryInfo di = new DirectoryInfo(strDebugDir);
    if (!di.Exists)
    {
        return;
    }

    // copy the files into a list ... it is easier to sort
    List<FileInfo> listFileInfo = new List<FileInfo>(di.GetFiles("*.html"));
    // inline sort descending by the file's full path
    listFileInfo.Sort((x, y) => string.Compare(y.FullName, x.FullName));

    // now print the result
    foreach (FileInfo fi in listFileInfo)
    {
        // escaped output contains only URL-safe characters, so it is safe inside the quoted attribute
        string href = System.Uri.EscapeDataString(fi.Name);
        string text = System.Net.WebUtility.HtmlEncode(fi.Name);
        Response.Write("<br><a href=\"" + href + "\">" + text + "</a>");
    }
}

/// <summary>
/// Writes one HTML link per "*.html" file found in the calling user's debug
/// directory to the response, sorted descending by the file's full path.
/// </summary>
/// <remarks>
/// Nothing is written when the debug directory is not set or does not exist.
/// The file name is URL-escaped inside a quoted href and HTML-encoded in the
/// link text, so names containing spaces, quotes or angle brackets cannot
/// break or inject markup.
/// </remarks>
private void ListFiles()
{
    // get the user calling this page
    Gaf.Bl.User userObj = base.User;
    // get the debug directory of this user
    string strDebugDir = userObj.UserSettings.DebugDir;
    if (string.IsNullOrWhiteSpace(strDebugDir))
    {
        // the DirectoryInfo constructor throws on a null or blank path
        return;
    }

    // construct the DirectoryInfo for that directory
    DirectoryInfo di = new DirectoryInfo(strDebugDir);
    if (!di.Exists)
    {
        return;
    }

    // copy the files into a list ... it is easier to sort
    List<FileInfo> listFileInfo = new List<FileInfo>(di.GetFiles("*.html"));
    // inline sort descending by the file's full path
    listFileInfo.Sort((x, y) => string.Compare(y.FullName, x.FullName));

    // now print the result
    foreach (FileInfo fi in listFileInfo)
    {
        // escaped output contains only URL-safe characters, so it is safe inside the quoted attribute
        string href = System.Uri.EscapeDataString(fi.Name);
        string text = System.Net.WebUtility.HtmlEncode(fi.Name);
        Response.Write("<br><a href=\"" + href + "\">" + text + "</a>");
    }
}

回复收藏 0 原文

夢归不見 2024-08-16 02:34:20

我编写了一些代码，可以从启用了目录浏览的 IIS 站点获取所有路径信息，包括文件和目录。您可以自定义正则表达式以满足您的需要（或改用 HTML 解析器）。此外，您还可以自己添加一些代码来获取更详细的信息，例如文件大小或创建时间。

您可以在两行中获取所有路径信息：

// Receives the crawl result: one PathInfo per discovered file or directory.
var pathInfos = new List<PathInfo>();
HttpHelper.GetAllFilePathAndSubDirectory("http://localhost:33333/", pathInfos);

帮助程序代码：

/// <summary>
/// Helpers for downloading an HTML directory listing and turning the links
/// found in it into a tree of <see cref="PathInfo"/> entries.
/// </summary>
public static class HttpHelper
{
    // Built once instead of on every recursive call.
    // Matches a digit, a space and then an anchor; the href (minus an optional
    // "http:"/"https:" prefix) is captured as "file".
    // NOTE(review): presumably the digit is the end of the size column of an IIS listing - confirm.
    private static readonly Regex FileLinkRegex =
        new Regex("[0-9] <a href=\"(http:|https:)?(?<file>.*?)\"", RegexOptions.IgnoreCase);

    // Matches the text "dir" followed (on the same line) by an anchor; the href
    // (minus an optional "http:"/"https:" prefix) is captured as "dir".
    private static readonly Regex DirLinkRegex =
        new Regex("dir.*?<a href=\"(http:|https:)?(?<dir>.*?)\"", RegexOptions.IgnoreCase);

    /// <summary>
    /// Issues an HTTP request for <paramref name="url"/> and returns the whole
    /// response body as a string, read with <see cref="StreamReader"/> defaults.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The response body text.</returns>
    public static string ReadHtmlContentFromUrl(string url)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Downloads the listing at <paramref name="baseUrl"/>, appends a
    /// <see cref="PathInfo"/> for every file link and every directory link found
    /// to <paramref name="pathInfos"/>, and recursively fills each directory's
    /// <c>Childs</c> list from that directory's own listing.
    /// </summary>
    /// <param name="baseUrl">URL of the directory listing to read.</param>
    /// <param name="pathInfos">List that receives the discovered entries.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="baseUrl"/> or <paramref name="pathInfos"/> is null.
    /// </exception>
    public static void GetAllFilePathAndSubDirectory(string baseUrl, List<PathInfo> pathInfos)
    {
        if (baseUrl == null)
        {
            throw new ArgumentNullException("baseUrl");
        }
        if (pathInfos == null)
        {
            throw new ArgumentNullException("pathInfos");
        }

        // URLs of the listings currently being processed on the recursion path.
        HashSet<string> ancestors = new HashSet<string>(StringComparer.Ordinal);
        CollectPaths(baseUrl, pathInfos, ancestors);
    }

    // Reads one listing and recurses into its directories; a directory whose URL is already
    // on the current recursion path is recorded but not descended into again.
    private static void CollectPaths(string pageUrl, List<PathInfo> pathInfos, HashSet<string> ancestors)
    {
        string key = pageUrl.TrimEnd('/');
        if (!ancestors.Add(key))
        {
            // A listing that links back to itself or to an ancestor would otherwise recurse forever.
            return;
        }

        try
        {
            Uri baseUri = new Uri(key);
            string rootUrl = baseUri.GetLeftPart(UriPartial.Authority);
            string html = ReadHtmlContentFromUrl(pageUrl);

            // Files. The captured href is appended to scheme+authority,
            // which assumes root-relative hrefs - TODO confirm for servers other than IIS.
            foreach (Match match in FileLinkRegex.Matches(html))
            {
                pathInfos.Add(new PathInfo(rootUrl + match.Groups["file"].Value, false));
            }

            // Directories: children are collected before the directory itself is appended.
            foreach (Match match in DirLinkRegex.Matches(html))
            {
                PathInfo dirInfo = new PathInfo(rootUrl + match.Groups["dir"].Value, true);
                CollectPaths(dirInfo.AbsoluteUrlStr, dirInfo.Childs, ancestors);
                pathInfos.Add(dirInfo);
            }
        }
        finally
        {
            // Only the current path is tracked, so the same directory reached through
            // a different (non-cyclic) route is still crawled.
            ancestors.Remove(key);
        }
    }

    /// <summary>
    /// Writes the absolute URL of every entry to the console, each entry
    /// followed by the entries of its <c>Childs</c> list (depth-first).
    /// </summary>
    /// <param name="pathInfos">Entries to print.</param>
    public static void PrintAllPathInfo(List<PathInfo> pathInfos)
    {
        foreach (PathInfo info in pathInfos)
        {
            Console.WriteLine(info.AbsoluteUrlStr);
            PrintAllPathInfo(info.Childs);
        }
    }
}



/// <summary>
/// A single entry (file or directory) identified by its absolute URL,
/// together with the child entries collected for it.
/// </summary>
public class PathInfo
{
    /// <summary>
    /// Creates an entry for <paramref name="absoluteUri"/> with an empty child list.
    /// </summary>
    /// <param name="absoluteUri">Absolute URL of the entry; parsed with <see cref="Uri"/>.</param>
    /// <param name="isDir">True when the entry is a directory.</param>
    public PathInfo(string absoluteUri, bool isDir)
    {
        this.Childs = new List<PathInfo>();
        this.IsDir = isDir;
        this.AbsoluteUrl = new Uri(absoluteUri);
    }

    /// <summary>Whether the entry is a directory.</summary>
    public bool IsDir { get; set; }

    /// <summary>Entries contained in this entry; starts out empty.</summary>
    public List<PathInfo> Childs { get; set; }

    /// <summary>The parsed absolute URL of the entry.</summary>
    public Uri AbsoluteUrl { get; set; }

    /// <summary>String form of <see cref="AbsoluteUrl"/>.</summary>
    public string AbsoluteUrlStr
    {
        get
        {
            Uri url = this.AbsoluteUrl;
            return url.ToString();
        }
    }

    /// <summary>Scheme and authority part of <see cref="AbsoluteUrl"/>.</summary>
    public string RootUrl
    {
        get
        {
            Uri url = this.AbsoluteUrl;
            return url.GetLeftPart(UriPartial.Authority);
        }
    }

    /// <summary>Path and query part of <see cref="AbsoluteUrl"/>.</summary>
    public string RelativeUrl
    {
        get
        {
            Uri url = this.AbsoluteUrl;
            return url.PathAndQuery;
        }
    }

    /// <summary>Query part of <see cref="AbsoluteUrl"/>.</summary>
    public string Query
    {
        get
        {
            Uri url = this.AbsoluteUrl;
            return url.Query;
        }
    }

    /// <summary>
    /// Returns a one-line summary: relative URL, directory flag, child count and absolute URL.
    /// </summary>
    public override string ToString()
    {
        object[] parts = { this.RelativeUrl, this.IsDir, this.Childs.Count, this.AbsoluteUrlStr };
        return String.Format("{0} IsDir {1} ChildCount {2} AbsUrl {3}", parts);
    }
}

I wrote some code that can get all path info, including files and directories, from an IIS site that allows directory listing. You can customize the regex to match your needs (or switch to an HTML parser). You can also add some code yourself to get more detailed info, such as file size or creation time.

You can get all the path info in two lines:

// Receives the crawl result: one PathInfo per discovered file or directory.
var pathInfos = new List<PathInfo>();
HttpHelper.GetAllFilePathAndSubDirectory("http://localhost:33333/", pathInfos);

The helper code:

/// <summary>
/// Helpers for downloading an HTML directory listing and turning the links
/// found in it into a tree of <see cref="PathInfo"/> entries.
/// </summary>
public static class HttpHelper
{
    // Built once instead of on every recursive call.
    // Matches a digit, a space and then an anchor; the href (minus an optional
    // "http:"/"https:" prefix) is captured as "file".
    // NOTE(review): presumably the digit is the end of the size column of an IIS listing - confirm.
    private static readonly Regex FileLinkRegex =
        new Regex("[0-9] <a href=\"(http:|https:)?(?<file>.*?)\"", RegexOptions.IgnoreCase);

    // Matches the text "dir" followed (on the same line) by an anchor; the href
    // (minus an optional "http:"/"https:" prefix) is captured as "dir".
    private static readonly Regex DirLinkRegex =
        new Regex("dir.*?<a href=\"(http:|https:)?(?<dir>.*?)\"", RegexOptions.IgnoreCase);

    /// <summary>
    /// Issues an HTTP request for <paramref name="url"/> and returns the whole
    /// response body as a string, read with <see cref="StreamReader"/> defaults.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The response body text.</returns>
    public static string ReadHtmlContentFromUrl(string url)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Downloads the listing at <paramref name="baseUrl"/>, appends a
    /// <see cref="PathInfo"/> for every file link and every directory link found
    /// to <paramref name="pathInfos"/>, and recursively fills each directory's
    /// <c>Childs</c> list from that directory's own listing.
    /// </summary>
    /// <param name="baseUrl">URL of the directory listing to read.</param>
    /// <param name="pathInfos">List that receives the discovered entries.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="baseUrl"/> or <paramref name="pathInfos"/> is null.
    /// </exception>
    public static void GetAllFilePathAndSubDirectory(string baseUrl, List<PathInfo> pathInfos)
    {
        if (baseUrl == null)
        {
            throw new ArgumentNullException("baseUrl");
        }
        if (pathInfos == null)
        {
            throw new ArgumentNullException("pathInfos");
        }

        // URLs of the listings currently being processed on the recursion path.
        HashSet<string> ancestors = new HashSet<string>(StringComparer.Ordinal);
        CollectPaths(baseUrl, pathInfos, ancestors);
    }

    // Reads one listing and recurses into its directories; a directory whose URL is already
    // on the current recursion path is recorded but not descended into again.
    private static void CollectPaths(string pageUrl, List<PathInfo> pathInfos, HashSet<string> ancestors)
    {
        string key = pageUrl.TrimEnd('/');
        if (!ancestors.Add(key))
        {
            // A listing that links back to itself or to an ancestor would otherwise recurse forever.
            return;
        }

        try
        {
            Uri baseUri = new Uri(key);
            string rootUrl = baseUri.GetLeftPart(UriPartial.Authority);
            string html = ReadHtmlContentFromUrl(pageUrl);

            // Files. The captured href is appended to scheme+authority,
            // which assumes root-relative hrefs - TODO confirm for servers other than IIS.
            foreach (Match match in FileLinkRegex.Matches(html))
            {
                pathInfos.Add(new PathInfo(rootUrl + match.Groups["file"].Value, false));
            }

            // Directories: children are collected before the directory itself is appended.
            foreach (Match match in DirLinkRegex.Matches(html))
            {
                PathInfo dirInfo = new PathInfo(rootUrl + match.Groups["dir"].Value, true);
                CollectPaths(dirInfo.AbsoluteUrlStr, dirInfo.Childs, ancestors);
                pathInfos.Add(dirInfo);
            }
        }
        finally
        {
            // Only the current path is tracked, so the same directory reached through
            // a different (non-cyclic) route is still crawled.
            ancestors.Remove(key);
        }
    }

    /// <summary>
    /// Writes the absolute URL of every entry to the console, each entry
    /// followed by the entries of its <c>Childs</c> list (depth-first).
    /// </summary>
    /// <param name="pathInfos">Entries to print.</param>
    public static void PrintAllPathInfo(List<PathInfo> pathInfos)
    {
        foreach (PathInfo info in pathInfos)
        {
            Console.WriteLine(info.AbsoluteUrlStr);
            PrintAllPathInfo(info.Childs);
        }
    }
}



/// <summary>
/// A single entry (file or directory) identified by its absolute URL,
/// together with the child entries collected for it.
/// </summary>
public class PathInfo
{
    /// <summary>
    /// Creates an entry for <paramref name="absoluteUri"/> with an empty child list.
    /// </summary>
    /// <param name="absoluteUri">Absolute URL of the entry; parsed with <see cref="Uri"/>.</param>
    /// <param name="isDir">True when the entry is a directory.</param>
    public PathInfo(string absoluteUri, bool isDir)
    {
        this.Childs = new List<PathInfo>();
        this.IsDir = isDir;
        this.AbsoluteUrl = new Uri(absoluteUri);
    }

    /// <summary>Whether the entry is a directory.</summary>
    public bool IsDir { get; set; }

    /// <summary>Entries contained in this entry; starts out empty.</summary>
    public List<PathInfo> Childs { get; set; }

    /// <summary>The parsed absolute URL of the entry.</summary>
    public Uri AbsoluteUrl { get; set; }

    /// <summary>String form of <see cref="AbsoluteUrl"/>.</summary>
    public string AbsoluteUrlStr
    {
        get
        {
            Uri url = this.AbsoluteUrl;
            return url.ToString();
        }
    }

    /// <summary>Scheme and authority part of <see cref="AbsoluteUrl"/>.</summary>
    public string RootUrl
    {
        get
        {
            Uri url = this.AbsoluteUrl;
            return url.GetLeftPart(UriPartial.Authority);
        }
    }

    /// <summary>Path and query part of <see cref="AbsoluteUrl"/>.</summary>
    public string RelativeUrl
    {
        get
        {
            Uri url = this.AbsoluteUrl;
            return url.PathAndQuery;
        }
    }

    /// <summary>Query part of <see cref="AbsoluteUrl"/>.</summary>
    public string Query
    {
        get
        {
            Uri url = this.AbsoluteUrl;
            return url.Query;
        }
    }

    /// <summary>
    /// Returns a one-line summary: relative URL, directory flag, child count and absolute URL.
    /// </summary>
    public override string ToString()
    {
        object[] parts = { this.RelativeUrl, this.IsDir, this.Childs.Count, this.AbsoluteUrlStr };
        return String.Format("{0} IsDir {1} ChildCount {2} AbsUrl {3}", parts);
    }
}

回复收藏 0 原文

~没有更多了~