Collecting all videos and their information from a Youku album with .NET (VB.NET code)

Source: Internet
Author: User

This method extracts the Youku album ID from the album URL, then loops over the album's pages by that ID, downloading each page's source to extract its title tag and video ID (VID). There is nothing technically sophisticated about it.
The collection uses .NET's HttpWebRequest and HttpWebResponse classes; the page source is parsed with regular expressions.
This code is not very efficient. Parsing a single page takes roughly 0.5 to 2 seconds, so it is not suitable for large-scale collection. It may be faster to convert it to JavaScript.
That is as far as I have taken it for now, so I am sharing the code as is.

The code is VB.NET. Create a form named frmMain, add a TextBox, a ListBox, and two Buttons, then paste in the following code:

The code is as follows:
Imports System.Collections.Generic
Imports System.IO
Imports System.Net
Imports System.Text
Imports System.Text.RegularExpressions
''' <summary>
''' Main form. Button1 walks a Youku album (playlist) page by page and collects
''' each video's ID and title; Button2 shows the collected entries in ListBox1.
''' Expects the designer part of the form to declare TextBox1, ListBox1,
''' Button1 and Button2.
''' </summary>
Public Class frmMain

    ''' <summary>One collected video entry.</summary>
    Structure VList
        Dim id As Integer       ' zero-based index of the video within the album
        Dim title As String     ' title extracted from the page's title tag
        Dim vid1 As String      ' value of the page's "videoId" script variable
        Dim vid2 As String      ' value of the page's "videoId2" script variable

        ''' <summary>Formats the entry for display as index, title and videoId.</summary>
        Public Overrides Function ToString() As String
            Return String.Format("{0}:< {1}> [{2}]", id, title, vid1)
        End Function
    End Structure

    ' Captures the numeric album ID from an album URL such as
    ' http://www.youku.com/playlist_show/id_2350764.html
    Private Const AlbumIdPattern As String = "playlist_show/id_(\d+).*\.html"

    ' NOTE(review): the literal words in the two title patterns below look
    ' machine-translated in the published listing; confirm them against the
    ' real page markup before relying on title extraction.
    Private Const AlbumTitlePattern As String = "<title>(.+)-album-youku video</title>"
    Private Const VideoTitlePattern As String = "<title>(.+)-(.+)-video-youku video-watch online-</title>"

    ' Script variables embedded in each video page.
    Private Const VideoIdPattern As String = "var\s+videoId\s*=\s*""(\d+)""\s*;"
    Private Const VideoId2Pattern As String = "var\s+videoId2\s*=\s*""([\w=]+)""\s*;"

    ' Entries collected by Button1_Click; displayed and then emptied by Button2_Click.
    Dim myList As New List(Of VList)

    ''' <summary>
    ''' Reads the album URL from TextBox1, then requests the album's video pages
    ''' one index at a time until a page no longer yields a new video ID.
    ''' Collected entries are appended to myList and then displayed.
    ''' </summary>
    Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
        Dim listUrl As String = TextBox1.Text   ' album URL entered by the user

        Dim reg As Match = Regex.Match(listUrl, AlbumIdPattern)
        If Not reg.Success Then
            MsgBox("album list extraction failed! ")
            Exit Sub
        End If

        ' First pass fills in the album ID; the appended part keeps its
        ' placeholders for the per-page pass: {0} = video index, {1} = sort order.
        Dim tarUrl As String = String.Format("http://v.youku.com/v_playlist/f{0}", reg.Groups(1).Value) & "o{1}p{0}.html"

        Dim nowid As Integer = 0        ' index of the video currently being fetched
        Dim preVid As String = ""       ' videoId of the previously collected video

        Try
            ' Album landing page: only used to report the album name.
            Dim ret As String = FetchPage(listUrl)
            reg = Regex.Match(ret, AlbumTitlePattern)
            If reg.Success Then
                MsgBox("album name:" & reg.Groups(1).Value)
            Else
                MsgBox("album name extraction failed! ")
            End If

            Do
                ret = FetchPage(String.Format(tarUrl, nowid, "0"))

                Dim nlist As New VList
                nlist.id = nowid

                ' videoId: stop when it is missing or repeats the previous one,
                ' which is how the end of the album is detected.
                reg = Regex.Match(ret, VideoIdPattern)
                If Not reg.Success Then Exit Do
                If reg.Groups(1).Value = preVid Then Exit Do
                nlist.vid1 = reg.Groups(1).Value

                ' videoId2
                reg = Regex.Match(ret, VideoId2Pattern)
                If Not reg.Success Then Exit Do
                nlist.vid2 = reg.Groups(1).Value

                ' Title is the second capture group of the title pattern.
                reg = Regex.Match(ret, VideoTitlePattern)
                If reg.Success Then
                    nlist.title = reg.Groups(2).Value
                Else
                    nlist.title = "{name search error }"
                End If

                myList.Add(nlist)
                preVid = nlist.vid1
                Me.Text = nowid & ": Processing complete! "
                nowid += 1
            Loop

            MsgBox(nowid & "videos are all collected and processed! ")
        Catch ex As Exception When TypeOf ex Is WebException _
                              OrElse TypeOf ex Is UriFormatException _
                              OrElse TypeOf ex Is NotSupportedException _
                              OrElse TypeOf ex Is InvalidCastException
            ' Network failure or an address that is not a usable HTTP URL:
            ' report it instead of crashing, and keep what was collected so far.
            MsgBox("Collection stopped after " & nowid & " videos: " & ex.Message)
        End Try

        Button2_Click(sender, e)
    End Sub

    ''' <summary>
    ''' Shows every collected entry in ListBox1, then empties the collection.
    ''' </summary>
    Private Sub Button2_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button2.Click
        ListBox1.Items.Clear()
        For Each ls As VList In myList
            ListBox1.Items.Add(ls.ToString())
        Next
        myList.Clear()
    End Sub

    ''' <summary>
    ''' Downloads a page over HTTP and returns its body as text, decoded with the
    ''' character set reported by the response (UTF-8 when none is usable).
    ''' The response and reader are always disposed.
    ''' </summary>
    Private Shared Function FetchPage(ByVal url As String) As String
        Dim request As HttpWebRequest = DirectCast(WebRequest.Create(url), HttpWebRequest)
        Using response As HttpWebResponse = DirectCast(request.GetResponse(), HttpWebResponse)
            Dim pageEncoding As Encoding = Encoding.UTF8
            If Not String.IsNullOrEmpty(response.CharacterSet) Then
                Try
                    pageEncoding = Encoding.GetEncoding(response.CharacterSet)
                Catch ex As ArgumentException
                    ' Unrecognized charset name: keep the UTF-8 fallback.
                End Try
            End If

            Using reader As New StreamReader(response.GetResponseStream(), pageEncoding)
                Return reader.ReadToEnd()
            End Using
        End Using
    End Function

End Class


Night smell original
Blog: http://clso.cnblogs.com
Home: http://cleclso.cn
QQ: 315514678 E-mail: clso # qq.com
Technical Exchange is welcome!

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.