上次用gawk搭配curl實現了下載優酷視頻。相比較而言,YouTube雖然影響力要大於優酷,但從程式的實作而言,YouTube要簡單得多。
程式依然是讀取需要下載的列表檔案tube,每行一筆記錄:第一欄是YouTube視頻網址,第二欄是存檔名稱(程式會自動加上.flv副檔名)。格式應該是這樣:
http://www.youtube.com/watch?v=vkDAKke56n myvideo
在Cygwin下運行看起來是這樣的:
下面是代碼:
#! /usr/local/bin/gawk -f
################################################################################
# Download Flash (FLV) videos from YouTube using gawk's built-in TCP support.
#
# Input: a list file; on every record field 1 is the YouTube watch URL and
# field 2 is the output file name (".flv" is appended by download_video).
#
# Author: ChangHailong
# E-mail: hailongchang@163.com
#
# Date: 2008/10/11
################################################################################
BEGIN {
    # Binary mode for both input and output.
    BINMODE = 3
}

# Main rule: one download per input record.
{
    # Copy everything needed from the record first: the helpers below read
    # from a coprocess with a plain getline, which overwrites $0 (and, per the
    # gawk manual's getline summary, may also advance NR).
    filename = $2
    record_no = NR
    vid = parse_url($1)

    # Forget the previous record's result so a failed lookup cannot silently
    # re-download the previous video under the new file name.
    delete VID_INFO
    get_vid_info(vid, VID_INFO)
    if (!("request" in VID_INFO)) {
        print "\ncould not find video info for record " record_no ", skipping\n"
        next
    }

    download_video(VID_INFO["request"], filename)
    print "\nthe " record_no " file has been downloaded\n"
}
################################################################################
# parse_url(url)
#
# Extract the video id from a YouTube watch URL.
# A URL typically looks like http://www.youtube.com/watch?v=vkDAKke56n and the
# function returns the "vkDAKke56n" part. Anything from the first "&" onward
# (additional query parameters) is dropped.
#
#   url      the watch URL (a bare video id is returned unchanged)
#   p        local scratch variable; callers must not pass it
#
# Returns the video id as a string.
################################################################################
function parse_url(url,    p)
{
    # Strip the scheme, the optional "www." and the watch path.
    gsub(/https?:\/\//, "", url)
    gsub(/www\./, "", url)
    gsub(/youtube\.com\/watch\?v=/, "", url)

    # Cut off any further query parameters.
    if ((p = index(url, "&")) > 0)
        url = substr(url, 1, p - 1)

    return url
}
################################################################################
# get_vid_info(vid, VID_INFO)
#
# Fetch the watch page for video id `vid` from www.youtube.com and look for
# the "video_id" and "t" values embedded in it.
#
#   vid        video id as returned by parse_url()
#   VID_INFO   array; on success VID_INFO["request"] is set to
#              "video_id=<id>&t=<t>". It is left untouched when no line of
#              the response matches.
#   InetFile, Request, info_matches
#              local scratch variables; callers must not pass them
#
# Side effect: reads the response with a plain getline, so $0 and NF of the
# caller's current record are overwritten.
################################################################################
function get_vid_info(vid, VID_INFO,    InetFile, Request, info_matches)
{
    InetFile = "/inet/tcp/0/www.youtube.com/80"
    Request = "GET /watch?v=" vid " HTTP/1.1\r\n"
    Request = Request "Host: www.youtube.com\r\n"
    # HTTP/1.1 connections are persistent by default; ask the server to close
    # so the read loop below ends at EOF when nothing matches.
    Request = Request "Connection: close\r\n\r\n"

    # printf rather than print: the request must not get ORS appended.
    printf "%s", Request |& InetFile

    while ((InetFile |& getline) > 0) {
        if (match($0, /"video_id": *"([^"]+)".+"t": *"([^"]+)"/, info_matches)) {
            VID_INFO["request"] = "video_id=" info_matches[1] "&t=" info_matches[2]
            break
        }
    }
    close(InetFile)
    return
}
################################################################################
# get_header_info(Inet, Request, HEADER_INFO)
#
# Send `Request` over the two-way connection `Inet` and read the HTTP response
# status line and headers, stopping at the first line that is not of the form
# "Name: value" (normally the blank line that ends the headers). The response
# body is left unread on the connection.
#
#   Inet          gawk network special file name (/inet/tcp/...)
#   Request       complete HTTP request text, including the final blank line
#   HEADER_INFO   array, cleared first; receives
#                   HEADER_INFO["_status"]  second field of the status line
#                   HEADER_INFO[name]       value of each header, keyed by the
#                                           header name exactly as received
#   OLD_RS, matches
#                 local scratch variables; callers must not pass them
#
# Exits the program with status 1 when no response line can be read.
# Side effect: overwrites $0 and NF of the caller's current record.
################################################################################
function get_header_info(Inet, Request, HEADER_INFO,    OLD_RS, matches)
{
    delete HEADER_INFO
    OLD_RS = RS

    # Write to the connection we were handed (the parameter), not to whatever
    # a global of a similar name happens to hold. printf avoids appending ORS.
    printf "%s", Request |& Inet

    # Status line, e.g. "HTTP/1.1 303 See Other": field 2 is the status code.
    if ((Inet |& getline) > 0) {
        HEADER_INFO["_status"] = $2
    }
    else {
        print "I can not redirect net address for you. Quitting!"
        exit 1
    }

    # Header lines are CRLF-terminated; split on that so values carry no "\r".
    RS = "\r\n"
    while ((Inet |& getline) > 0) {
        if (match($0, /([^:]+): (.+)/, matches)) {
            HEADER_INFO[matches[1]] = matches[2]
        }
        else {
            # Blank line: end of the header section.
            break
        }
    }
    RS = OLD_RS
}
################################################################################
# download_video(req, filename)
#
# Request /get_video?<req> from www.youtube.com, follow up to 5 "Location"
# redirects, and save the body of the final response to "<filename>.flv".
#
#   req        query string, e.g. "video_id=<id>&t=<t>"
#   filename   output file name without the ".flv" extension
#   Loop       local scratch variable; callers must not pass it
#
# InetFile, Request, HEADER_INFO and GOOGLE_VIDEO are deliberately left as
# globals, as in the original script, because other functions in this script
# may refer to the current connection by the name InetFile.
################################################################################
function download_video(req, filename,    Loop)
{
    InetFile = "/inet/tcp/0/www.youtube.com/80"
    Request = "GET /get_video?" req " HTTP/1.1\r\n"
    Request = Request "Host: www.youtube.com\r\n"
    # Ask for a non-persistent connection so the body ends at EOF.
    Request = Request "Connection: close\r\n\r\n"

    Loop = 0
    do {
        get_header_info(InetFile, Request, HEADER_INFO)
        if ("Location" in HEADER_INFO) {
            # Redirected: drop this connection and aim at the new host.
            close(InetFile)
            parse_location(HEADER_INFO["Location"], GOOGLE_VIDEO)
            InetFile = GOOGLE_VIDEO["InetFile"]
            if (InetFile == "") {
                print "I can not download flv for you, please check your vid"
                return
            }
            Request = "GET " GOOGLE_VIDEO["Request"] " HTTP/1.1\r\n"
            Request = Request "Host: " GOOGLE_VIDEO["Host"] "\r\n"
            Request = Request "Connection: close\r\n\r\n"
        }
        Loop++
    } while (("Location" in HEADER_INFO) && Loop < 5)

    # Redirect limit hit: InetFile now names a host that was never sent a
    # request, so reading from it would only block. Give up instead.
    if ("Location" in HEADER_INFO) {
        print "I can not download flv for you, too many redirects"
        return
    }

    # Do not store an error page as a video.
    if (HEADER_INFO["_status"] !~ /^2/) {
        print "I can not download flv for you, server answered " HEADER_INFO["_status"]
        close(InetFile)
        return
    }

    filename = filename ".flv"
    save_file(InetFile, filename, HEADER_INFO)
    close(InetFile)
    return
}
################################################################################
# parse_location(location, GOOGLE_VIDEO)
#
# Split an http:// redirect target into the pieces needed to issue the next
# request.
#
#   location       value of a "Location" header, e.g. http://host/path?query
#   GOOGLE_VIDEO   array that receives
#                    GOOGLE_VIDEO["InetFile"]  "/inet/tcp/0/<host>/<port>"
#                    GOOGLE_VIDEO["Host"]      value for the Host header
#                                              (includes ":port" if one was
#                                              given in the URL)
#                    GOOGLE_VIDEO["Request"]   path plus query string
#                  All three are set to "" when `location` is not an http://
#                  URL with a non-empty path after the slash.
#   mat, port      local scratch variables; callers must not pass them
################################################################################
function parse_location(location, GOOGLE_VIDEO,    mat, port)
{
    # mat[1] = host, mat[2] = ":port" (optional), mat[3] = port digits,
    # mat[4] = path and query.
    if (match(location, /http:\/\/([^\/:]+)(:([0-9]+))?(\/.+)/, mat)) {
        port = (mat[3] != "") ? mat[3] : 80
        GOOGLE_VIDEO["InetFile"] = "/inet/tcp/0/" mat[1] "/" port
        GOOGLE_VIDEO["Host"] = mat[1] mat[2]
        GOOGLE_VIDEO["Request"] = mat[4]
    }
    else {
        GOOGLE_VIDEO["InetFile"] = ""
        GOOGLE_VIDEO["Host"] = ""
        GOOGLE_VIDEO["Request"] = ""
    }
}
################################################################################
# save_file(InetFile, filename, HEADER_INFO)
#
# Copy the rest of the response waiting on connection `InetFile` into
# `filename`, showing a running byte count on standard output.
#
#   InetFile      gawk network special file whose headers were already read
#   filename      output file; truncated first
#   HEADER_INFO   response headers; "Content-Length" is used for the progress
#                 line and, when positive, to know when the body is complete
#   OLD_RS, OLD_ORS, has_read, expected
#                 local scratch variables; callers must not pass them
#
# Side effect: overwrites $0 and NF of the caller's current record.
#
# NOTE(review): length() counts characters, not bytes, in multibyte locales;
# the byte counter presumably assumes a single-byte locale (e.g. LC_ALL=C) --
# confirm for the target environment.
################################################################################
function save_file(InetFile, filename, HEADER_INFO,    OLD_RS, OLD_ORS, has_read, expected)
{
    OLD_RS = RS
    OLD_ORS = ORS

    # Empty ORS so nothing is appended to what is written.
    ORS = ""
    # Create/truncate the output file. Later writes reuse this same ">"
    # redirection; awk keeps the file open, so they append to it.
    printf "" > filename

    # The body is binary. Split it on an arbitrary byte and write $0 followed
    # by RT (the separator actually matched) to reproduce the input exactly.
    RS = "@"
    has_read = 0
    expected = HEADER_INFO["Content-Length"] + 0

    # Progress line: a 10-column blank field followed by " bytes", then move
    # the cursor back 16 columns (10 + length(" bytes")) to the start of that
    # field so it can be overwritten in place.
    printf("The %d flv file has %10d bytes. I have downloaded: %10s bytes", NR, expected, "")
    printf("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b")

    while ((InetFile |& getline) > 0) {
        has_read += length($0 RT)
        # Print the counter, then step back over it for the next update.
        printf("%10d\b\b\b\b\b\b\b\b\b\b", has_read)
        printf "%s", $0 RT > filename
        # A persistent HTTP/1.1 connection never reaches EOF on its own; stop
        # once the announced number of bytes has arrived.
        if (expected > 0 && has_read >= expected)
            break
    }
    printf("%10d\b\b\b\b\b\b\b\b\b\b", has_read)

    # Flush and release the output file so descriptors do not pile up when
    # many videos are downloaded in one run.
    close(filename)

    RS = OLD_RS
    ORS = OLD_ORS
    return
}
################################################################################