【python】download google music
时间:2011-06-12 来源:K++
#!/usr/bin/env python3
#gmd.py
"""Google Music Downloader(GMD)
--Version: 1.5
--Update: 2011-06-12
--Author: kev++
--License: GNU GPL
Usage: gmd.py [options] AlbumId1 [AlbumId2..]
Options:
-h, --help show this help message and exit
-r RANGES, --ranges=RANGES album index ranges
-p PATH, --path=PATH local path
-b, --beep beep when finish
-y, --yes say yes to redownload
--nolog don't log
Quick-Start Tutorial:
Open your Firefox, type url:
http://www.google.cn/music/artist?id=Ae2300d8b0232c06c
=================
Ae2300d8b0232c06c is AlbumId
================= =======
Type $ python gmd.py Ae2300d8b0232c06c in cmd/bash shell
===============================
Hit Enter key
=====
Everything should be OK if you're lucky
Real-World Example:
C:>python gmd.py --ranges :2,7:8,11: B17b0e7e724dd4e79
-----------Songs-----------
| 1: A Force De Solitude |
| 2: C'Est Parce Que Je T|
| 7: Jamais Personne |
| 8: Je Laime |
| 11: Marie |
| 12: Si Un Jour |
---------------------------
1: A Force De Solitude
✓ 100.0% of 4.63MB
2: C'Est Parce Que Je T'aime
X file exists
7: Jamais Personne
↓ 14.1% of 3.21MB
Known Issues:
Downloading a lot of songs in a short period gets your IP blocked (an image verification code is then required)!
For some songs the download base URL is: http://g.top100.cn/16667639/html/download.html?id=<SongId>
"""
import sys, re, urllib.request, os, string, datetime, optparse
import xml.etree.ElementTree as ET
from functools import reduce
def downloadAlbum(albumId, ranges=':', path=None, islog=True):
    """Download the selected tracks of one album.

    albumId -- album identifier handed to parseAlbum().
    ranges  -- track selection string handed to parseAlbum().
    path    -- target directory, or None to let makeDir() pick one.
    islog   -- when true, log() is called once the downloads are done.

    Returns the value of report() for the album, i.e. the ranges string
    of the tracks that were selected but have no usable url.
    """
    albumData = parseAlbum(albumId, ranges)
    printSummary(albumData)
    targetDir = makeDir(albumData['info'], path)
    for track in albumData['songs']:
        downloadSong(track, targetDir)
    if islog:
        log(albumData, targetDir)
    failedRanges = report(albumData)
    return failedRanges
def makeDir(info, path):
    """Return the download directory, creating it when necessary.

    info -- album info mapping; its 'artist' and 'name' entries are read
            only when no explicit path is given.
    path -- directory chosen by the caller, or a falsy value to use the
            default '~/Music/<artist>/<name>'.

    Returns the directory path.
    Raises KeyError when the default location is needed and info lacks
    'artist' or 'name', and OSError (FileExistsError) when the path exists
    but is not a directory.
    """
    dirPath = path
    if not dirPath:
        # Clean every component on its own: a '/' or '\' inside an artist
        # or album name must not introduce extra directory levels.
        forbidden = str.maketrans('', '', r'\/:*?<>|"')
        parts = []
        for key in ('artist', 'name'):
            cleaned = str(info[key]).translate(forbidden)
            # A name made only of forbidden characters would vanish.
            parts.append(cleaned or 'Unknown')
        homePath = os.path.expanduser('~')  # default download to '$HOME'
        dirPath = os.path.join(homePath, 'Music', *parts)
    # exist_ok replaces the racy "check, then create" sequence.
    os.makedirs(dirPath, exist_ok=True)
    return dirPath
def parseAlbum(albumId, ranges):
    """Fetch the album XML and build the album description.

    albumId -- album identifier inserted into the album XML url.
    ranges  -- track selection string understood by parseranges().

    Returns dict(info=..., songs=...):
      info  -- the 'id', 'name', 'artist' and 'releaseDate' tags found
               under <info> (only those present), plus 'total', the
               number of songs.
      songs -- one dict per <song> with 'id', 'name', 'artist', 'track'
               (1-based), 'download' (bool) and 'url' (None unless the
               song is selected; may still be None if getSongUrl() finds
               nothing).

    Raises ValueError when the response has no <info> element.
    """
    albumXmlUrl = 'http://www.google.cn/music/album?id={0}&output=xml'.format(albumId)
    # Close the HTTP response as soon as the document has been parsed.
    with urllib.request.urlopen(albumXmlUrl) as resp:
        xmlTree = ET.parse(resp)

    infoNode = xmlTree.find('.//info')
    if infoNode is None:
        raise ValueError('album {0!r}: no <info> element in the response'.format(albumId))
    info = {child.tag: child.text
            for child in infoNode
            if child.tag in ('id', 'name', 'artist', 'releaseDate')}
    songs = [{tag: node.findtext(tag) for tag in ('id', 'name', 'artist')}
             for node in xmlTree.findall('.//song')]  # a list of dict
    info['total'] = len(songs)

    wanted = set(parseranges(ranges, len(songs)))  # set: O(1) membership
    illegal = str.maketrans('', '', r'\/:*?<>|"')  # not allowed in file names
    for index, song in enumerate(songs):
        track = index + 1
        song['track'] = track
        # findtext() yields None when <name> is missing; fall back to a
        # name derived from the track number instead of crashing.
        rawName = song['name'] or 'track{0}'.format(track)
        song['name'] = rawName.translate(illegal)
        song['download'] = index in wanted
        song['url'] = getSongUrl(song['id']) if song['download'] else None
    return dict(info=info, songs=songs)
def getSongUrl(songId):
    """Return the download url of a song, or None when the page has none.

    songId -- song identifier inserted into the musicdownload page url.

    The page is fetched, searched for a quoted
    '/music/top100/url?q=http://file...' link, and the link is returned
    prefixed with 'http://www.google.cn/'.
    """
    url = 'http://www.google.cn/music/top100/musicdownload?id={0}'.format(songId)
    # Close the HTTP response once the page has been read.
    with urllib.request.urlopen(url) as resp:
        page = resp.read().decode('utf-8')
    # NOTE(review): in the source as received both replace() calls were
    # no-ops ('"' -> '"' and '&' -> '&'), which is what HTML-unescaping the
    # listing would do to '&quot;' and '&amp;'. The entity forms are
    # restored here; confirm against the original script.
    page = page.replace('&quot;', '"')
    matches = re.search(r'"/(music/top100/url\?q=http://file[^"]+)"', page, re.I)
    if matches is None:
        return None
    return 'http://www.google.cn/{0}'.format(matches.group(1)).replace('&amp;', '&')
def printSummary(album):
    """Print a framed list of the songs selected for download.

    Each selected song gets one row: an 'X' marker when it has no url
    (a blank otherwise), the track number, and the name cut or padded to
    20 characters.
    """
    rowTemplate = '| {0}{1:>3}: {2:<20.20}|'
    print(' -----------Songs----------- ')
    for entry in album['songs']:
        if not entry['download']:
            continue
        marker = ' ' if entry['url'] else 'X'
        print(rowTemplate.format(marker, entry['track'], entry['name']))
    print(' --------------------------- ')
def downloadSong(song, path):
    """Download one song into path as '<name>.mp3'.

    song -- song dict with 'download', 'url', 'track' and 'name'.
    path -- directory that receives the file.

    Songs that are not selected are ignored. An existing file or an empty
    url is only reported. Any error is printed rather than raised, and
    song['url'] is then set to None so that report() lists the track as
    failed.
    """
    if not song['download']:
        return
    try:
        print(' {0}{1:>3}: {2}'.format(' ' if song['url'] else 'X', song['track'], song['name']))
        fileName = os.path.join(path, song['name']+'.mp3')
        if os.path.exists(fileName):
            print(' X file exists')
        elif not song['url']:
            print(' X url is empty')
        else:
            try:
                downloadFile(song['url'], fileName)
            except Exception:
                # A failed transfer must not leave a truncated file behind:
                # the next attempt would see it, print "file exists" and
                # treat the track as done.
                if os.path.exists(fileName):
                    try:
                        os.remove(fileName)
                    except OSError as cleanupError:
                        print(' X cannot remove partial file. Error msg:', cleanupError)
                raise
            print(' √')
    except Exception as e:
        # Best effort by design: one bad song must not stop the album.
        print(' X cannot download. Error msg:', e)
        song['url'] = None
def downloadFile(url, fileName):
    """Download url into fileName, printing progress on one console line.

    url      -- address to fetch with urllib.request.urlopen().
    fileName -- destination path; an existing file is replaced.

    The data is streamed into '<fileName>.part' and renamed only after the
    transfer has completed, so fileName never holds a truncated download.
    The temporary file is removed on any failure.

    Returns the number of bytes received.
    Raises OSError when fewer or more bytes arrive than the Content-Length
    header announced; errors from urlopen() and file I/O propagate.
    """
    chunkSize = 8192
    partName = fileName + '.part'
    received = 0
    completed = False

    def showProgress(done, total):
        # Progress is based on the bytes actually received; a read may
        # return less than chunkSize.
        if total:
            line = '\t↓ {0:5.1f}% of {1:.2f}MB\r'.format(min(100, done*100/total), total/1024/1024)
        else:
            # No usable Content-Length: show the amount received so far.
            line = '\t↓ {0:.2f}MB\r'.format(done/1024/1024)
        print(line, end='')
        sys.stdout.flush()

    try:
        # Open the url first so that a refused connection creates no file.
        # Hand-rolled loop: `urllib.request.urlretrieve' is VERY SLOW
        with urllib.request.urlopen(url) as resp:
            length = resp.info().get('Content-Length')
            total = int(length) if length and length.strip().isdigit() else 0
            with open(partName, 'wb') as out:
                while True:
                    buf = resp.read(chunkSize)
                    if not buf:
                        break
                    out.write(buf)
                    received += len(buf)
                    showProgress(received, total)
        if total and received != total:
            raise OSError('incomplete download: got {0} of {1} bytes'.format(received, total))
        os.replace(partName, fileName)
        completed = True
    finally:
        if not completed:
            try:
                os.remove(partName)
            except OSError:
                # Best effort: the temporary file may never have been
                # created, and the original error matters more.
                pass
    return received
def log(album, path):
    """Write a description of the album and its songs to '<path>/log.xml'.

    album -- dict with 'info' (album info mapping) and 'songs' (list of
             song dicts with 'id', 'track', 'name', 'url', 'download').
    path  -- directory that receives log.xml.

    Every value is stored as str(value), so None is written as 'None'.
    The time of the call is recorded in <downloadDate>.
    """
    info = album['info']
    root = ET.Element('album')
    for key in ('id', 'name', 'artist', 'releaseDate', 'total'):
        # parseAlbum() only stores the info tags the server actually sent,
        # so a key may be absent; record it like any other None value
        # instead of raising KeyError after the downloads have finished.
        ET.SubElement(root, key).text = str(info.get(key))
    ET.SubElement(root, 'downloadDate').text = str(datetime.datetime.now())
    songsNode = ET.SubElement(root, 'songs')
    for song in album['songs']:
        songNode = ET.SubElement(songsNode, 'song')
        for key in ('id', 'track', 'name', 'url', 'download'):
            ET.SubElement(songNode, key).text = str(song[key])
    logName = os.path.join(path, 'log.xml')
    ET.ElementTree(root).write(logName, encoding='utf-8')
def parseranges(ranges, n):
    """Translate a 1-based ranges string into sorted 0-based indices.

    Example with n tracks: ":2,4:6,9:" becomes 0 1 3 4 5 8 9 ... n-1.

    ranges -- comma separated items, each either a single number "k" or
              an inclusive span "a:b"; a missing "a" means 1 and a missing
              "b" means n. An empty item selects every track, like ":".
    n      -- number of tracks in the album.

    Returns a sorted list of unique indices, all within 0..n-1; numbers
    outside the album are dropped rather than producing negative or
    too-large indices.
    Raises ValueError for an item with more than one ':' or a bound that
    is not an integer.
    """
    selected = set()
    for item in ranges.split(','):
        bounds = item.split(':')
        if len(bounds) == 1:
            (only,) = bounds
            first = 1 if only == '' else int(only)
            last = n if only == '' else first
        elif len(bounds) == 2:
            low, high = bounds
            first = 1 if low == '' else int(low)
            last = n if high == '' else int(high)
        else:
            raise ValueError('invalid range item: {0!r}'.format(item))
        # Clamp to the album so the result really stays inside 0..n-1.
        selected.update(range(max(first, 1) - 1, min(last, n)))
    return sorted(selected)
def report(album):
    """Return the failed tracks of an album as a ranges string.

    album -- dict whose 'songs' entries carry 'track', 'download', 'url'.

    A track counts as failed when it was selected for download but its
    url is empty. Consecutive track numbers are folded into "first:last",
    items are joined with ',', e.g. tracks 1, 2, 5 give "1:2,5". Returns
    '' when nothing failed.
    """
    def toranges(tracks):
        # Each span is [first, last]; extend the latest span while the
        # numbers stay consecutive, otherwise open a new one.
        spans = []
        for number in sorted(tracks):
            if spans and number == spans[-1][1] + 1:
                spans[-1][1] = number
            else:
                spans.append([number, number])
        return ",".join("{0}".format(first) if first == last else "{0}:{1}".format(first, last)
                        for first, last in spans)

    failed = [int(song['track']) for song in album['songs'] if song['download'] and not song['url']]
    return toranges(failed)
if __name__ == '__main__':
    # Command line entry point: parse the options, validate all arguments,
    # then download every album, offering to retry the failed tracks.
    parser = optparse.OptionParser(usage='usage: %prog [options] AlbumId1 [AlbumId2..]')
    parser.add_option('-r', '--ranges', dest='ranges', type=str, default=':', help='album index ranges')
    parser.add_option('-p', '--path', dest='path', default=None, help='local path')
    parser.add_option('-b', '--beep', dest='beep', action='store_true', default=False, help='beep when finish')
    parser.add_option('-y', '--yes', dest='yes', action='store_true', default=False, help='say yes to redownload')
    parser.add_option('--nolog', dest='islog', action='store_false', default=True, help='don\'t log')
    (opts, args) = parser.parse_args()
    # parser.error() prints the message and exits, so no else branches.
    if not re.match(r'^(\d*(:\d*)?)(,(\d*(:\d*)?))*$', opts.ranges):
        parser.error('option [ranges] format wrong')
    if not args:
        parser.error('argument [AlbumId] cannot be empty')
    # Check every id before the first download, so a typo in a later
    # argument is reported immediately and not after earlier albums.
    for arg in args:
        if not re.match(r'^\w{17}$', arg):
            parser.error('argument [AlbumId] format is wrong')
    PATH = opts.path
    BEEP = opts.beep
    YES = opts.yes
    ISLOG = opts.islog
    for ALBUMID in args:
        # Start each album from the user's --ranges; the retry loop below
        # narrows RANGES to the failed tracks of the current album only.
        RANGES = opts.ranges
        while True:
            RANGES = downloadAlbum(ALBUMID, RANGES, PATH, ISLOG)
            if BEEP:
                print('\a')
            if not RANGES:
                break
            if YES:
                print('\nAuto redownload AlbumId:"{0}" Tracks:"{1}"!'.format(ALBUMID, RANGES))
                continue
            ans = input('\nRedownload AlbumId:"{0}" Tracks:"{1}"?(Y/N): '.format(ALBUMID, RANGES))
            if ans.lower().strip() != 'y':
                print('You can also add "-r {0}" options to redownload later!'.format(RANGES))
                break
相关阅读 更多 +