# [python] download google music
#
# Date: 2011-06-12  Source: K++
#!/usr/bin/env python3
#gmd.py

"""Google Music Downloader(GMD)
    --Version:  1.5
    --Update:   2011-06-12
    --Author:   kev++
    --License:  GNU GPL

    Usage: gmd.py [options] AlbumId1 [AlbumId2..]

    Options:
      -h, --help                    show this help message and exit
      -r RANGES, --ranges=RANGES    album index ranges
      -p PATH, --path=PATH          local path
      -b, --beep                    beep when finish
      -y, --yes                     say yes to redownload
      --nolog                       don't log

Quick-Start Tutorial:
    Open your Firefox, type url:

    http://www.google.cn/music/artist?id=Ae2300d8b0232c06c
                                         =================
    Ae2300d8b0232c06c is AlbumId
    =================    =======
    Type $ python gmd.py Ae2300d8b0232c06c in cmd/bash shell
           ===============================
    Hit Enter key
        =====
    Everything should be OK if you're lucky

Real-World Example:
    C:>python gmd.py --ranges :2,7:8,11: B17b0e7e724dd4e79
     -----------Songs-----------
    |    1: A Force De Solitude |
    |    2: C'Est Parce Que Je T|
    |    7: Jamais Personne     |
    |    8: Je Laime            |
    |   11: Marie               |
    |   12: Si Un Jour          |
     ---------------------------
         1: A Force De Solitude
            ✓ 100.0% of 4.63MB
         2: C'Est Parce Que Je T'aime
            X file exists
         7: Jamais Personne
            ↓  14.1% of 3.21MB

Known Issues:
    Downloading a lot of songs in a short period gets your IP blocked (an image verification code is then required).
    For some songs the download base URL is: http://g.top100.cn/16667639/html/download.html?id=<SongId>
"""

import sys, re, urllib.request, os, string, datetime, optparse
import xml.etree.ElementTree as ET
from functools import reduce

def downloadAlbum(albumId, ranges=':', path=None, islog=True):
    """Download the selected tracks of one album.

    albumId -- album identifier passed to parseAlbum().
    ranges  -- track selection string such as ':2,7:8,11:' (1-based).
    path    -- target directory; a falsy value lets makeDir() pick the default.
    islog   -- when true, write log.xml into the target directory afterwards.

    Returns whatever report() returns: a range string naming the selected
    tracks that ended up without a URL, or '' when there are none.
    """
    album = parseAlbum(albumId, ranges)
    printSummary(album)

    targetDir = makeDir(album['info'], path)
    for track in album['songs']:
        downloadSong(track, targetDir)

    if islog:
        log(album, targetDir)

    failedRanges = report(album)
    return failedRanges

def makeDir(info, path):
    """Return the download directory, creating it when it does not exist.

    info -- album info dict; 'artist' and 'name' are read only when `path`
            is falsy.
    path -- explicit target directory, or None/'' to use
            ~/Music/<artist>/<name>.

    Raises KeyError when the default location is needed and `info` lacks
    'artist' or 'name'.
    """
    dirPath = path
    if not dirPath:
        # Sanitise each component on its own so that a '/' or '\' inside an
        # artist or album name cannot introduce an extra directory level.
        illegal = str.maketrans('', '', r'\/:*?<>|"')
        artist = str(info['artist']).translate(illegal)
        name = str(info['name']).translate(illegal)
        # Default download location is below the user's home directory.
        dirPath = os.path.join(os.path.expanduser('~'), 'Music', artist, name)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(dirPath, exist_ok=True)
    return dirPath

def parseAlbum(albumId, ranges):
    """Fetch the album XML and build the album description.

    albumId -- album identifier inserted into the album XML URL.
    ranges  -- track selection string handed to parseranges().

    Returns a dict with two keys:
      'info'  -- dict of the <info> children 'id', 'name', 'artist' and
                 'releaseDate' that are present, plus 'total' (song count).
      'songs' -- list of dicts with 'id', 'name', 'artist', 'track'
                 (1-based), 'download' (bool) and 'url' (None unless the
                 track is selected and getSongUrl() found a link).

    Raises ValueError when the response contains no <info> element.
    """
    albumXmlUrl = 'http://www.google.cn/music/album?id={0}&output=xml'.format(albumId)
    # Close the HTTP response as soon as the document has been parsed.
    with urllib.request.urlopen(albumXmlUrl) as resp:
        xmlTree = ET.parse(resp)

    infoNode = xmlTree.find('.//info')
    if infoNode is None:
        raise ValueError('album {0!r}: no <info> element in response'.format(albumId))
    wanted = ('id', 'name', 'artist', 'releaseDate')
    info = {child.tag: child.text for child in infoNode if child.tag in wanted}

    songs = [
        {tag: node.findtext(tag) for tag in ('id', 'name', 'artist')}
        for node in xmlTree.findall('.//song')
    ]
    info['total'] = len(songs)

    # A set gives constant-time membership tests inside the loop below.
    selected = set(parseranges(ranges, len(songs)))
    # Characters stripped from song names before they are used as file names.
    illegal = str.maketrans('', '', r'\/:*?<>|"')
    for i, song in enumerate(songs):
        song['track'] = i + 1
        song['name'] = song['name'].translate(illegal)
        song['download'] = i in selected
        # Only selected tracks trigger the extra request for a download link.
        song['url'] = getSongUrl(song['id']) if song['download'] else None
    return dict(info=info, songs=songs)

def getSongUrl(songId):
    """Return the download URL for `songId`, or None when the page has none.

    The song's download page is fetched and searched for a quoted
    '/music/top100/url?q=http://file...' link; the match is returned as an
    absolute www.google.cn URL with '&amp;' unescaped.
    """
    url = 'http://www.google.cn/music/top100/musicdownload?id={0}'.format(songId)
    # Close the HTTP response once the page body has been read.
    with urllib.request.urlopen(url) as resp:
        html = resp.read().decode('utf-8').replace('&quot;', '"')
    matches = re.search(r'"/(music/top100/url\?q=http://file[^"]+)"', html, re.I)
    if matches is None:
        return None
    return 'http://www.google.cn/{0}'.format(matches.group(1)).replace('&amp;', '&')

def printSummary(album):
    """Print a boxed table of the tracks selected for download.

    Each row shows the track number and the song name padded or cut to 20
    characters; rows for songs without a URL are flagged with 'X'.
    """
    rowTemplate = '| {0}{1:>3}: {2:<20.20}|'
    print(' -----------Songs----------- ')
    for entry in (s for s in album['songs'] if s['download']):
        flag = ' ' if entry['url'] else 'X'
        print(rowTemplate.format(flag, entry['track'], entry['name']))
    print(' --------------------------- ')

def downloadSong(song, path):
    """Download one song into `path` as '<name>.mp3' and print the outcome.

    Songs not selected for download are skipped silently. An existing target
    file or a missing URL is reported and nothing is fetched. Any exception
    is reported and the song's 'url' is reset to None.
    """
    if not song['download']:
        return

    try:
        marker = ' ' if song['url'] else 'X'
        print('  {0}{1:>3}: {2}'.format(marker, song['track'], song['name']))
        target = os.path.join(path, song['name'] + '.mp3')
        if os.path.exists(target):
            print('    X file exists')
            return
        if not song['url']:
            print('    X url is empty')
            return
        downloadFile(song['url'], target)
        print('    √')
    except Exception as err:
        print('    X cannot download. Error msg:', err)
        # Clearing the URL is how the failure becomes visible to report().
        song['url'] = None

def downloadFile(url, fileName):
    """Stream `url` into `fileName`, printing a one-line progress indicator.

    url      -- address opened with urllib.request.urlopen().
    fileName -- path of the file to write (binary mode).

    The connection is opened before the file is created, and a partially
    written file is removed when the transfer fails, so a failed download
    never leaves a file behind that a later run would mistake for a
    finished one.

    Raises whatever urlopen()/open()/read() raise, and IOError when fewer
    bytes arrive than the Content-Length header announced.
    """
    chunkSize = 8192

    def reportProgress(done, total):
        # '\r' with no newline rewrites the same console line on every chunk.
        if total > 0:
            percent = min(100.0, done * 100 / total)
            print('\t↓ {0:5.1f}% of {1:.2f}MB\r'.format(percent, total / 1024 / 1024), end='')
        else:
            # No usable Content-Length: show the amount received so far
            # instead of a percentage of a made-up total.
            print('\t↓ {0:.2f}MB\r'.format(done / 1024 / 1024), end='')
        sys.stdout.flush()

    # Connect first so a failed request never creates an empty target file.
    with urllib.request.urlopen(url) as resp:
        length = resp.info().get('Content-Length')
        total = int(length) if length else 0
        done = 0

        out = open(fileName, 'wb')
        # From here on the target file exists; clean it up on any failure.
        try:
            with out:
                while True:
                    chunk = resp.read(chunkSize)
                    if not chunk:
                        break
                    out.write(chunk)
                    # Count real bytes; the last chunk is usually short.
                    done += len(chunk)
                    reportProgress(done, total)
            if total and done < total:
                raise IOError('incomplete download: got {0} of {1} bytes'.format(done, total))
        except BaseException:
            try:
                os.remove(fileName)
            except OSError:
                pass
            raise

def log(album, path):
    """Write an XML record of the album to '<path>/log.xml' (UTF-8).

    The <album> root holds the info fields, a <downloadDate> with the
    current local time, and a <songs> list with one <song> per track; every
    value is stored as its str() form.
    """
    albumInfo = album['info']
    tracks = album['songs']

    def addText(parent, tag, value):
        # Append <tag> to parent with the stringified value as its text.
        ET.SubElement(parent, tag).text = str(value)

    root = ET.Element('album')
    for field in ('id', 'name', 'artist', 'releaseDate', 'total'):
        addText(root, field, albumInfo[field])
    addText(root, 'downloadDate', datetime.datetime.now())

    songsNode = ET.SubElement(root, 'songs')
    for track in tracks:
        songNode = ET.SubElement(songsNode, 'song')
        for field in ('id', 'track', 'name', 'url', 'download'):
            addText(songNode, field, track[field])

    ET.ElementTree(root).write(os.path.join(path, 'log.xml'), encoding='utf-8')

def parseranges(ranges, n):
    """
    Translate ":2,4:6,9:" to "0 1 3 4 5 8 9...n-1"
               == === ==      === ===== =========

    `ranges` is a comma-separated list of 1-based items, each either a single
    number or "start:end" where an empty side means "from the first" or
    "up to n". An empty item selects everything. The result is the sorted
    list of distinct 0-based indices. Raises ValueError for an item with
    more than one ':' or a non-numeric bound.
    """
    def torange(parts, n):
        if len(parts) > 2:
            raise ValueError
        first, last = parts[0], parts[-1]
        start = int(first) if first != '' else 1
        if len(parts) == 1:
            # A lone number selects just that track; a lone '' selects all.
            stop = start if first != '' else n
        else:
            stop = int(last) if last != '' else n
        return range(start - 1, stop)

    picked = set()
    for item in ranges.split(','):
        picked.update(torange(item.split(':'), n))
    return sorted(picked)

def report(album):
    """Return the failed tracks of `album` as a range string.

    A track counts as failed when it was selected for download but its 'url'
    is empty. Consecutive track numbers are collapsed to "first:last",
    isolated ones are written alone, and the parts are joined with ','.
    Returns '' when nothing failed.
    """
    def toranges(tracks):
        ordered = sorted(tracks)
        if not ordered:
            return ''
        spans = []
        first = last = ordered[0]
        for number in ordered[1:]:
            if number == last + 1:
                # Still inside the current run of consecutive numbers.
                last = number
            else:
                spans.append((first, last))
                first = last = number
        spans.append((first, last))
        return ",".join(
            "{0}".format(lo) if lo == hi else "{0}:{1}".format(lo, hi)
            for lo, hi in spans
        )

    failed = [
        int(song['track'])
        for song in album['songs']
        if song['download'] and not song['url']
    ]
    return toranges(failed)

if __name__=='__main__':
    # Command-line entry point: parse the options, validate the arguments,
    # then download every album, offering to retry the tracks that failed.
    parser = optparse.OptionParser(usage='usage: %prog [options] AlbumId1 [AlbumId2..]')
    parser.add_option('-r', '--ranges', dest='ranges', type=str, default=':', help='album index ranges')
    parser.add_option('-p', '--path', dest='path', default=None, help='local path')
    parser.add_option('-b', '--beep', dest='beep', action='store_true', default=False, help='beep when finish')
    parser.add_option('-y', '--yes', dest='yes', action='store_true', default=False, help='say yes to redownload')
    parser.add_option('--nolog', dest='islog', action='store_false', default=True, help='don\'t log')
    (opts, args) = parser.parse_args()

    # parser.error() prints the message and exits, so no else branch is needed.
    if not re.match(r'^(\d*(:\d*)?)(,(\d*(:\d*)?))*$', opts.ranges):
        parser.error('option [ranges] format wrong')

    RANGES = opts.ranges
    PATH = opts.path
    BEEP = opts.beep
    YES = opts.yes
    ISLOG = opts.islog

    if not args:
        parser.error('argument [AlbumId] cannot be empty')

    # Validate every id before any download starts, so a typo in a later
    # argument does not abort the run after earlier albums were fetched.
    for arg in args:
        if not re.match(r'^\w{17}$', arg):
            parser.error('argument [AlbumId] format is wrong')

    for ALBUMID in args:
        # Each album starts from the ranges the user asked for; the retry
        # list of one album must not leak into the next one.
        pending = RANGES
        while True:
            # downloadAlbum() returns the ranges of the tracks that failed.
            pending = downloadAlbum(ALBUMID, pending, PATH, ISLOG)
            if BEEP:
                print('\a')
            if not pending:
                break
            if YES:
                print('\nAuto redownload AlbumId:"{0}" Tracks:"{1}"!'.format(ALBUMID, pending))
                continue
            ans = input('\nRedownload AlbumId:"{0}" Tracks:"{1}"?(Y/N): '.format(ALBUMID, pending))
            if ans.lower().strip() != 'y':
                print('You can also add "-r {0}" options to redownload later!'.format(pending))
                break