bandripper.bandripper

  1import argparse
  2import json
  3import re
  4import string
  5from dataclasses import dataclass
  6from pathlib import Path
  7from urllib.parse import urlparse
  8
  9import requests
 10from bs4 import BeautifulSoup
 11
 12import whosyouragent
 13from noiftimer import Timer
 14from printbuddies import ProgBar
 15
 16root = Path(__file__).parent
 17
 18
 19def clean_string(text: str) -> str:
 20    """Remove punctuation and trailing spaces from text."""
 21    return re.sub(f"[{re.escape(string.punctuation)}]", "", text).strip()
 22
 23
 24@dataclass
 25class Track:
 26    title: str
 27    number: int
 28    url: str
 29
 30    def __post_init__(self):
 31        self.title = clean_string(self.title)
 32
 33    @property
 34    def numbered_title(self):
 35        num = str(self.number)
 36        if len(num) == 1:
 37            num = "0" + num
 38        return f"{num} - {self.title}"
 39
 40
 41@dataclass
 42class Album:
 43    url: str
 44    artist: str = None
 45    title: str = None
 46    tracks: list[Track] = None
 47    art_url: str = None
 48
 49    def __repr__(self):
 50        return f"{self.title} by {self.artist}"
 51
 52    def __post_init__(self):
 53        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
 54        if response.status_code != 200:
 55            raise RuntimeError(
 56                f"Getting album info failed with code {response.status_code}"
 57            )
 58        soup = BeautifulSoup(response.text, "html.parser")
 59        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
 60        for script in soup.find_all("script"):
 61            if script.get("data-cart"):
 62                data = script
 63                break
 64        data = json.loads(data.attrs["data-tralbum"])
 65        self.artist = clean_string(data["artist"])
 66        self.title = clean_string(data["current"]["title"])
 67        self.tracks = [
 68            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
 69            for track in data["trackinfo"]
 70            if track.get("file")
 71        ]
 72
 73
 74class AlbumRipper:
 75    def __init__(
 76        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
 77    ):
 78        """
 79        :param no_track_number: If True, don't add the track
 80        number to the front of the track title."""
 81        self.album = Album(album_url)
 82        self.no_track_number = no_track_number
 83        self.overwrite = overwrite
 84
 85    def make_save_path(self):
 86        self.save_path = Path.cwd() / self.album.artist / self.album.title
 87        self.save_path.mkdir(parents=True, exist_ok=True)
 88
 89    @property
 90    def headers(self) -> dict:
 91        """Get a headers dict with a random useragent."""
 92        return whosyouragent.get_agent(as_dict=True)
 93
 94    def save_track(self, track_title: str, content: bytes) -> Path:
 95        """Save track to self.save_path/{track_title}.mp3.
 96        Returns the Path object for the save location.
 97
 98        :param content: The binary data of the track."""
 99        file_path = self.save_path / f"{track_title}.mp3"
100        file_path.write_bytes(content)
101        return file_path
102
103    def get_track_content(self, track_url: str) -> bytes:
104        """Make a request to track_url and return the content.
105        Raises a RunTimeError exception if response.status_code != 200."""
106        response = requests.get(track_url, headers=self.headers)
107        if response.status_code != 200:
108            raise RuntimeError(
109                f"Downloading track failed with status code {response.status_code}."
110            )
111        return response.content
112
113    def download_album_art(self):
114        """Download the album art and save as a .jpg."""
115        file_path = self.save_path / f"{self.album.title}.jpg"
116        try:
117            response = requests.get(self.album.art_url, headers=self.headers)
118            file_path.write_bytes(response.content)
119        except Exception as e:
120            print(f"Failed to download art for {self.album}.")
121            print(e)
122
123    def track_exists(self, track: Track) -> bool:
124        """Return if a track already exists in self.save_path."""
125        path = self.save_path / (
126            track.title if self.no_track_number else track.numbered_title
127        )
128        return path.with_suffix(".mp3").exists()
129
130    def rip(self):
131        """Download and save the album tracks and album art."""
132        if len(self.album.tracks) == 0:
133            print(f"No public tracks available for {self.album}.")
134            return None
135        self.make_save_path()
136        self.download_album_art()
137        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
138        bar.timer.subsecond_resolution = True
139        fails = []
140        if not self.overwrite:
141            self.album.tracks = [
142                track for track in self.album.tracks if not self.track_exists(track)
143            ]
144        for track in self.album.tracks:
145            bar.display(
146                suffix=f"Downloading {track.title}",
147                counter_override=1 if len(self.album.tracks) == 1 else None,
148            )
149            try:
150                content = self.get_track_content(track.url)
151                self.save_track(
152                    track.title if self.no_track_number else track.numbered_title,
153                    content,
154                )
155            except Exception as e:
156                fails.append((track, str(e)))
157        print(f"Finished downloading {self.album} in {bar.timer.elapsed_str}.")
158        if fails:
159            print("The following tracks failed to download:")
160            for fail in fails:
161                print(f"{fail[0].title}: {fail[1]}")
162
163
class BandRipper:
    """Download every album listed on a discography page."""

    def __init__(
        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
    ):
        """Build an AlbumRipper for each album url found at band_url.

        An album whose AlbumRipper raises during construction is printed
        and skipped.

        :param band_url: Url of the discography page.
        :param no_track_number: Passed through to each AlbumRipper.
        :param overwrite: Passed through to each AlbumRipper."""
        self.band_url = band_url
        self.albums = []
        for url in self.get_album_urls(band_url):
            try:
                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
            except Exception as e:
                print(e)

    def get_album_urls(self, band_url: str) -> list[str]:
        """Get album urls from the main bandcamp url.

        Returns an empty list when the page has no music grid.
        Raises a RuntimeError if response.status_code != 200."""
        print(f"Fetching discography from {band_url}...")
        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
        if response.status_code != 200:
            raise RuntimeError(
                f"Getting {band_url} failed with status code {response.status_code}."
            )
        soup = BeautifulSoup(response.text, "html.parser")
        grid = soup.find("ol", attrs={"id": "music-grid"})
        if grid is None:
            return []
        base_url = f"https://{urlparse(band_url).netloc}"
        urls = []
        for item in grid.find_all("li"):
            href = item.a.get("href") if item.a is not None else None
            if not href:
                continue
            # An href that already names a host is kept as-is; prefixing it
            # with base_url would produce a malformed url.
            urls.append(href if urlparse(href).netloc else base_url + href)
        return urls

    def rip(self):
        """Rip every album in self.albums and list the ones that raised."""
        # Nothing was loaded (empty page or every album failed to construct):
        # report it instead of failing on self.albums[0].
        if not self.albums:
            print(f"No albums to download from {self.band_url}.")
            return None
        artist = self.albums[0].album.artist
        print(f"Downloading {len(self.albums)} albums by {artist}.")
        timer = Timer(subsecond_resolution=True)
        timer.start()
        fails = []
        for album in self.albums:
            try:
                album.rip()
            except Exception as e:
                fails.append((album, e))
        timer.stop()
        print(
            f"Finished downloading {len(self.albums)} albums by {artist} in {timer.elapsed_str}."
        )
        if fails:
            print("The following downloads failed:")
            for failed_album, error in fails:
                print(f"{failed_album}: {error}")
211
212
def page_is_discography(url: str) -> bool:
    """Report whether the page at url is a discography page.

    The page is requested and searched for an <ol id="music-grid"> element.
    Raises a RuntimeError if response.status_code != 200."""
    agent_headers = whosyouragent.get_agent(as_dict=True)
    page = requests.get(url, headers=agent_headers)
    status = page.status_code
    if status != 200:
        raise RuntimeError(f"Getting {url} failed with status code {status}.")
    music_grid = BeautifulSoup(page.text, "html.parser").find(
        "ol", attrs={"id": "music-grid"}
    )
    # find() gives None when nothing matches.
    return bool(music_grid)
226
227
def get_args() -> argparse.Namespace:
    """Parse the command line: positional urls plus the -n and -o switches.

    Leading and trailing "/" characters are stripped from every url."""
    cli = argparse.ArgumentParser()

    cli.add_argument(
        "urls",
        nargs="*",
        type=str,
        help=""" The bandcamp url(s) for the album or artist.
            If the url is to an artists main page,
            all albums will be downloaded.
            The tracks will be saved to a subdirectory of
            your current directory.
            If a track can't be streamed (i.e. private) it
            won't be downloaded. Multiple urls can be passed.""",
    )
    cli.add_argument(
        "-n",
        "--no_track_number",
        help=""" By default the track number will be added
        to the front of the track title. Pass this switch
        to disable the behavior.""",
        action="store_true",
    )
    cli.add_argument(
        "-o",
        "--overwrite",
        help=""" Pass this flag to overwrite existing files.
        Otherwise don't download tracks that already exist locally.""",
        action="store_true",
    )

    namespace = cli.parse_args()
    stripped_urls = []
    for raw_url in namespace.urls:
        stripped_urls.append(raw_url.strip("/"))
    namespace.urls = stripped_urls
    return namespace
265
266
def main(args: argparse.Namespace = None):
    """Rip every url in args.urls; the command line is parsed when args is not given.

    A url for which page_is_discography() is true is handled by BandRipper,
    any other url by AlbumRipper."""
    if not args:
        args = get_args()
    for url in args.urls:
        ripper_type = BandRipper if page_is_discography(url) else AlbumRipper
        ripper_type(url, args.no_track_number, args.overwrite).rip()
276
277
278if __name__ == "__main__":
279    main(get_args())
def clean_string(text: str) -> str:
20def clean_string(text: str) -> str:
21    """Remove punctuation and trailing spaces from text."""
22    return re.sub(f"[{re.escape(string.punctuation)}]", "", text).strip()

Remove punctuation and leading/trailing whitespace from text.

@dataclass
class Track:
25@dataclass
26class Track:
27    title: str
28    number: int
29    url: str
30
31    def __post_init__(self):
32        self.title = clean_string(self.title)
33
34    @property
35    def numbered_title(self):
36        num = str(self.number)
37        if len(num) == 1:
38            num = "0" + num
39        return f"{num} - {self.title}"
Track(title: str, number: int, url: str)
@dataclass
class Album:
42@dataclass
43class Album:
44    url: str
45    artist: str = None
46    title: str = None
47    tracks: list[Track] = None
48    art_url: str = None
49
50    def __repr__(self):
51        return f"{self.title} by {self.artist}"
52
53    def __post_init__(self):
54        response = requests.get(self.url, headers=whosyouragent.get_agent(as_dict=True))
55        if response.status_code != 200:
56            raise RuntimeError(
57                f"Getting album info failed with code {response.status_code}"
58            )
59        soup = BeautifulSoup(response.text, "html.parser")
60        self.art_url = soup.find("meta", attrs={"property": "og:image"}).get("content")
61        for script in soup.find_all("script"):
62            if script.get("data-cart"):
63                data = script
64                break
65        data = json.loads(data.attrs["data-tralbum"])
66        self.artist = clean_string(data["artist"])
67        self.title = clean_string(data["current"]["title"])
68        self.tracks = [
69            Track(track["title"], track["track_num"], track["file"]["mp3-128"])
70            for track in data["trackinfo"]
71            if track.get("file")
72        ]
Album( url: str, artist: str = None, title: str = None, tracks: list[bandripper.bandripper.Track] = None, art_url: str = None)
class AlbumRipper:
 75class AlbumRipper:
 76    def __init__(
 77        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
 78    ):
 79        """
 80        :param no_track_number: If True, don't add the track
 81        number to the front of the track title."""
 82        self.album = Album(album_url)
 83        self.no_track_number = no_track_number
 84        self.overwrite = overwrite
 85
 86    def make_save_path(self):
 87        self.save_path = Path.cwd() / self.album.artist / self.album.title
 88        self.save_path.mkdir(parents=True, exist_ok=True)
 89
 90    @property
 91    def headers(self) -> dict:
 92        """Get a headers dict with a random useragent."""
 93        return whosyouragent.get_agent(as_dict=True)
 94
 95    def save_track(self, track_title: str, content: bytes) -> Path:
 96        """Save track to self.save_path/{track_title}.mp3.
 97        Returns the Path object for the save location.
 98
 99        :param content: The binary data of the track."""
100        file_path = self.save_path / f"{track_title}.mp3"
101        file_path.write_bytes(content)
102        return file_path
103
104    def get_track_content(self, track_url: str) -> bytes:
105        """Make a request to track_url and return the content.
106        Raises a RunTimeError exception if response.status_code != 200."""
107        response = requests.get(track_url, headers=self.headers)
108        if response.status_code != 200:
109            raise RuntimeError(
110                f"Downloading track failed with status code {response.status_code}."
111            )
112        return response.content
113
114    def download_album_art(self):
115        """Download the album art and save as a .jpg."""
116        file_path = self.save_path / f"{self.album.title}.jpg"
117        try:
118            response = requests.get(self.album.art_url, headers=self.headers)
119            file_path.write_bytes(response.content)
120        except Exception as e:
121            print(f"Failed to download art for {self.album}.")
122            print(e)
123
124    def track_exists(self, track: Track) -> bool:
125        """Return if a track already exists in self.save_path."""
126        path = self.save_path / (
127            track.title if self.no_track_number else track.numbered_title
128        )
129        return path.with_suffix(".mp3").exists()
130
131    def rip(self):
132        """Download and save the album tracks and album art."""
133        if len(self.album.tracks) == 0:
134            print(f"No public tracks available for {self.album}.")
135            return None
136        self.make_save_path()
137        self.download_album_art()
138        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
139        bar.timer.subsecond_resolution = True
140        fails = []
141        if not self.overwrite:
142            self.album.tracks = [
143                track for track in self.album.tracks if not self.track_exists(track)
144            ]
145        for track in self.album.tracks:
146            bar.display(
147                suffix=f"Downloading {track.title}",
148                counter_override=1 if len(self.album.tracks) == 1 else None,
149            )
150            try:
151                content = self.get_track_content(track.url)
152                self.save_track(
153                    track.title if self.no_track_number else track.numbered_title,
154                    content,
155                )
156            except Exception as e:
157                fails.append((track, str(e)))
158        print(f"Finished downloading {self.album} in {bar.timer.elapsed_str}.")
159        if fails:
160            print("The following tracks failed to download:")
161            for fail in fails:
162                print(f"{fail[0].title}: {fail[1]}")
AlbumRipper( album_url: str, no_track_number: bool = False, overwrite: bool = False)
76    def __init__(
77        self, album_url: str, no_track_number: bool = False, overwrite: bool = False
78    ):
79        """
80        :param no_track_number: If True, don't add the track
81        number to the front of the track title."""
82        self.album = Album(album_url)
83        self.no_track_number = no_track_number
84        self.overwrite = overwrite
Parameters
  • no_track_number: If True, don't add the track number to the front of the track title.
def make_save_path(self):
86    def make_save_path(self):
87        self.save_path = Path.cwd() / self.album.artist / self.album.title
88        self.save_path.mkdir(parents=True, exist_ok=True)
headers: dict

Get a headers dict with a random useragent.

def save_track(self, track_title: str, content: bytes) -> pathlib.Path:
 95    def save_track(self, track_title: str, content: bytes) -> Path:
 96        """Save track to self.save_path/{track_title}.mp3.
 97        Returns the Path object for the save location.
 98
 99        :param content: The binary data of the track."""
100        file_path = self.save_path / f"{track_title}.mp3"
101        file_path.write_bytes(content)
102        return file_path

Save track to self.save_path/{track_title}.mp3. Returns the Path object for the save location.

Parameters
  • content: The binary data of the track.
def get_track_content(self, track_url: str) -> bytes:
104    def get_track_content(self, track_url: str) -> bytes:
105        """Make a request to track_url and return the content.
106        Raises a RunTimeError exception if response.status_code != 200."""
107        response = requests.get(track_url, headers=self.headers)
108        if response.status_code != 200:
109            raise RuntimeError(
110                f"Downloading track failed with status code {response.status_code}."
111            )
112        return response.content

Make a request to track_url and return the content. Raises a RuntimeError if response.status_code != 200.

def download_album_art(self):
114    def download_album_art(self):
115        """Download the album art and save as a .jpg."""
116        file_path = self.save_path / f"{self.album.title}.jpg"
117        try:
118            response = requests.get(self.album.art_url, headers=self.headers)
119            file_path.write_bytes(response.content)
120        except Exception as e:
121            print(f"Failed to download art for {self.album}.")
122            print(e)

Download the album art and save as a .jpg.

def track_exists(self, track: bandripper.bandripper.Track) -> bool:
124    def track_exists(self, track: Track) -> bool:
125        """Return if a track already exists in self.save_path."""
126        path = self.save_path / (
127            track.title if self.no_track_number else track.numbered_title
128        )
129        return path.with_suffix(".mp3").exists()

Return whether a track already exists in self.save_path.

def rip(self):
131    def rip(self):
132        """Download and save the album tracks and album art."""
133        if len(self.album.tracks) == 0:
134            print(f"No public tracks available for {self.album}.")
135            return None
136        self.make_save_path()
137        self.download_album_art()
138        bar = ProgBar(len(self.album.tracks) - 1, width_ratio=0.5)
139        bar.timer.subsecond_resolution = True
140        fails = []
141        if not self.overwrite:
142            self.album.tracks = [
143                track for track in self.album.tracks if not self.track_exists(track)
144            ]
145        for track in self.album.tracks:
146            bar.display(
147                suffix=f"Downloading {track.title}",
148                counter_override=1 if len(self.album.tracks) == 1 else None,
149            )
150            try:
151                content = self.get_track_content(track.url)
152                self.save_track(
153                    track.title if self.no_track_number else track.numbered_title,
154                    content,
155                )
156            except Exception as e:
157                fails.append((track, str(e)))
158        print(f"Finished downloading {self.album} in {bar.timer.elapsed_str}.")
159        if fails:
160            print("The following tracks failed to download:")
161            for fail in fails:
162                print(f"{fail[0].title}: {fail[1]}")

Download and save the album tracks and album art.

class BandRipper:
165class BandRipper:
166    def __init__(
167        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
168    ):
169        self.band_url = band_url
170        self.albums = []
171        for url in self.get_album_urls(band_url):
172            try:
173                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
174            except Exception as e:
175                print(e)
176
177    def get_album_urls(self, band_url: str) -> list[str]:
178        """Get album urls from the main bandcamp url."""
179        print(f"Fetching discography from {band_url}...")
180        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
181        if response.status_code != 200:
182            raise RuntimeError(
183                f"Getting {band_url} failed with status code {response.status_code}."
184            )
185        soup = BeautifulSoup(response.text, "html.parser")
186        grid = soup.find("ol", attrs={"id": "music-grid"})
187        parsed_url = urlparse(band_url)
188        base_url = f"https://{parsed_url.netloc}"
189        return [base_url + album.a.get("href") for album in grid.find_all("li")]
190
191    def rip(self):
192        print(
193            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
194        )
195        timer = Timer(subsecond_resolution=True)
196        timer.start()
197        fails = []
198        for album in self.albums:
199            try:
200                album.rip()
201            except Exception as e:
202                fails.append((album, e))
203        timer.stop()
204        artist = self.albums[0].album.artist
205        print(
206            f"Finished downloading {len(self.albums)} albums by {artist} in {timer.elapsed_str}."
207        )
208        if fails:
209            print(f"The following downloads failed:")
210            for fail in fails:
211                print(f"{fail[0]}: {fail[1]}")
BandRipper( band_url: str, no_track_number: bool = False, overwrite: bool = False)
166    def __init__(
167        self, band_url: str, no_track_number: bool = False, overwrite: bool = False
168    ):
169        self.band_url = band_url
170        self.albums = []
171        for url in self.get_album_urls(band_url):
172            try:
173                self.albums.append(AlbumRipper(url, no_track_number, overwrite))
174            except Exception as e:
175                print(e)
def get_album_urls(self, band_url: str) -> list[str]:
177    def get_album_urls(self, band_url: str) -> list[str]:
178        """Get album urls from the main bandcamp url."""
179        print(f"Fetching discography from {band_url}...")
180        response = requests.get(band_url, headers=whosyouragent.get_agent(as_dict=True))
181        if response.status_code != 200:
182            raise RuntimeError(
183                f"Getting {band_url} failed with status code {response.status_code}."
184            )
185        soup = BeautifulSoup(response.text, "html.parser")
186        grid = soup.find("ol", attrs={"id": "music-grid"})
187        parsed_url = urlparse(band_url)
188        base_url = f"https://{parsed_url.netloc}"
189        return [base_url + album.a.get("href") for album in grid.find_all("li")]

Get album urls from the main bandcamp url.

def rip(self):
191    def rip(self):
192        print(
193            f"Downloading {len(self.albums)} albums by {self.albums[0].album.artist}."
194        )
195        timer = Timer(subsecond_resolution=True)
196        timer.start()
197        fails = []
198        for album in self.albums:
199            try:
200                album.rip()
201            except Exception as e:
202                fails.append((album, e))
203        timer.stop()
204        artist = self.albums[0].album.artist
205        print(
206            f"Finished downloading {len(self.albums)} albums by {artist} in {timer.elapsed_str}."
207        )
208        if fails:
209            print(f"The following downloads failed:")
210            for fail in fails:
211                print(f"{fail[0]}: {fail[1]}")
def page_is_discography(url: str) -> bool:
214def page_is_discography(url: str) -> bool:
215    """Returns whether the url is to a discography page or not."""
216    response = requests.get(url, headers=whosyouragent.get_agent(as_dict=True))
217    if response.status_code != 200:
218        raise RuntimeError(
219            f"Getting {url} failed with status code {response.status_code}."
220        )
221    soup = BeautifulSoup(response.text, "html.parser")
222    # Returns None if it doesn't exist.
223    grid = soup.find("ol", attrs={"id": "music-grid"})
224    if grid:
225        return True
226    return False

Returns whether the url is to a discography page or not.

def get_args() -> argparse.Namespace:
229def get_args() -> argparse.Namespace:
230    parser = argparse.ArgumentParser()
231
232    parser.add_argument(
233        "urls",
234        type=str,
235        nargs="*",
236        help=""" The bandcamp url(s) for the album or artist.
237            If the url is to an artists main page,
238            all albums will be downloaded.
239            The tracks will be saved to a subdirectory of
240            your current directory.
241            If a track can't be streamed (i.e. private) it
242            won't be downloaded. Multiple urls can be passed.""",
243    )
244
245    parser.add_argument(
246        "-n",
247        "--no_track_number",
248        action="store_true",
249        help=""" By default the track number will be added
250        to the front of the track title. Pass this switch
251        to disable the behavior.""",
252    )
253
254    parser.add_argument(
255        "-o",
256        "--overwrite",
257        action="store_true",
258        help=""" Pass this flag to overwrite existing files.
259        Otherwise don't download tracks that already exist locally.""",
260    )
261
262    args = parser.parse_args()
263    args.urls = [url.strip("/") for url in args.urls]
264
265    return args
def main(args: argparse.Namespace = None):
268def main(args: argparse.Namespace = None):
269    if not args:
270        args = get_args()
271    for url in args.urls:
272        if page_is_discography(url):
273            ripper = BandRipper(url, args.no_track_number, args.overwrite)
274        else:
275            ripper = AlbumRipper(url, args.no_track_number, args.overwrite)
276        ripper.rip()