| |
| |
| |
| |
| |
| |
|
|
| import json |
| from pathlib import Path |
| import math |
| import os |
| import sys |
|
|
| import torchaudio |
| from torch.nn import functional as F |
|
|
|
|
def find_audio_files(path, exts=(".wav",), progress=True):
    """Recursively collect audio files under `path` together with their length.

    Args:
        path: root directory to walk; symbolic links are followed.
        exts: container of accepted file suffixes, lower-case and including
            the leading dot. Suffixes found on disk are lower-cased before
            being compared against it.
        progress: when true, print the completion ratio to stderr while the
            metadata of each file is being read.

    Returns:
        A sorted list of `(absolute_path, length)` tuples, where `length`
        is the value reported by `torchaudio.info` for the file (number of
        frames per channel).
    """
    audio_files = []
    for root, _folders, names in os.walk(path, followlinks=True):
        for name in names:
            candidate = Path(root) / name
            if candidate.suffix.lower() in exts:
                audio_files.append(str(candidate.resolve()))

    total = len(audio_files)
    meta = []
    for idx, file in enumerate(audio_files):
        info = torchaudio.info(file)
        if hasattr(info, "num_frames"):
            # Recent torchaudio releases return a metadata object that
            # already exposes the per-channel frame count.
            length = info.num_frames
        else:
            # Legacy torchaudio returns a (signal info, encoding info) pair,
            # where `length` counts samples across all channels.
            siginfo, _ = info
            length = siginfo.length // siginfo.channels
        meta.append((file, length))
        if progress:
            # '\r' keeps the progress indicator on a single terminal line.
            print(format((1 + idx) / total, " 3.1%"), end='\r', file=sys.stderr)
    meta.sort()
    return meta
|
|
|
|
class Audioset:
    """Map-style dataset that serves (optionally fixed-length) audio excerpts.

    Each entry of `files` contributes one or more examples; a flat index is
    translated into a file and a frame offset inside that file.
    """

    def __init__(self, files=None, length=None, stride=None,
                 pad=True, with_path=False, sample_rate=None):
        """
        files should be a list [(file, length)]

        Args:
            files: list of `(file, length)` pairs; `None` is treated as an
                empty list.
            length: number of frames per example. When `None`, every file
                yields exactly one example loaded in full.
            stride: number of frames between the starts of two consecutive
                examples of the same file. Defaults to `length`.
            pad: when true, a trailing (or too short) excerpt is kept and
                zero-padded to `length`; when false it is dropped.
            with_path: when true, `__getitem__` returns `(audio, file)`
                instead of only the audio.
            sample_rate: if set, loading a file with a different sample rate
                raises `RuntimeError`.
        """
        # `None` is the declared default, so it must not crash the loop below.
        self.files = [] if files is None else files
        self.num_examples = []
        self.length = length
        self.stride = stride or length
        self.with_path = with_path
        self.sample_rate = sample_rate
        for file, file_length in self.files:
            if length is None:
                examples = 1
            elif file_length < length:
                # A file shorter than one excerpt only counts when padded.
                examples = 1 if pad else 0
            elif pad:
                # Round up so the last, partial window is included.
                examples = int(math.ceil((file_length - self.length) / self.stride) + 1)
            else:
                # Only windows that fit entirely in the file are counted.
                examples = (file_length - self.length) // self.stride + 1
            self.num_examples.append(examples)

    def __len__(self):
        """Return the total number of examples over all files."""
        return sum(self.num_examples)

    def __getitem__(self, index):
        """Load the example at flat position `index`.

        Negative indices count from the end, as for built-in sequences.

        Returns:
            The loaded audio, or `(audio, file)` when `with_path` is set.

        Raises:
            IndexError: if `index` is outside `[-len(self), len(self))`.
            RuntimeError: if `sample_rate` is set and the file has a
                different sample rate.
        """
        requested = index
        if index < 0:
            index += len(self)
        if index < 0:
            raise IndexError(f"Audioset index {requested} out of range")
        for (file, _), examples in zip(self.files, self.num_examples):
            if index >= examples:
                # The example lives in a later file; make the index relative.
                index -= examples
                continue
            num_frames = 0
            offset = 0
            if self.length is not None:
                offset = self.stride * index
                num_frames = self.length
            # NOTE(review): `offset=` is the keyword of the legacy torchaudio
            # loader; newer releases appear to use `frame_offset=` - confirm
            # against the installed torchaudio version.
            out, sr = torchaudio.load(str(file), offset=offset, num_frames=num_frames)
            if self.sample_rate is not None:
                if sr != self.sample_rate:
                    raise RuntimeError(f"Expected {file} to have sample rate of "
                                       f"{self.sample_rate}, but got {sr}")
            if num_frames:
                # Right-pad the last dimension so every excerpt has `length` frames.
                out = F.pad(out, (0, num_frames - out.shape[-1]))
            if self.with_path:
                return out, file
            else:
                return out
        # Without this, an out-of-range index silently returned None and
        # iterating over the dataset never terminated.
        raise IndexError(f"Audioset index {requested} out of range")
|
|
|
|
if __name__ == "__main__":
    # Scan every directory given on the command line and write the combined
    # (file, length) metadata to stdout as indented JSON.
    meta = []
    for directory in sys.argv[1:]:
        meta.extend(find_audio_files(directory))
    json.dump(meta, sys.stdout, indent=4)
|
|