Python Wave

音声デバイスの操作

波形ファイルの操作

wave ファイルを読み込んで、別のファイルに書き込む。

numpy / scipy を使わなくてよい方法を知りたいので、試行錯誤する。

とりあえず wave を使ってみる。16bit mono 48kHz sampling を対象としている。

C:\>python
Python 2.6 (r26:66721, Oct  2 2008, 11:35:03) [MSC v.1500 32 bit (Intel)] 
on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import wave
>>> a = wave.open("c:/data/A01.wav")
>>> a
<wave.Wave_read instance at 0x00A108C8>
>>> a.getnchannels()
1
>>> a.getsampwidth()
2
>>> a.getframerate()
48000
>>> a.getnframes()
136634
>>> d = a.readframes(a.getnframes())

>>> p = a.getparams()
>>> p
(1, 2, 48000, 136634, 'NONE', 'not compressed')

>>> w = wave.Wave_write()
Exception AttributeError: "Wave_write instance has no attribute '_file'" 
...
>>> w = wave.Wave_write("_test.wav")

>>> w.setparams(p)
>>> w.writeframes(d)
>>> w.close()

wave ファイルを作るところだけまとめ直す（2010-09-28追加）：

# Assumes buf already holds raw 16-bit mono PCM sampled at 16 kHz,
# as a byte string (str on Python 2, bytes on Python 3).
import wave
from contextlib import closing

# (nchannels, sampwidth, framerate, nframes, comptype, compname).
# '//' keeps the frame count an integer on Python 3 as well; a float here
# makes the WAV header write fail.
p = (1, 2, 16000, len(buf) // 2, 'NONE', 'not compressed')

# closing() guarantees w.close() runs even if writing raises.
with closing(wave.Wave_write("_test.wav")) as w:
    w.setparams(p)
    w.writeframes(buf)

d は String なので、音声を加工するためには：

>>> data = struct.unpack('%dh' % a.getnframes(), d)
>>> len(data)
136634
>>> data[0:10]
(0, 0, 7, 6, 4, 5, 4, 3, 2, 2)
>>> data[-10:]
(-1, -7, -6, -3, -3, -1, -4, -3, 0, -2)

音声ファイルの振幅を 1/10 にする

#!/usr/bin/python
# Takuya Nishimoto
# Writes "_test.wav": a copy of src_file with every sample reduced to 1/10.
# Assumes the source holds 16-bit PCM samples.

import struct
import wave
from contextlib import closing


def scale_pcm16(frames, divisor):
    """Return 16-bit PCM data with every sample floor-divided by divisor.

    frames  -- byte string of little-endian signed 16-bit samples, as
               returned by Wave_read.readframes() for a 2-byte sample width.
    divisor -- non-zero integer to divide each sample by.

    The sample count is taken from the byte length rather than the frame
    count, so interleaved multi-channel data is handled as well.
    Raises struct.error if len(frames) is odd.
    """
    count = len(frames) // 2
    # WAV PCM is little-endian; '<' makes that explicit instead of relying
    # on the byte order of the machine running the script.
    fmt = '<%dh' % count
    samples = struct.unpack(fmt, frames)
    # '//' floors on both Python 2 and 3 (same result as the Python 2 int '/').
    return struct.pack(fmt, *[x // divisor for x in samples])


if __name__ == "__main__":
    src_file = "C:/data/A01.wav"

    # closing() releases the file handles even if an error occurs.
    with closing(wave.open(src_file)) as a:
        p = a.getparams()
        d = a.readframes(a.getnframes())

    d2 = scale_pcm16(d, 10)

    with closing(wave.Wave_write("_test.wav")) as w:
        w.setparams(p)
        w.writeframes(d2)

audioop.mul を使っても同じことができる。以下、引数で入出力ファイルを指定する。

#!/usr/bin/python
# Takuya Nishimoto
# usage:
# make_wav.py dest.wav src.wav
#
# Writes dest.wav as a copy of src.wav with the amplitude scaled to 1/10.
# NOTE: audioop was removed from the standard library in Python 3.13
# (PEP 594); this script needs an interpreter that still ships it.

import wave
import sys
import audioop
from contextlib import closing


def make_wav(dest_file, src_file, factor=0.1):
    """Write dest_file as src_file with every sample multiplied by factor.

    dest_file -- path of the WAV file to create.
    src_file  -- path of the WAV file to read.
    factor    -- floating-point gain passed to audioop.mul.

    The sample width is taken from the source file instead of being
    assumed to be 2 bytes.
    """
    # closing() releases the file handles even if an error occurs.
    with closing(wave.open(src_file)) as a:
        p = a.getparams()
        width = a.getsampwidth()
        d = a.readframes(a.getnframes())

    d2 = audioop.mul(d, width, factor)

    with closing(wave.Wave_write(dest_file)) as w:
        w.setparams(p)
        w.writeframes(d2)


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit("usage: make_wav.py dest.wav src.wav")
    make_wav(sys.argv[1], sys.argv[2])

sox でやるような nohead ファイル（拡張子 .sw）との相互変換。

#!/usr/bin/python
# Takuya Nishimoto
# usage: wav2sw.py src.wav dest.sw
# equiv: sox src.wav dest.sw
#
# Strips the WAV header: dest.sw receives only the raw sample bytes.
import wave
import sys
from contextlib import closing


def wav2sw(src_file, dest_file):
    """Copy the raw frame data of the WAV file src_file into dest_file.

    src_file  -- path of the WAV file to read.
    dest_file -- path of the headerless output file (created/overwritten).
    """
    # closing() makes sure the reader is released even if reading fails.
    with closing(wave.open(src_file)) as a:
        d = a.readframes(a.getnframes())
    with open(dest_file, 'wb') as w:
        w.write(d)


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit("usage: wav2sw.py src.wav dest.sw")
    wav2sw(sys.argv[1], sys.argv[2])

#!/usr/bin/python
# Takuya Nishimoto
# usage: sw2wav.py 48000 src.sw dest.wav
# equiv: sox -r 48000 src.sw dest.wav
#
# Wraps headerless 16-bit mono samples in a WAV container.
import wave
import sys
from contextlib import closing


def sw2wav(rate, src_file, dest_file):
    """Write the raw 16-bit mono samples in src_file to dest_file as WAV.

    rate      -- sampling rate in Hz to record in the WAV header.
    src_file  -- path of the headerless input file.
    dest_file -- path of the WAV file to create.
    """
    with open(src_file, 'rb') as f:
        d = f.read()
    # Two bytes per sample; '//' keeps the count an integer on Python 3 too.
    size = len(d) // 2
    # closing() guarantees the writer is closed, which finalizes the file.
    with closing(wave.open(dest_file, 'w')) as a:
        a.setparams((1, 2, rate, size, 'NONE', ''))
        a.writeframes(d)


if __name__ == "__main__":
    if len(sys.argv) < 4:
        sys.exit("usage: sw2wav.py 48000 src.sw dest.wav")
    sw2wav(int(sys.argv[1]), sys.argv[2], sys.argv[3])

バイトオーダーを指定してタプルに変換する（フォーマット文字列先頭の `<` はリトルエンディアン、`>` はビッグエンディアン）

>>> import wave
>>> a = wave.open('FAK_3Z82A.wav','r')
>>> s = a.readframes(10)
>>> import struct
>>> t = struct.unpack('<10h', s)
>>> t
(-5, -6, -8, -7, -4, -5, -3, -1, -6, -4)
>>> t = struct.unpack('>10h', s)
>>> t
(-1025, -1281, -1793, -1537, -769, -1025, -513, -1, -1281, -769)
>>>

タプルのかわりに array を使う

>>> import array
>>> ar = array.array('h')
>>> ar.fromstring(s)
>>> ar
array('h', [-5, -6, -8, -7, -4, -5, -3, -1, -6, -4])

numpy.array を使う：

>>> import wave
>>> import numpy as np
>>> a = wave.open("hoge.wav")
>>> d = a.readframes(a.getnframes())
>>> x = np.frombuffer(d, dtype="int16")
>>> print(x)
[-2563 -2624 -2608 ...,   144   244  -187]
>>> np.amax(x)
16401
>>> np.amin(x)
-16452

ja.nishimotz.com

目次

Python Wave

音声デバイスの操作

波形ファイルの操作