Speech to Text
upload-transcribe.tsx
import {
getTranscription,
uploadAudioForTranscription,
} from "@/app/utils/speechToText";
import { Button } from "@/components/ui/button";
import FileUploadModal from "@/components/ui/file-upload-modal";
import { timeout } from "@/lib/utils";
import { Microphone } from "@mynaui/icons-react";
/**
 * Icon button that opens a single-file upload modal and sends the chosen
 * file to the transcription service.
 *
 * The resulting transcript is currently only written to the console.
 */
export default function UploadTranscribeModal() {
  return (
    <FileUploadModal
      maxFiles={1}
      handleUpload={async (files) => {
        const file = files[0];
        if (!file) return;
        console.log("transcribe", files);
        const res = await uploadAudioForTranscription(file);
        // uploadAudioForTranscription resolves to null when the upload fails,
        // so reading `res.id` unguarded would throw a TypeError here.
        if (!res || !res.id) {
          console.error("transcribe: upload failed, no transcript requested");
          return;
        }
        // Short pause before the first status request.
        await timeout(1000);
        const transcript = await getTranscription(res.id);
        console.log(transcript);
      }}
    >
      <Button variant="secondary" size={"icon"}>
        <Microphone className="h-4 w-4" />
      </Button>
    </FileUploadModal>
  );
}
import { getTranscription, uploadAudioForTranscription } from "@/app/utils/speechToText";
import { ButtonVariant, LoadingButton } from "@/components/ui/button";
import { ID, timeout } from "@/lib/utils";
import { Microphone } from "@mynaui/icons-react";
import { useEffect, useRef, useState } from "react";
/**
 * Asks the browser for an audio-only media stream.
 *
 * @returns the granted stream, or `null` when the request fails (the error is
 *   logged to the console rather than rethrown).
 */
async function getMediaStream() {
  try {
    // Audio is the only constraint this app needs.
    return await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (err) {
    console.error(`The following getUserMedia error occurred: ${err}`);
    return null;
  }
}
/**
 * Uploads an audio file and waits for its transcription.
 *
 * @param file audio file to transcribe
 * @returns whatever `getTranscription` resolves to, or `null` when the upload
 *   did not produce a usable job id
 */
async function getFileTranscription(file: File) {
  console.log("transcribe", file);
  const res = await uploadAudioForTranscription(file);
  // The upload helper resolves to null on failure; without this guard the
  // `res.id` access below would throw and reject the returned promise.
  if (!res || !res.id) return null;
  // Short pause before the first status request.
  await timeout(1000);
  return await getTranscription(res.id);
}
/** States the speech input button moves through. */
type SpeechState = "not_ready" | "ready" | "recording" | "transcribing";

/**
 * Button variant name used for each speech state.
 *
 * Typed as a `Record` keyed by `SpeechState` so that adding a state without a
 * matching variant is a compile error instead of an `undefined` lookup.
 */
const variant: Record<SpeechState, string> = {
  not_ready: "secondary",
  ready: "default",
  recording: "destructive",
  transcribing: "secondary",
};
/**
 * Microphone button that records audio and reports its transcription.
 *
 * Click behaviour:
 * - no recorder yet: requests microphone access and creates a MediaRecorder;
 *   on success the state becomes "ready"
 * - "ready": starts a new recording
 * - "recording": stops the recording; the clip is then sent for transcription
 * - "transcribing": clicks are ignored until the transcription settles
 *
 * @param onTranscribe called with the transcript text when one is returned
 */
export default function SpeechInput({
  onTranscribe,
}: {
  onTranscribe: (text: string) => void;
}) {
  const [mediaRecorder, setMediaRecorder] = useState<MediaRecorder>();
  const [speechState, setSpeechState] = useState<SpeechState>("not_ready");
  const [recordedClip, setRecordedClip] = useState<{
    url: string;
    file: File;
  }>();
  const isTranscribing = speechState === "transcribing";

  // The transcription resolves long after the render that started it, so the
  // callback is read through a ref to avoid invoking a stale closure.
  const onTranscribeRef = useRef(onTranscribe);
  useEffect(() => {
    onTranscribeRef.current = onTranscribe;
  }, [onTranscribe]);

  // Transcribe every newly recorded clip.
  useEffect(() => {
    if (!recordedClip) return undefined;
    let cancelled = false;
    getFileTranscription(recordedClip.file)
      .then((result) => {
        if (!cancelled && result) onTranscribeRef.current(result.text);
      })
      .catch((err) => {
        console.error("Transcription failed", err);
      })
      .then(() => {
        // Runs after success and failure alike; previously a rejection left
        // the button stuck in the "transcribing" state.
        if (!cancelled) setSpeechState("ready");
      });
    return () => {
      cancelled = true;
      // Release the object URL created for this clip in the "stop" handler.
      window.URL.revokeObjectURL(recordedClip.url);
    };
  }, [recordedClip]);

  // Stop the microphone tracks when the recorder is replaced or the component
  // unmounts.
  useEffect(() => {
    if (!mediaRecorder) return undefined;
    return () => {
      mediaRecorder.stream.getTracks().forEach((track) => track.stop());
    };
  }, [mediaRecorder]);

  // Mutable recording buffer kept outside React state: chunks are appended
  // from recorder events and must not trigger re-renders.
  const recording = useRef({
    chunks: [] as BlobPart[],
    type: "audio/mpeg",
  }).current;

  const requestMediaRecorder = () => {
    getMediaStream()
      .then((stream) => {
        if (!stream) return;
        const recorder = new MediaRecorder(stream);
        recorder.addEventListener("dataavailable", (e) => {
          recording.chunks.push(e.data);
        });
        recorder.addEventListener("stop", () => {
          console.log("MediaRecorder", recorder.state);
          const id = ID();
          // NOTE(review): the clip is always labelled audio/mpeg with an .mp3
          // name regardless of recorder.mimeType - confirm the transcription
          // service does not rely on the label matching the actual encoding.
          const blob = new Blob(recording.chunks, { type: recording.type });
          const file = new File([blob], `${id}.mp3`);
          const url = window.URL.createObjectURL(blob);
          setRecordedClip({ url, file });
        });
        setMediaRecorder(recorder);
        setSpeechState("ready");
      })
      .catch((err) => {
        // Constructing the recorder can throw; keep that from surfacing as an
        // unhandled rejection.
        console.error("Could not create MediaRecorder", err);
      });
  };

  const onClick = () => {
    if (!mediaRecorder) {
      requestMediaRecorder();
      return;
    }
    if (isTranscribing) {
      console.log("transcribing, please wait...");
      return;
    }
    if (speechState === "ready") {
      // Drop the chunks of the previous clip before recording a new one.
      recording.chunks = [];
      setSpeechState("recording");
      mediaRecorder.start();
      console.log("MediaRecorder", mediaRecorder.state);
    } else if (speechState === "recording") {
      setSpeechState("transcribing");
      mediaRecorder.stop();
    }
  };

  return (
    <LoadingButton
      loading={isTranscribing}
      variant={variant[speechState] as ButtonVariant}
      size={isTranscribing ? "default" : "icon"}
      onClick={onClick}
    >
      {!isTranscribing && <Microphone className="h-4 w-4" />}
      {isTranscribing && "transcribing..."}
    </LoadingButton>
  );
}
speechToText.ts
import { timeout } from "@/lib/utils";
import { getMD5 } from ".";
/**
 * Uploads an audio file to the transcription service.
 *
 * The request is a multipart POST carrying the file and an `id` obtained from
 * `getMD5(file)`.
 *
 * @param file audio file to upload
 * @returns the parsed JSON response body, or `null` when the request fails,
 *   the server answers with a non-2xx status, or the body is not valid JSON
 */
export async function uploadAudioForTranscription(file: File) {
  const id = await getMD5(file);
  const formData = new FormData();
  formData.append("id", id);
  formData.append("file", file);
  try {
    const res = await fetch("https://nuk.scriptsync.app/_transcribe", {
      method: "POST",
      body: formData,
    });
    // fetch only rejects on network failure; an HTTP error status still
    // resolves, so it has to be checked explicitly or an error payload would
    // be handed to callers as if the upload had succeeded.
    if (!res.ok) {
      console.log("Error uploading file", res.status, res.statusText);
      return null;
    }
    const data = await res.json();
    console.log("Speech file processing");
    return data;
  } catch (err) {
    console.log("Error uploading file", err);
    return null;
  }
}
/**
 * Polls the transcription service until the transcript for `id` is available.
 *
 * @param id job id to look up
 * @param interval milliseconds to wait between status requests
 * @param tries number of additional requests allowed after the first one
 *   while the job is still "processing" or "queued"; zero, negative or NaN
 *   values mean a single request
 * @returns the response body once it contains `text`, or `null` when the job
 *   reports an error or an unknown status, the retry budget runs out, or the
 *   request itself fails
 */
export async function getTranscription(
  id: string,
  interval = 2000,
  tries = 30
) {
  let remaining = tries;
  try {
    for (;;) {
      const res = await fetch(`https://nuk.scriptsync.app/_transcript/${id}`);
      const data = (await res.json()) as Obj;
      if (data.text) {
        return data; // { text, segments, language }
      }
      const pending = data.status === "processing" || data.status === "queued";
      // "error" and any unrecognised status end the polling.
      if (!pending) return null;
      // `> 0` rather than `=== 0` so a negative or NaN budget cannot poll
      // forever.
      if (!(remaining > 0)) return null;
      remaining -= 1;
      await timeout(interval);
    }
  } catch (err) {
    console.log("Error getting file", err);
    return null;
  }
}