Enable the profanity filter

This page describes how to use Speech-to-Text to automatically detect profane words in your audio data and censor them in the transcript.

You can enable the profanity filter by setting profanityFilter=true in the RecognitionConfig. If enabled, Speech-to-Text will attempt to detect profane words and return only the first letter followed by asterisks in the transcript (for example, f***). If this field is set to false or not set, Speech-to-Text will not attempt to filter profanities.

The following sample demonstrates how to enable the profanity filter to recognize audio stored in a Google Cloud Storage bucket.

Java

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Java API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

import com.google.cloud.speech.v1.RecognitionAudio;
import com.google.cloud.speech.v1.RecognitionConfig;
import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1.RecognizeResponse;
import com.google.cloud.speech.v1.SpeechClient;
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1.SpeechRecognitionResult;
import java.util.List;

public class SpeechProfanityFilter {

  public void speechProfanityFilter() throws Exception {
    String uriPath = "gs://cloud-samples-tests/speech/brooklyn.flac";
    speechProfanityFilter(uriPath);
  }

  /**
   * Transcribe a remote audio file with multi-channel recognition
   *
   * @param gcsUri the path to the audio file
   */
  public static void speechProfanityFilter(String gcsUri) throws Exception {
    // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
    try (SpeechClient speech = SpeechClient.create()) {

      // Configure remote file request
      RecognitionConfig config =
          RecognitionConfig.newBuilder()
              .setEncoding(AudioEncoding.FLAC)
              .setLanguageCode("en-US")
              .setSampleRateHertz(16000)
              .setProfanityFilter(true)
              .build();

      // Set the remote path for the audio file
      RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

      // Use blocking call to get audio transcript
      RecognizeResponse response = speech.recognize(config, audio);
      List<SpeechRecognitionResult> results = response.getResultsList();

      for (SpeechRecognitionResult result : results) {
        // There can be several alternative transcripts for a given chunk of speech. Just use the
        // first (most likely) one here.
        SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
        System.out.printf("Transcription: %s\n", alternative.getTranscript());
      }
    }
  }
}

Node.js

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Node.js API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

// Filters profanity

/**
 * TODO(developer): Uncomment these variables before running the sample.
 */
// const gcsUri = 'gs://my-bucket/audio.raw';

async function syncRecognizeWithProfanityFilter() {
  // Imports the Google Cloud client library
  const speech = require('@google-cloud/speech');

  // Creates a client
  const client = new speech.SpeechClient();

  const audio = {
    uri: gcsUri,
  };

  const config = {
    encoding: 'FLAC',
    sampleRateHertz: 16000,
    languageCode: 'en-US',
    profanityFilter: true, // set this to true
  };
  const request = {
    audio: audio,
    config: config,
  };

  // Detects speech in the audio file
  const [response] = await client.recognize(request);
  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  console.log(`Transcription: ${transcription}`);
}
syncRecognizeWithProfanityFilter().catch(console.error);

Python

To learn how to install and use the client library for Speech-to-Text, see Speech-to-Text client libraries. For more information, see the Speech-to-Text Python API reference documentation.

To authenticate to Speech-to-Text, set up Application Default Credentials. For more information, see Set up authentication for a local development environment.

from google.cloud import speech
from google.cloud.speech import RecognizeResponse


def sync_recognize_with_profanity_filter_gcs(audio_uri: str) -> RecognizeResponse:
    """Recognizes speech from an audio file in Cloud Storage and filters out profane language.
    Args:
        audio_uri (str): The Cloud Storage URI of the input audio, e.g., gs://[BUCKET]/[FILE]
    Returns:
        cloud_speech.RecognizeResponse: The full response object which includes the transcription results.
    """
    # Define the audio source
    audio = {"uri": audio_uri}

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.FLAC,  # Audio format
        sample_rate_hertz=16000,
        language_code="en-US",
        # Enable profanity filter
        profanity_filter=True,
    )

    response = client.recognize(config=config, audio=audio)

    for result in response.results:
        alternative = result.alternatives[0]
        print(f"Transcript: {alternative.transcript}")

    return response.results