diff --git a/Cargo.toml b/Cargo.toml index a03f800..a4198c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,11 @@ quick-xml = "0.37" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" thiserror = "1.0" +ipa-translate = { version = "0.2", optional = true } + +[features] +default = ["phonetic-translation"] +phonetic-translation = ["dep:ipa-translate"] [dev-dependencies] # Removed problematic benchmark dependencies for now diff --git a/src/formatters/ssml/base.rs b/src/formatters/ssml/base.rs index 79031cb..5fd4496 100644 --- a/src/formatters/ssml/base.rs +++ b/src/formatters/ssml/base.rs @@ -600,6 +600,14 @@ impl SsmlFormatterBase { } attrs })), + "xsampa" | "praat" | "sil" | "branner" => { + let translated = translate_to_ipa(&key.to_lowercase(), value)?; + let mut attrs = vec![("alphabet".to_string(), "ipa".to_string())]; + if !translated.is_empty() { + attrs.push(("ph".to_string(), translated)); + } + Some(("phoneme".to_string(), attrs)) + } "sub" => { if !value.is_empty() { attributes.push(("alias".to_string(), value.to_string())); @@ -735,6 +743,33 @@ impl SsmlFormatterBase { } } +/// Translate a phonetic-alphabet value to IPA. +/// +/// Recognized keys (lowercase): `xsampa`, `praat`, `sil`, `branner`. Returns +/// `None` for any unrecognized key, or for every key when the +/// `phonetic-translation` feature is disabled — callers treat `None` as +/// "drop this modifier." +/// +/// Conversion is infallible (garbage in, garbage out) per the upstream crate. +#[cfg(feature = "phonetic-translation")] +pub fn translate_to_ipa(key: &str, value: &str) -> Option { + if value.is_empty() { + return Some(String::new()); + } + match key { + "xsampa" => Some(ipa_translate::xsampa_to_ipa(value)), + "praat" => Some(ipa_translate::praat_to_ipa(value)), + "sil" => Some(ipa_translate::sil_to_ipa(value)), + "branner" => Some(ipa_translate::branner_to_ipa(value)), + _ => None, + } +} + +#[cfg(not(feature = "phonetic-translation"))] +pub fn translate_to_ipa(_key: &str, _value: &str) -> Option { + None +} + pub fn format_attr_string_ordered(tag_name: &str, attributes: &TagAttrs) -> String { let fixed_order: Vec<&str> = match tag_name { "say-as" => vec!["interpret-as", "format"], @@ -789,3 +824,64 @@ impl Formatter for SsmlFormatterBase { self.format_node_with_tags(node) } } + +#[cfg(test)] +mod phonetic_alphabet_tests { + use super::*; + use crate::formatters::base::FormatterOptions; + + fn fmt() -> SsmlFormatterBase { + SsmlFormatterBase::new(FormatterOptions::default()) + } + + #[cfg(feature = "phonetic-translation")] + #[test] + fn xsampa_value_becomes_ipa_phoneme_tag() { + let (tag, attrs) = fmt().attribute_to_tag("xsampa", "spitS").unwrap(); + assert_eq!(tag, "phoneme"); + assert_eq!(attrs_get(&attrs, "alphabet"), Some("ipa")); + assert_eq!(attrs_get(&attrs, "ph"), Some("spitʃ")); + } + + #[cfg(feature = "phonetic-translation")] + #[test] + fn praat_sil_branner_all_emit_ipa_phoneme() { + let cases = [ + ("praat", r"p\rta\:ft\^h", "pɹaːtʰ"), + ("sil", "si=l", "sɪl"), + ("branner", "br&ae):nE&r^", "bɹæːnɜ˞"), + ]; + for (key, src, expected_ipa) in cases { + let (tag, attrs) = fmt().attribute_to_tag(key, src).unwrap(); + assert_eq!(tag, "phoneme", "key {}", key); + assert_eq!(attrs_get(&attrs, "alphabet"), Some("ipa"), "key {}", key); + assert_eq!( + attrs_get(&attrs, "ph"), + Some(expected_ipa), + "key {}", + key + ); + } + } + + #[cfg(not(feature = "phonetic-translation"))] + #[test] + fn phonetic_keys_dropped_when_feature_disabled() { + for key in ["xsampa", "praat", "sil", "branner"] { + assert!( + fmt().attribute_to_tag(key, "anything").is_none(), + "{} should be dropped without phonetic-translation feature", + key + ); + } + } + + #[cfg(feature = "phonetic-translation")] + #[test] + fn empty_value_emits_phoneme_without_ph_attr() { + let (tag, attrs) = fmt().attribute_to_tag("xsampa", "").unwrap(); + assert_eq!(tag, "phoneme"); + assert_eq!(attrs_get(&attrs, "alphabet"), Some("ipa")); + assert_eq!(attrs_get(&attrs, "ph"), None); + } +} diff --git a/src/formatters/ssml/google_assistant.rs b/src/formatters/ssml/google_assistant.rs index 1a720a7..2d567ce 100644 --- a/src/formatters/ssml/google_assistant.rs +++ b/src/formatters/ssml/google_assistant.rs @@ -26,7 +26,7 @@ impl GoogleAssistantSsmlFormatter { } "excited" | "disappointed" => None, "voice" | "lang" => None, - "ipa" => None, + "ipa" | "xsampa" | "praat" | "sil" | "branner" => None, "style" => { if !value.is_empty() { attributes.push(("name".to_string(), value.to_string())); @@ -40,8 +40,8 @@ impl GoogleAssistantSsmlFormatter { fn format_google_text_modifier(&self, node: &AstNode) -> Result { let mut tags: Vec = Vec::new(); let mut last_say_as: Option = None; - let mut has_ipa = false; - let mut non_ipa_count = 0; + let mut phonetic_key: Option = None; + let mut non_phonetic_count = 0; for key in &node.attribute_keys { let value = match node.attributes.get(key) { @@ -49,10 +49,15 @@ impl GoogleAssistantSsmlFormatter { None => continue, }; - if key.to_lowercase() == "ipa" { - has_ipa = true; + if matches!( + key.to_lowercase().as_str(), + "ipa" | "xsampa" | "praat" | "sil" | "branner" + ) { + if phonetic_key.is_none() { + phonetic_key = Some(key.clone()); + } } else { - non_ipa_count += 1; + non_phonetic_count += 1; } if let Some(tag_info) = self.google_attribute_to_tag(key, value) { @@ -71,15 +76,13 @@ impl GoogleAssistantSsmlFormatter { } } - if has_ipa && non_ipa_count == 0 { + if phonetic_key.is_some() && non_phonetic_count == 0 { return Ok(node.text.clone()); } - if has_ipa { - if let Some(ipa_tag) = self - .base - .attribute_to_tag("ipa", node.attributes.get("ipa").unwrap_or(&String::new())) - { + if let Some(key) = phonetic_key { + let value = node.attributes.get(&key).cloned().unwrap_or_default(); + if let Some(ipa_tag) = self.base.attribute_to_tag(&key, &value) { tags.push(ipa_tag); } } diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 25b8e3f..414a3c4 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -31,6 +31,21 @@ fn test_all_test_cases() { .and_then(|n| n.to_str()) .unwrap_or("unknown"); + // Fixtures that exercise the phonetic-translation feature; only + // run them when that feature is on. + #[cfg(not(feature = "phonetic-translation"))] + if matches!( + test_name, + "xsampa-standard" + | "xsampa-stress" + | "praat-standard" + | "sil-standard" + | "branner-standard" + | "combo-xsampa-emphasis" + ) { + continue; + } + // Read the .smd input file let smd_file = test_dir.join(format!("{}.smd", test_name)); if !smd_file.exists() { diff --git a/tests/test-data b/tests/test-data index 6145bce..b7d247f 160000 --- a/tests/test-data +++ b/tests/test-data @@ -1 +1 @@ -Subproject commit 6145bcec92b574103447c9d6566ce67c6cf4402f +Subproject commit b7d247f0900eb52a96b983c8b2e04cc14331f06a