Internationalisation and Localisation Testing

Testing that software works correctly across languages, locales, character sets, and cultural conventions.


i18n vs l10n

Internationalisation (i18n): designing software so it CAN be localised
  - Externalising all strings to translation files
  - Using Unicode throughout
  - Designing UI to handle text expansion (German ≈ 30% longer than English)
  - Handling RTL (right-to-left) layout switches

Localisation (l10n): adapting software for a specific locale
  - Translating strings
  - Adapting dates, numbers, currencies
  - Region-specific content (legal, phone formats, address formats)

Testing covers both:
  - Does the i18n infrastructure work? (pseudo-localisation)
  - Does each locale render correctly? (locale-specific tests)

Pseudo-Localisation Testing

# Pseudo-localisation replaces strings with visually distinct but readable characters
# Catches i18n issues without needing real translations

import re

def pseudolocalise(text: str) -> str:
    """
    Converts "Hello World" → "[Ĥéĺĺö Ŵöŕĺď!!!]"

    Brackets catch untranslated strings that appear without brackets.
    Accented chars catch charset encoding issues.
    !!! adds ~30% length to catch UI truncation.
    """
    # ASCII letters → visually similar accented variants; anything not in
    # the map (digits, punctuation, non-Latin) passes through unchanged.
    char_map = {
        'a': 'à', 'b': 'ƀ', 'c': 'ç', 'd': 'ď', 'e': 'é',
        'f': 'ƒ', 'g': 'ĝ', 'h': 'ĥ', 'i': 'ï', 'j': 'ĵ',
        'k': 'ķ', 'l': 'ĺ', 'm': 'm̂', 'n': 'ñ', 'o': 'ö',
        'p': 'þ', 'q': 'q̃', 'r': 'ŕ', 's': 'š', 't': 'ţ',
        'u': 'ü', 'v': 'v̂', 'w': 'ŵ', 'x': 'x̂', 'y': 'ŷ', 'z': 'ž',
    }
    # Upper-case letters go through their lower-case mapping and are then
    # re-uppercased, so the original casing survives pseudo-localisation.
    pseudoloc = "".join(char_map.get(c.lower(), c).upper() if c.isupper()
                        else char_map.get(c, c)
                        for c in text)
    # ~30% padding, with at least one '!' so even 1-2 char strings grow.
    pad = "!" * max(1, len(text) // 3)
    return f"[{pseudoloc}{pad}]"


# Playwright: run pseudo-locale against all pages
import pytest
from playwright.sync_api import Page

@pytest.mark.parametrize("path", ["/", "/checkout", "/account"])
def test_no_truncated_text(page: Page, path: str) -> None:
    """Under the pseudo locale, no element may clip its (padded) text."""
    page.goto("http://localhost:3000" + path + "?lang=pseudo")
    # An element whose scrollWidth exceeds its clientWidth (with a 2px
    # tolerance) is rendering text it cannot fully show.
    clipped = page.evaluate("""
        () => {
            const clipped = [];
            document.querySelectorAll('*').forEach(el => {
                if (el.scrollWidth > el.clientWidth + 2 && el.innerText?.trim()) {
                    clipped.push({tag: el.tagName, text: el.innerText.slice(0, 50)});
                }
            });
            return clipped;
        }
    """)
    assert not clipped, f"Truncated elements on {path}: {clipped}"

Locale-Specific Test Scenarios

# pytest parametrize across locales
import pytest
from dataclasses import dataclass

@dataclass
class LocaleConfig:
    """Expected formatting conventions for one locale under test."""
    code: str                # BCP 47 tag, e.g. "de-DE" — sent as Accept-Language / ?locale=
    date_format: str         # display pattern, e.g. "DD.MM.YYYY"
    currency: str            # symbol ("£") or ISO code ("SAR") shown next to prices
    number_separator: str    # thousands separator
    decimal_separator: str   # separator before the fractional part
    rtl: bool                # True when the locale lays out right-to-left

# Locale matrix: two English baselines plus the structurally different cases —
# comma-as-decimal (de-DE), RTL with Arabic separators (ar-SA), and CJK with
# year-first dates (ja-JP).
LOCALES = [
    LocaleConfig("en-GB", "DD/MM/YYYY", "£", ",", ".", False),
    LocaleConfig("en-US", "MM/DD/YYYY", "$", ",", ".", False),
    LocaleConfig("de-DE", "DD.MM.YYYY", "€", ".", ",", False),
    LocaleConfig("ar-SA", "DD/MM/YYYY", "SAR", "،", "٫", True),  # U+060C / U+066B separators
    LocaleConfig("ja-JP", "YYYY/MM/DD", "¥", ",", ".", False),
]

@pytest.mark.parametrize("locale", LOCALES, ids=lambda l: l.code)
def test_price_formatted_for_locale(page, locale: LocaleConfig) -> None:
    """Prices must carry the locale's currency marker and decimal style."""
    page.set_extra_http_headers({"Accept-Language": locale.code})
    page.goto("http://localhost:3000/products/widget")

    displayed = page.get_by_test_id("product-price").inner_text()

    # The currency symbol/code (or at least its first character, for
    # multi-letter codes like "SAR") must appear in the rendered price.
    has_currency = locale.currency in displayed or locale.currency[:1] in displayed
    assert has_currency

    # Comma-decimal locales must show a comma after the currency marker.
    if locale.decimal_separator == ",":
        fraction = displayed.split(locale.currency)[-1]
        assert "," in fraction

@pytest.mark.parametrize("locale", LOCALES, ids=lambda l: l.code)
def test_date_formatted_for_locale(page, locale: LocaleConfig) -> None:
    """Order dates must render in the locale's date pattern.

    The original check only counted parts, so ja-JP (YYYY/MM/DD) and
    en-GB (DD/MM/YYYY) were indistinguishable. This version also checks
    per-part widths (locating the 4-digit year) and the separator used.
    NOTE(review): width checks assume zero-padded day/month, as the
    DD/MM patterns imply — confirm against the rendering layer. DD/MM vs
    MM/DD still cannot be told apart by width alone; that needs a known
    fixture date.
    """
    page.goto(f"http://localhost:3000/account/orders?locale={locale.code}")

    date_cells = page.get_by_test_id("order-date").all()
    if not date_cells:
        pytest.skip("No orders to check")

    date_text = date_cells[0].inner_text().strip()
    parts = re.split(r"[/\-.]", date_text)
    expected_parts = re.split(r"[/\-.]", locale.date_format)
    assert len(parts) == 3, f"Date {date_text!r} doesn't have 3 parts for {locale.code}"

    # Year position: each rendered part must be as wide as its pattern part.
    for actual, pattern in zip(parts, expected_parts):
        assert len(actual) == len(pattern), (
            f"Date {date_text!r} doesn't match pattern {locale.date_format} for {locale.code}"
        )

    # The separator on the page must be the one the locale pattern uses.
    separator = next(ch for ch in locale.date_format if ch in "/-.")
    assert separator in date_text, (
        f"Expected separator {separator!r} in {date_text!r} for {locale.code}"
    )

RTL Layout Testing

def test_rtl_layout_correct(page: Page) -> None:
    """An Arabic locale must mirror the full layout, not just set dir=rtl."""
    page.set_extra_http_headers({"Accept-Language": "ar"})
    page.goto("http://localhost:3000/checkout")

    # The document root advertises layout direction via its dir attribute.
    direction = page.evaluate("document.documentElement.getAttribute('dir')")
    assert direction == "rtl", f"Expected 'rtl' dir, got {direction!r}"

    # In RTL the primary navigation mirrors into the right half of the viewport.
    nav_box = page.get_by_role("navigation").bounding_box()
    half_width = page.viewport_size["width"] / 2
    assert nav_box["x"] > half_width, "Navigation should be right-aligned in RTL"

    # Inputs must align their text to the visual right as well.
    field = page.get_by_label("البريد الإلكتروني")  # "Email" in Arabic
    align = field.evaluate("el => getComputedStyle(el).textAlign")
    assert align in ("right", "end"), f"Input text-align should be right in RTL, got {align}"

Character Set and Encoding Tests

# (label, name) pairs exercising distinct unicode failure modes end-to-end.
UNICODE_TEST_STRINGS = [
    ("Arabic",  "مرحباً بالعالم"),
    ("Chinese", "你好世界"),
    ("Japanese", "こんにちは世界"),
    ("Korean",  "안녕하세요 세계"),
    ("Emoji",   "Hello 🌍 World 🚀"),
    ("Mixed RTL", "Hello مرحبا World"),
    ("Long",    "A" * 500),                  # text truncation
    ("Special", "O'Brien & <script>alert(1)"),  # injection probe
    ("Null-ish", "Hello�World"),         # U+FFFD replacement char (not a null byte) — must survive re-encoding
    ("Surrogate", "𝕳𝖊𝖑𝖑𝖔"),               # supplementary plane chars
]

@pytest.mark.parametrize("label,name", UNICODE_TEST_STRINGS)
def test_user_name_round_trips_correctly(api_client, label: str, name: str) -> None:
    """Store a user whose name is non-trivial unicode; read it back unchanged."""
    create_resp = api_client.post(
        "/users", json={"name": name, "email": "test@example.com"}
    )
    assert create_resp.status_code == 201, f"Failed to create user with {label} name"

    # Fetch by the id the create call returned and compare the stored name.
    user_id = create_resp.json()["id"]
    stored_name = api_client.get(f"/users/{user_id}").json()["name"]
    assert stored_name == name, (
        f"Name round-trip failed for {label}: expected {name!r}, got {stored_name!r}"
    )

Locale Test Checklist

Before release in a new locale, verify:

Formatting
  [ ] Dates display in locale format (DD/MM vs MM/DD)
  [ ] Numbers use correct thousands and decimal separators
  [ ] Currency shows correct symbol and position (£99.99 vs 99,99 €)
  [ ] Phone numbers format correctly for the locale

Content
  [ ] All user-facing strings are translated (no English leakage)
  [ ] Translated text fits in UI without clipping (check pseudo-loc first)
  [ ] Images and icons don't contain locale-specific text
  [ ] Legal content (T&Cs, privacy) is locale-appropriate

RTL (if applicable)
  [ ] Layout direction reverses completely
  [ ] Icons that imply direction (arrows, chevrons) flip correctly
  [ ] Text alignment correct in all form fields

Input
  [ ] Locale-specific keyboard inputs accepted (e.g., accented chars)
  [ ] Copy-paste from locale OS works correctly
  [ ] Address form fields match locale conventions

Data
  [ ] Unicode names survive round-trip (store, retrieve, display)
  [ ] Sorting is locale-aware (ä sorts with a in German, not after z)
  [ ] Case conversion works for locale (Turkish dotted/dotless i)

Common Failure Cases

Pseudo-localisation skipped because "real translations are ready" Why: teams substitute pseudo-loc with actual translations and lose the structural verification that pseudo-loc provides. Detect: UI truncation bugs are reported only after localisation is complete, not before. Fix: run pseudo-loc as a mandatory CI step before any real locale is wired up; treat it as a structural test, not a translation preview.

Hard-coded en-US date format in backend API response Why: the API serialises dates as MM/DD/YYYY strings instead of ISO 8601, so clients cannot reformat them for the locale. Detect: test_date_formatted_for_locale passes for en-US but fails for de-DE and ja-JP. Fix: always return dates as ISO 8601 from the API and let the frontend format them using Intl.DateTimeFormat with the active locale.

RTL layout partially applied — only dir attribute set, CSS not updated Why: the developer sets <html dir="rtl"> but flexbox/grid containers use margin-left/float: left instead of logical properties, so elements don't mirror. Detect: test_rtl_layout_correct passes the dir assertion but fails the nav position check. Fix: replace directional CSS properties with CSS logical properties (margin-inline-start, padding-inline-end) throughout and verify with an Arabic locale test run.

Unicode names corrupt at the database boundary due to wrong collation Why: the database column is created with a non-UTF8 collation (e.g., latin1 in MySQL), silently dropping or mangling multibyte characters. Detect: test_user_name_round_trips_correctly fails for Chinese and Japanese labels while ASCII labels pass. Fix: set the database character set to utf8mb4 and collation to utf8mb4_unicode_ci for all string columns; verify with the parametrised round-trip test suite.

Locale test matrix too narrow — only happy-path locales tested Why: the test suite covers en-US and en-GB but not the locales with structurally different formatting (RTL, non-Latin decimal separators, non-Gregorian calendars). Detect: production bug reports cluster on ar-SA, he-IL, or ja-JP locales that were never in the test matrix. Fix: add at minimum one RTL locale, one locale with comma-as-decimal (e.g., de-DE), and one CJK locale to the parametrised test matrix.

Connections

qa-hub · qa/cross-browser-testing · qa/accessibility-testing · qa/test-case-design · qa/compliance-testing

Open Questions

  • What testing scenarios does this technique systematically miss?
  • How does this approach need to change when delivery cadence moves to continuous deployment?