Skip to content

Commit

Permalink
fix(localize): enable whitespace preservation marker in XLIFF files
Browse files Browse the repository at this point in the history
Whitespace can be relevant in extracted XLIFF translation files.
Some i18n tools - e.g. CAT tool (OmegaT) - will reformat
the file to collapse whitespace if there is no indication to tell it
not to.

This commit adds the ability to specify "format options" that are passed
to the translation file serializer. The XLIFF 1.2 and 2.0 serializers have
been updated to accept the `{"xml:space":"preserve"}` format option which will
be added to the `<file>` element in the serialized translation file during
extraction.

Fixes #38679
  • Loading branch information
petebacondarwin committed Sep 8, 2020
1 parent c90eb54 commit dbd95b2
Show file tree
Hide file tree
Showing 12 changed files with 521 additions and 360 deletions.
Expand Up @@ -14,7 +14,7 @@ fs.writeFileSync(filePath + '.bak', contents, 'utf8');

// Write translated file
const updated =
contents.replace(/source>/g, 'target>')
contents.replace(/<(\/)?source([^>]*)>/g, '<$1target$2>')
.replace(/Hello/g, 'Bonjour')
.replace(/source-language="([^"]+)"/g, 'source-language="$1" target-language="legacy"');
fs.writeFileSync(filePath, updated, 'utf8');
26 changes: 21 additions & 5 deletions packages/localize/src/tools/src/extract/main.ts
Expand Up @@ -21,6 +21,7 @@ import {SimpleJsonTranslationSerializer} from './translation_files/json_translat
import {Xliff1TranslationSerializer} from './translation_files/xliff1_translation_serializer';
import {Xliff2TranslationSerializer} from './translation_files/xliff2_translation_serializer';
import {XmbTranslationSerializer} from './translation_files/xmb_translation_serializer';
import {FormatOptions, parseFormatOptions} from './translation_files/format_options';

if (require.main === module) {
const args = process.argv.slice(2);
Expand Down Expand Up @@ -54,6 +55,13 @@ if (require.main === module) {
describe: 'The format of the translation file.',
type: 'string',
})
.option('formatOptions', {
describe:
'Additional options to pass to the translation file serializer, in the form of JSON formatted key-value string pairs:\n' +
'For example: `--formatOptions {"xml:space":"preserve"}.\n' +
'The meaning of the options is specific to the format being serialized.',
type: 'string'
})
.option('o', {
alias: 'outputPath',
required: true,
Expand Down Expand Up @@ -97,6 +105,7 @@ if (require.main === module) {
const logLevel = options.loglevel as (keyof typeof LogLevel) | undefined;
const logger = new ConsoleLogger(logLevel ? LogLevel[logLevel] : LogLevel.warn);
const duplicateMessageHandling = options.d as DiagnosticHandlingStrategy;
const formatOptions = parseFormatOptions(options.formatOptions);


extractTranslations({
Expand All @@ -109,6 +118,7 @@ if (require.main === module) {
useSourceMaps: options.useSourceMaps,
useLegacyIds: options.useLegacyIds,
duplicateMessageHandling,
formatOptions,
});
}

Expand Down Expand Up @@ -152,6 +162,10 @@ export interface ExtractTranslationsOptions {
* How to handle messages with the same id but not the same text.
*/
duplicateMessageHandling: DiagnosticHandlingStrategy;
/**
* A collection of formatting options to pass to the translation file serializer.
*/
formatOptions?: FormatOptions;
}

export function extractTranslations({
Expand All @@ -164,6 +178,7 @@ export function extractTranslations({
useSourceMaps,
useLegacyIds,
duplicateMessageHandling,
formatOptions = {},
}: ExtractTranslationsOptions) {
const fs = getFileSystem();
const basePath = fs.resolve(rootPath);
Expand All @@ -180,7 +195,8 @@ export function extractTranslations({
}

const outputPath = fs.resolve(rootPath, output);
const serializer = getSerializer(format, sourceLocale, fs.dirname(outputPath), useLegacyIds);
const serializer =
getSerializer(format, sourceLocale, fs.dirname(outputPath), useLegacyIds, formatOptions);
const translationFile = serializer.serialize(messages);
fs.ensureDir(fs.dirname(outputPath));
fs.writeFile(outputPath, translationFile);
Expand All @@ -191,17 +207,17 @@ export function extractTranslations({
}

export function getSerializer(
format: string, sourceLocale: string, rootPath: AbsoluteFsPath,
useLegacyIds: boolean): TranslationSerializer {
format: string, sourceLocale: string, rootPath: AbsoluteFsPath, useLegacyIds: boolean,
formatOptions: FormatOptions): TranslationSerializer {
switch (format) {
case 'xlf':
case 'xlif':
case 'xliff':
return new Xliff1TranslationSerializer(sourceLocale, rootPath, useLegacyIds);
return new Xliff1TranslationSerializer(sourceLocale, rootPath, useLegacyIds, formatOptions);
case 'xlf2':
case 'xlif2':
case 'xliff2':
return new Xliff2TranslationSerializer(sourceLocale, rootPath, useLegacyIds);
return new Xliff2TranslationSerializer(sourceLocale, rootPath, useLegacyIds, formatOptions);
case 'xmb':
return new XmbTranslationSerializer(rootPath, useLegacyIds);
case 'json':
Expand Down
@@ -0,0 +1,44 @@
/**
* @license
* Copyright Google LLC All Rights Reserved.
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.io/license
*/

export type FormatOptions = Record<string, string>;
export type ValidOption = [string, string[]];
export type ValidOptions = ValidOption[];

/**
 * Verify that every entry in `options` is permitted by `validOptions`.
 *
 * Each option name must appear in `validOptions`, and its value must be one of the values
 * listed for that name. The first offending entry causes an `Error` to be thrown.
 *
 * @param name The name of the serializer that is receiving the options (used in error messages).
 * @param validOptions An array of `[optionName, allowedValues]` pairs.
 * @param options The options to be validated.
 * @throws Error if an option name is not recognised or its value is not allowed.
 */
export function validateOptions(name: string, validOptions: ValidOptions, options: FormatOptions) {
  const allowedByName = new Map<string, string[]>(validOptions);

  for (const key in options) {
    const allowedValues = allowedByName.get(key);

    // Unknown option name: report the full list of names that would have been accepted.
    if (allowedValues === undefined) {
      const knownNames = JSON.stringify(Array.from(allowedByName.keys()));
      const message = `Invalid format option for ${name}: "${key}".\n` +
          `Allowed options are ${knownNames}.`;
      throw new Error(message);
    }

    // Known option name: the supplied value must be one of the allowed values.
    const received = options[key];
    if (allowedValues.includes(received)) {
      continue;
    }
    const message = `Invalid format option value for ${name}: "${key}".\n` +
        `Allowed option values are ${JSON.stringify(allowedValues)} but received "${received}".`;
    throw new Error(message);
  }
}

/**
 * Parse the given `optionString` into a collection of `FormatOptions`.
 *
 * The string must contain a JSON object whose values are all strings, for example
 * `{"xml:space":"preserve"}`. Anything else (e.g. `null`, an array, a primitive, or an object
 * with non-string values) is rejected here, so that callers never receive a value that does not
 * actually match the `FormatOptions` type.
 *
 * @param optionString The JSON string to parse. Defaults to `'{}'`, i.e. no options.
 * @returns The parsed key-value string pairs.
 * @throws SyntaxError if `optionString` is not valid JSON (raised by `JSON.parse`).
 * @throws Error if the parsed JSON is not an object of key-value string pairs.
 */
export function parseFormatOptions(optionString: string = '{}'): FormatOptions {
  const parsed: unknown = JSON.parse(optionString);

  // `typeof null` and `typeof []` are both 'object', so they need explicit exclusion.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error(
        `Invalid format options: expected a JSON object of key-value string pairs ` +
        `but received ${optionString}.`);
  }

  for (const [key, value] of Object.entries(parsed)) {
    if (typeof value !== 'string') {
      throw new Error(
          `Invalid format options: the value of "${key}" must be a string ` +
          `but received ${JSON.stringify(value)}.`);
    }
  }

  // Safe: the checks above established that `parsed` is a plain JSON object of string values.
  return parsed as FormatOptions;
}
Expand Up @@ -8,6 +8,7 @@
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
import {ɵParsedMessage, ɵSourceLocation} from '@angular/localize';

import {validateOptions} from './format_options';
import {extractIcuPlaceholders} from './icu_parsing';
import {TranslationSerializer} from './translation_serializer';
import {XmlFile} from './xml_file';
Expand All @@ -24,9 +25,12 @@ const LEGACY_XLIFF_MESSAGE_LENGTH = 40;
* @see Xliff1TranslationParser
*/
export class Xliff1TranslationSerializer implements TranslationSerializer {
static validFormatOptions = new Map<string, string[]>([['xml:space', ['preserve']]]);
constructor(
private sourceLocale: string, private basePath: AbsoluteFsPath,
private useLegacyIds: boolean) {}
private sourceLocale: string, private basePath: AbsoluteFsPath, private useLegacyIds: boolean,
private formatOptions: Record<string, string>) {
validateOptions('Xliff1TranslationSerializer', [['xml:space', ['preserve']]], formatOptions);
}

serialize(messages: ɵParsedMessage[]): string {
const ids = new Set<string>();
Expand All @@ -43,6 +47,7 @@ export class Xliff1TranslationSerializer implements TranslationSerializer {
'source-language': this.sourceLocale,
'datatype': 'plaintext',
'original': 'ng2.template',
...this.formatOptions,
});
xml.startTag('body');
for (const message of messages) {
Expand Down
Expand Up @@ -8,6 +8,7 @@
import {AbsoluteFsPath, relative} from '@angular/compiler-cli/src/ngtsc/file_system';
import {ɵParsedMessage, ɵSourceLocation} from '@angular/localize';

import {validateOptions} from './format_options';
import {extractIcuPlaceholders} from './icu_parsing';
import {TranslationSerializer} from './translation_serializer';
import {XmlFile} from './xml_file';
Expand All @@ -25,8 +26,10 @@ const MAX_LEGACY_XLIFF_2_MESSAGE_LENGTH = 20;
export class Xliff2TranslationSerializer implements TranslationSerializer {
private currentPlaceholderId = 0;
constructor(
private sourceLocale: string, private basePath: AbsoluteFsPath,
private useLegacyIds: boolean) {}
private sourceLocale: string, private basePath: AbsoluteFsPath, private useLegacyIds: boolean,
private formatOptions: Record<string, string>) {
validateOptions('Xliff1TranslationSerializer', [['xml:space', ['preserve']]], formatOptions);
}

serialize(messages: ɵParsedMessage[]): string {
const ids = new Set<string>();
Expand All @@ -41,8 +44,9 @@ export class Xliff2TranslationSerializer implements TranslationSerializer {
// We could compute the file from the `message.location` property, but there could
// be multiple values for this in the collection of `messages`. In that case we would probably
// need to change the serializer to output a new `<file>` element for each collection of
// messages that come from a particular original file, and the translation file parsers may not
xml.startTag('file', {'id': 'ngi18n', 'original': 'ng.template'});
// messages that come from a particular original file, and the translation file parsers may
// not
xml.startTag('file', {'id': 'ngi18n', 'original': 'ng.template', ...this.formatOptions});
for (const message of messages) {
const id = this.getMessageId(message);
if (ids.has(id)) {
Expand Down Expand Up @@ -149,8 +153,8 @@ export class Xliff2TranslationSerializer implements TranslationSerializer {
*
* Otherwise return the canonical message id.
*
* An Xliff 2.0 legacy message id is a 64 bit number encoded as a decimal string, which will have
* at most 20 digits, since 2^65-1 = 36,893,488,147,419,103,231. This digest is based on:
* An Xliff 2.0 legacy message id is a 64 bit number encoded as a decimal string, which will
* have at most 20 digits, since 2^64-1 = 18,446,744,073,709,551,615. This digest is based on:
* https://github.com/google/closure-compiler/blob/master/src/com/google/javascript/jscomp/GoogleJsMessageIdGenerator.java
*/
private getMessageId(message: ɵParsedMessage): string {
Expand Down
1 change: 1 addition & 0 deletions packages/localize/src/tools/test/BUILD.bazel
Expand Up @@ -6,6 +6,7 @@ ts_library(
srcs = glob(
["**/*.ts"],
),
visibility = ["//packages/localize/src/tools/test:__subpackages__"],
deps = [
"//packages:types",
"//packages/compiler",
Expand Down
Expand Up @@ -14,6 +14,7 @@ ts_library(
"//packages/compiler-cli/src/ngtsc/logging/testing",
"//packages/compiler-cli/test/helpers",
"//packages/localize/src/tools",
"//packages/localize/src/tools/test:test_lib",
],
)

Expand Down

0 comments on commit dbd95b2

Please sign in to comment.