Implement advanced subsetting settings

This commit is contained in:
be5invis 2025-05-03 12:05:05 -07:00
parent 181890af8e
commit 862138b306
10 changed files with 342 additions and 127 deletions

4
.gitignore vendored
View file

@ -10,6 +10,10 @@ node_modules
dist
release-archives
# Test artifacts
.test
.coverage
# Generated files
packages/*/lib
packages/font-glyphs/src/**/*.mjs

View file

@ -91,7 +91,7 @@ Build plan could have 9 optional subsections:
* `widths`
* `slopes`
* `compatibilityLigatures`
* `excludeChars`
* `subset`
* `metricOverride`
* `namingOverride`
@ -3839,12 +3839,31 @@ featureTag = 'calt'
sequence = '<*>'
```
#### Excluding Characters
#### Subsettingn the Result
Use the `excludeChars` configuration to exclude character ranges from the font.
Use the `subset` configuration to limit the character set in the final font. The `subset` section is an object with either `include` or `exclude` property.
- When `include` is used, only the characters in the list will be included in the final font.
- When `exclude` is used, all characters except those in the list will be included in the final font.
- When both `include` and `exclude` are used, the final font will include only the characters in `include`, and without those in `exclude`.
In `include` or `exclude`, we could specify the character set in thre ways:
- `textFile`: the path to a text file containing the characters to include/exclude. The file should be in UTF-8 encoding.
- `text`: a string containing the characters to include/exclude.
- `ranges`: a list of ranges of code points to include/exclude. Each range is a list of two integers, representing the start and end code points (inclusive).
Sample of `include`:
```toml
[buildPlans.iosevkaCustom.excludeChars]
[buildPlans.iosevkaCustom.subset.include]
textFile = "agl.txt"
```
Sample of `exclude`:
```toml
[buildPlans.iosevkaCustom.subset.exclude]
ranges = [[10003, 10008]]
```

View file

@ -1,6 +1,7 @@
import { buildGlyphs } from "@iosevka/font-glyphs";
import { copyFontMetrics } from "@iosevka/font-glyphs/aesthetics";
import { buildOtl } from "@iosevka/font-otl";
import { createSubsetFilter } from "@iosevka/param";
import { cleanupGlyphStore } from "../cleanup/index.mjs";
import { CreateEmptyFont } from "../font-io/index.mjs";
@ -23,13 +24,8 @@ export async function buildFont(para, cache) {
const otl = buildOtl(para, gs.glyphStore);
// Regulate (like geometry conversion)
const excludeChars = new Set();
if (para.excludedCharRanges) {
for (const [start, end] of para.excludedCharRanges) {
for (let p = start; p <= end; p++) excludeChars.add(p);
}
}
const cleanGs = cleanupGlyphStore(cache, para, gs.glyphStore, excludeChars, otl);
const sf = await createSubsetFilter(para.subset, para.excludedCharRanges);
const cleanGs = cleanupGlyphStore(cache, para, gs.glyphStore, sf, otl);
// Convert to TTF
const font = await convertOtd(baseFont, otl, cleanGs);

View file

@ -2,60 +2,108 @@ import * as Geometry from "@iosevka/geometry";
import { Transform } from "@iosevka/geometry/transform";
import { VS01 } from "@iosevka/glyph/relation";
export function gcFont(glyphStore, excludedChars, otl) {
const daGsub = markSweepOtlLookups(otl.GSUB);
markSweepOtlLookups(otl.GPOS);
const sink = markGlyphs(glyphStore, excludedChars, otl, daGsub);
return sweepGlyphs(glyphStore, sink);
/// This function will remove all the glyphs outside the subset filter, and all the lookups that are
/// unreachable or emptied by the subsetting process.
///
/// Note that, given the process, glyph markin and lookup marking are entangled. Thus we perform
/// a looped marking process.
export function gcFont(glyphStore, subsetFilter, otl) {
let markedGlyphNames = createGlyphDepthMapForAllGlyphs(glyphStore);
let sizeBefore = markedGlyphNames.size;
let sizeAfter = sizeBefore;
// Set of accessible and directly accessible lookups
let aGsub = new Set(),
daGsub = new Set();
let aGpos = new Set(),
daGpos = new Set();
do {
sizeBefore = sizeAfter;
[aGsub, daGsub] = markLookups(otl.GSUB, markedGlyphNames);
[aGpos, daGpos] = markLookups(otl.GPOS, markedGlyphNames);
markedGlyphNames = markGlyphs(glyphStore, subsetFilter, otl, daGsub);
sizeAfter = markedGlyphNames.size;
} while (sizeAfter < sizeBefore);
analyzeReferenceGraph(glyphStore, markedGlyphNames);
sweepOtlTable(otl.GSUB, aGsub);
sweepOtlTable(otl.GPOS, aGpos);
return sweepGlyphs(glyphStore, markedGlyphNames);
}
function createGlyphDepthMapForAllGlyphs(glyphStore) {
let map = new Map();
for (const [gName, g] of glyphStore.namedEntries()) map.set(gName, 1);
return map;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
function markSweepOtlLookups(table) {
if (!table || !table.features || !table.lookups) return;
const accessibleLookupsIds = new Set();
const directAccessibleLookupsIds = new Set();
markLookups(table, accessibleLookupsIds, directAccessibleLookupsIds);
sweepLookups(table, accessibleLookupsIds);
sweepFeatures(table, accessibleLookupsIds);
return directAccessibleLookupsIds;
}
function markLookups(table, sink, sinkDirect) {
/// This function will mark all the lookups that are reachable and non-empty from the marked glyphs.
/// It will also mark the lookups that are directly reachable from the features.
function markLookups(table, markedGlyphs) {
if (!table || !table.features) return;
markLookupsStart(table, sink, sinkDirect);
let loop = 0,
lookupSetChanged = false;
const reachableLookups = new Set();
const directReachableLookups = new Set();
markLookupsStart(table, markedGlyphs, reachableLookups, directReachableLookups);
let sizeBefore = reachableLookups.size,
sizeAfter = sizeBefore;
do {
lookupSetChanged = false;
let sizeBefore = sink.size;
for (const l of Array.from(sink)) {
const lookup = table.lookups[l];
if (!lookup) continue;
if (lookup.type === "gsub_chaining" || lookup.type === "gpos_chaining") {
for (let st of lookup.rules) {
if (!st || !st.apply) continue;
for (const app of st.apply) {
if (!app.lookup.name)
throw new Error("Unreachable: lookup name should be present");
sink.add(app.lookup.name);
}
}
}
sizeBefore = sizeAfter;
for (const l of Array.from(reachableLookups)) {
markLookupIndirect(table, l, markedGlyphs, reachableLookups);
}
loop++;
lookupSetChanged = sizeBefore !== sink.size;
} while (loop < 0xff && lookupSetChanged);
sizeAfter = reachableLookups.size;
} while (sizeAfter > sizeBefore);
return [reachableLookups, directReachableLookups];
}
function markLookupsStart(table, sink, sinkDirect) {
function markLookupsStart(table, markedGlyphs, sink, sinkDirect) {
for (let f in table.features) {
const feature = table.features[f];
if (!feature) continue;
for (const l of feature.lookups) {
if (isLookupEmpty(table, l, markedGlyphs)) continue;
sink.add(l);
sinkDirect.add(l);
}
}
}
function markLookupIndirect(gsub, lid, markedGlyphs, reachableLookups) {
const lookup = gsub.lookups[lid];
if (!lookup) return;
if (lookup.type !== "gsub_chaining" && lookup.type != "gpos_chaining") return;
for (const rule of lookup.rules) {
if (!rule || !rule.apply) continue;
for (const app of rule.apply) {
if (!app.lookup.name) throw new Error("Unreachable: lookup name should be present");
if (isLookupEmpty(gsub, app.lookup.name, markedGlyphs)) continue;
reachableLookups.add(app.lookup.name);
}
}
}
function isLookupEmpty(gsub, lid, markedGlyphs) {
const lookup = gsub.lookups[lid];
if (!lookup) return true;
const handler = LookupTypehHanlderMap[lookup.type];
if (!handler) return false;
return handler.isEmpty(gsub, lookup, markedGlyphs);
}
function sweepOtlTable(table, accessibleLookupsIds) {
if (!table || !table.features || !table.lookups) return;
sweepLookups(table, accessibleLookupsIds);
sweepFeatures(table, accessibleLookupsIds);
}
function sweepLookups(table, accessibleLookupsIds) {
let lookups1 = {};
for (const l in table.lookups) {
@ -85,12 +133,11 @@ function sweepFeatures(table, accessibleLookupsIds) {
///////////////////////////////////////////////////////////////////////////////////////////////////
function markGlyphs(glyphStore, excludedChars, otl, daGsub) {
const markedGlyphs = markGlyphsInitial(glyphStore, excludedChars);
function markGlyphs(glyphStore, subsetFilter, otl, daGsub) {
const markedGlyphs = markGlyphsInitial(glyphStore, subsetFilter);
while (markGlyphsGr(glyphStore, markedGlyphs, otl));
if (otl.GSUB) markGlyphsByGsub(otl.GSUB, markedGlyphs, daGsub);
while (markGlyphsGr(glyphStore, markedGlyphs, otl));
analyzeReferenceGraph(glyphStore, markedGlyphs);
return markedGlyphs;
}
@ -99,7 +146,7 @@ function markSingleGlyph(markedGlyphs, gName, d) {
if (!existing || d < existing) markedGlyphs.set(gName, d);
}
function markGlyphsInitial(glyphStore, excludedChars) {
function markGlyphsInitial(glyphStore, subsetFilter) {
let markedGlyphs = new Map();
for (const [gName, g] of glyphStore.namedEntries()) {
if (!g) continue;
@ -108,7 +155,7 @@ function markGlyphsInitial(glyphStore, excludedChars) {
const unicodeSet = glyphStore.queryUnicodeOf(g);
if (unicodeSet) {
for (const u of unicodeSet) {
if (excludedChars.has(u)) continue;
if (!subsetFilter.isCharacterIncluded(u)) continue;
let d = Math.max(1, Math.min(u, 0xffff) >> 4);
markSingleGlyph(markedGlyphs, gName, d);
}
@ -142,76 +189,9 @@ function markGlyphsByGsub(gsub, markedGlyphs, daGsub) {
function markGlyphsByLookup(gsub, lid, markedGlyphs) {
const lookup = gsub.lookups[lid];
if (!lookup) return;
switch (lookup.type) {
case "gsub_single":
return markGlyphsGsubSingle(markedGlyphs, lookup);
case "gsub_multiple":
return markGlyphsGsubMultiple(markedGlyphs, lookup);
case "gsub_alternate":
return markGlyphsGsubAlternate(markedGlyphs, lookup);
case "gsub_ligature":
return markGlyphsGsubLigature(markedGlyphs, lookup);
case "gsub_chaining": {
rules: for (const rule of lookup.rules) {
// Check whether all match coverages has at least one glyph in the sink
for (const m of rule.match) {
let atLeastOneMatch = false;
for (const matchGlyph of m)
if (markedGlyphs.has(matchGlyph)) atLeastOneMatch = true;
if (!atLeastOneMatch) continue rules;
}
// If so traverse through the lookup applications
for (const app of rule.apply) {
if (!app.lookup.name)
throw new Error("Unreachable: lookup name should be present");
markGlyphsByLookup(gsub, app.lookup.name, markedGlyphs);
}
}
break;
}
case "gsub_reverse":
return markGlyphsGsubReverse(markedGlyphs, lookup);
}
}
function markGlyphsGsubSingle(markedGlyphs, lookup) {
const st = lookup.substitutions;
for (const k in st) {
const d = markedGlyphs.get(k);
if (d && st[k]) markSingleGlyph(markedGlyphs, st[k], d + 0x1000);
}
}
function markGlyphsGsubMultiple(markedGlyphs, lookup) {
const st = lookup.substitutions;
for (const k in st) {
const d = markedGlyphs.get(k);
if (d && st[k]) for (const g of st[k]) markSingleGlyph(markedGlyphs, g, d + 0x1000);
}
}
function markGlyphsGsubAlternate(markedGlyphs, lookup) {
markGlyphsGsubMultiple(markedGlyphs, lookup);
}
function markGlyphsGsubLigature(markedGlyphs, lookup) {
const st = lookup.substitutions;
for (const sub of st) {
let maxD = 0;
for (const g of sub.from) {
const d = markedGlyphs.get(g);
if (d && d > maxD) maxD = d;
}
if (maxD && sub.to) markSingleGlyph(markedGlyphs, sub.to, maxD + 0x1000);
}
}
function markGlyphsGsubReverse(markedGlyphs, lookup) {
for (const rule of lookup.rules) {
if (rule.match && rule.to) {
const matchCoverage = rule.match[rule.inputIndex];
for (let j = 0; j < matchCoverage.length; j++) {
const d = markedGlyphs.get(matchCoverage[j]);
if (d && rule.to[j]) markSingleGlyph(markedGlyphs, rule.to[j], d + 0x1000);
}
}
}
const handler = LookupTypehHanlderMap[lookup.type];
if (!handler) return;
handler.markGlyphs(gsub, lookup, markedGlyphs);
}
function sweepGlyphs(glyphStore, gnSet) {
@ -220,6 +200,156 @@ function sweepGlyphs(glyphStore, gnSet) {
///////////////////////////////////////////////////////////////////////////////////////////////////
// OTL lookup handlers
const GsubSingle = {
isEmpty(gsub, lookup, markedGlyphs) {
const st = lookup.substitutions;
for (const k in st) if (markedGlyphs.has(k)) return false;
return true;
},
markGlyphs(gsub, lookup, markedGlyphs) {
const st = lookup.substitutions;
for (const k in st) {
const d = markedGlyphs.get(k);
if (d && st[k]) markSingleGlyph(markedGlyphs, st[k], d + 0x1000);
}
},
};
const GsubMultipleAlternate = {
isEmpty(gsub, lookup, markedGlyphs) {
const st = lookup.substitutions;
for (const k in st) if (markedGlyphs.has(k)) return false;
return true;
},
markGlyphs(gsub, lookup, markedGlyphs) {
const st = lookup.substitutions;
for (const k in st) {
const d = markedGlyphs.get(k);
if (d && st[k]) for (const g of st[k]) markSingleGlyph(markedGlyphs, g, d + 0x1000);
}
},
};
const GsubLigature = {
isEmpty(gsub, lookup, markedGlyphs) {
const st = lookup.substitutions;
for (const sub of st) {
// Check all of the glyphs are in the marked glyphs set
let allGlyphsInSet = true;
for (const g of sub.from) {
if (!markedGlyphs.has(g)) {
allGlyphsInSet = false;
break;
}
}
// If all glyphs are in the marked glyphs set, then this substitution is valid,
// thus the lookup is not empty. Return false.
if (allGlyphsInSet) return false;
}
return true;
},
markGlyphs(gsub, lookup, markedGlyphs) {
const st = lookup.substitutions;
for (const sub of st) {
let maxD = 0;
for (const g of sub.from) {
const d = markedGlyphs.get(g);
if (d && d > maxD) maxD = d;
}
if (maxD && sub.to) markSingleGlyph(markedGlyphs, sub.to, maxD + 0x1000);
}
},
};
const GsubChaining = {
isEmpty(gsub, lookup, markedGlyphs) {
rules: for (const rule of lookup.rules) {
if (!rule.match || !rule.apply) continue rules;
// Check if all match coverages have at least one glyph in the marked glyphs set
// If not, skip to next rule
for (const m of rule.match) {
let atLeastOneMatch = false;
for (const matchGlyph of m)
if (markedGlyphs.has(matchGlyph)) atLeastOneMatch = true;
if (!atLeastOneMatch) continue rules;
}
// Check all the applications. If all of them are empty, skip to next rule.
let allApplicationsAreEmpty = true;
for (const app of rule.apply) {
if (!app.lookup.name) throw new Error("Unreachable: lookup name should be present");
if (!isLookupEmpty(gsub, app.lookup.name, markedGlyphs)) {
allApplicationsAreEmpty = false;
break;
}
}
if (allApplicationsAreEmpty) continue rules;
// This rule is valid, return false
return false;
}
return true;
},
markGlyphs(gsub, lookup, markedGlyphs) {
rules: for (const rule of lookup.rules) {
// Check whether all match coverages has at least one glyph in the sink
for (const m of rule.match) {
let atLeastOneMatch = false;
for (const matchGlyph of m)
if (markedGlyphs.has(matchGlyph)) atLeastOneMatch = true;
if (!atLeastOneMatch) continue rules;
}
// If so traverse through the lookup applications
for (const app of rule.apply) {
if (!app.lookup.name) throw new Error("Unreachable: lookup name should be present");
markGlyphsByLookup(gsub, app.lookup.name, markedGlyphs);
}
}
},
};
const GsubReverse = {
isEmpty(gsub, lookup, markedGlyphs) {
if (!lookup.rules) return true;
rules: for (const rule of lookup.rules) {
// Check if all match coverages have at least one glyph in the marked glyphs set
// If not, skip to next rule
if (!rule.match || !rule.to) continue rules;
for (const m of rule.match) {
let atLeastOneMatch = false;
for (const matchGlyph of m)
if (markedGlyphs.has(matchGlyph)) atLeastOneMatch = true;
if (!atLeastOneMatch) continue rules;
}
// This rule is valid, return false
return false;
}
return true;
},
markGlyphs(gsub, lookup, markedGlyphs) {
for (const rule of lookup.rules) {
if (rule.match && rule.to) {
const matchCoverage = rule.match[rule.inputIndex];
for (let j = 0; j < matchCoverage.length; j++) {
const d = markedGlyphs.get(matchCoverage[j]);
if (d && rule.to[j]) markSingleGlyph(markedGlyphs, rule.to[j], d + 0x1000);
}
}
}
},
};
const LookupTypehHanlderMap = {
gsub_single: GsubSingle,
gsub_multiple: GsubMultipleAlternate,
gsub_alternate: GsubMultipleAlternate,
gsub_ligature: GsubLigature,
gsub_chaining: GsubChaining,
gsub_reverse: GsubReverse,
// nothing to do here for gpos
};
///////////////////////////////////////////////////////////////////////////////////////////////////
function analyzeReferenceGraph(glyphStore, markedGlyphs) {
let depthMap = new Map();
let aliasMap = new Map();

View file

@ -3,10 +3,10 @@ import { Nwid, Wwid } from "@iosevka/glyph/relation";
import { gcFont } from "./gc.mjs";
import { finalizeGlyphs } from "./glyphs.mjs";
export function cleanupGlyphStore(cache, para, glyphStore, excludedCodePoints, restFont) {
export function cleanupGlyphStore(cache, para, glyphStore, subsetFilter, restFont) {
assignGrAndCodeRank(glyphStore, Wwid, Nwid);
assignSubRank(glyphStore);
glyphStore = gcFont(glyphStore, excludedCodePoints, restFont);
glyphStore = gcFont(glyphStore, subsetFilter, restFont);
glyphStore = finalizeGlyphs(cache, para, glyphStore);
return glyphStore;
}

View file

@ -41,7 +41,7 @@ function ConvertGsubGposImpl(handlers, T, table, glyphs) {
if (table.lookups) {
if (table.lookupOrder) {
for (const l of table.lookupOrder) {
if (!table.lookups[l]) throw new Error(`Cannot find lookup '${l}'`);
if (!table.lookups[l]) continue; // throw new Error(`Cannot find lookup '${l}'`);
ls.declare(l, table.lookups[l]);
}
}

View file

@ -31,6 +31,7 @@ export async function getParametersT(argv) {
let para = Parameters.init(deepClone(parametersData), argv);
VariantData.apply(deepClone(rawVariantsData), para, argv);
applyLigationData(deepClone(rawLigationData), para, argv);
if (argv.subset) para.subset = argv.subset;
if (argv.excludedCharRanges) para.excludedCharRanges = argv.excludedCharRanges;
if (argv.compatibilityLigatures) para.compatibilityLigatures = argv.compatibilityLigatures;
if (argv.metricOverride) applyMetricOverride(para, argv.metricOverride, argv);

View file

@ -1,5 +1,7 @@
import { monotonicInterpolate } from "@iosevka/util/monotonic-interpolate";
export { createSubsetFilter, SubsetFilter } from "./subset-filter.mjs";
export function init(data, argv) {
let para = {};
apply(para, data, ["iosevka"]);

View file

@ -0,0 +1,52 @@
import fs from "fs";
export class SubsetFilter {
constructor() {
this.includeCharSet = new Set();
this.excludeCharSet = new Set();
}
isCharacterIncluded(character) {
if (this.includeCharSet.size > 0 && !this.includeCharSet.has(character)) return false;
if (this.excludeCharSet.has(character)) return false;
return true;
}
}
export async function createSubsetFilter(directives, legacyExcludedCharRangesDirective) {
const sf = new SubsetFilter();
if (!directives) return sf;
await parseDirectives(directives.include, sf.includeCharSet);
await parseDirectives(directives.exclude, sf.excludeCharSet);
await parseDirectives({ ranges: legacyExcludedCharRangesDirective }, sf.excludeCharSet);
return sf;
}
async function parseDirectives(directives, sinkCharset) {
if (!directives) return;
if (directives.textFile) {
// Read the text file and add the characters to the sinkCharset
const fileContent = await fs.promises.readFile(directives.textFile, "utf-8");
addStringToSinkCharset(sinkCharset, fileContent);
} else if (directives.text) {
// Put the text into the sinkCharset. No comments are allowed
addStringToSinkCharset(sinkCharset, directives.text);
} else if (directives.ranges) {
for (const [lo, hi] of directives.ranges) {
if (lo > hi) continue;
for (let i = lo; i <= hi; i++) {
sinkCharset.add(i);
}
}
}
}
function addStringToSinkCharset(sinkCharset, str) {
if (!str) return;
for (const char of str) {
const codePoint = char.codePointAt(0);
if (codePoint) sinkCharset.add(codePoint);
}
}

View file

@ -357,6 +357,7 @@ const FontInfoOf = computed.group("metadata:font-info-of", async (target, fileNa
// Other parameters
compatibilityLigatures: bp.compatibilityLigatures || null,
metricOverride: bp.metricOverride || null,
subset: bp.subset || null,
excludedCharRanges: bp.excludeChars?.ranges,
// Spacing derivation -- creating faster build for spacing variants
@ -1437,7 +1438,8 @@ function validateBuildPlan(prefix, bp) {
failWithLegacyParamName(prefix, bp, `build-texture-feature`, `buildTextureFeature`);
failWithLegacyParamName(prefix, bp, `metric-override`, `metricOverride`);
failWithLegacyParamName(prefix, bp, `compatibility-ligatures`, `compatibilityLigatures`);
failWithLegacyParamName(prefix, bp, `exclude-chars`, `excludeChars`);
failWithLegacyParamName(prefix, bp, `exclude-chars`, `subset.exclude`);
warnWithLegacyParamName(prefix, bp, `excludeChars`, `subset.exclude`);
}
function failWithLegacyParamName(prefix, bp, legacy, expected) {
@ -1449,6 +1451,15 @@ function failWithLegacyParamName(prefix, bp, legacy, expected) {
}
}
function warnWithLegacyParamName(prefix, bp, legacy, expected) {
if (bp[legacy]) {
echo.warn(
`Build plan for '${prefix}' contains legacy build parameter '${legacy}'. ` +
`Please use '${expected}' instead.`,
);
}
}
function resolveWws(bpName, buildPlans, defaultConfig) {
const bp = buildPlans[bpName];
if (!bp) fail(`Build plan ${bpName} not found.`);