1
+
import fs from "fs"
2
+
import { JSDOM, DOMWindow } from "jsdom"
3
+
import { CLIEngine } from "eslint"
4
+
5
+
/**
 * Selectors of the specification tables that list Unicode property names and
 * values. Every edition scraped below uses the same element IDs; override a
 * key in an individual entry if an edition ever names a table differently.
 */
const SPEC_TABLE_SELECTORS = {
    binProperties: "#table-binary-unicode-properties",
    gcValues: "#table-unicode-general-category-values",
    scValues: "#table-unicode-script-values",
}

/**
 * ECMAScript specification pages to scrape, listed oldest edition first.
 * Each entry carries the page URL, the edition year it is recorded under, and
 * the selectors of the three tables to read from that page.
 */
const DataSources = [
    {
        url: "https://www.ecma-international.org/ecma-262/9.0/",
        version: 2018,
        ...SPEC_TABLE_SELECTORS,
    },
    {
        url: "https://www.ecma-international.org/ecma-262/10.0/",
        version: 2019,
        ...SPEC_TABLE_SELECTORS,
    },
    {
        url: "https://tc39.es/ecma262/",
        version: 2020,
        ...SPEC_TABLE_SELECTORS,
    },
]
28
+
/** Path of the generated module, resolved against the current working directory. */
const FILE_PATH = "src/unicode/properties.ts"

/** Sink for progress and error messages. */
const logger = console

/** Property names and values collected for a single ECMAScript edition. */
interface Datum {
    /** Binary property names (usable without a `name=value` pair). */
    binProperties: string[]
    /** Values accepted for `General_Category`. */
    gcValues: string[]
    /** Values accepted for `Script` / `Script_Extensions`. */
    scValues: string[]
}
36
+
37
+
// Main
38
+
;(async () => {
    // Downloads every specification page listed in DataSources, extracts the
    // Unicode property tables, generates the validator module source from
    // them, formats it with ESLint's fixer, and writes it to FILE_PATH.

    // Keyed by ECMAScript version. Created without a prototype so that only
    // the versions assigned below are ever enumerated.
    const data: Record<number, Datum> = Object.create(null)

    // Values already attributed to an earlier edition. The sets are shared
    // across iterations so each edition records only what it newly adds.
    const existing = {
        binProperties: new Set<string>(),
        gcValues: new Set<string>(),
        scValues: new Set<string>(),
    }

    for (const {
        binProperties,
        gcValues,
        scValues,
        url,
        version,
    } of DataSources) {
        logger.log("---- ECMAScript %d ----", version)
        const datum: Datum = {
            binProperties: [],
            gcValues: [],
            scValues: [],
        }
        data[version] = datum

        let window: DOMWindow | null = null
        do {
            try {
                logger.log("Fetching data from %o", url)
                ;({ window } = await JSDOM.fromURL(url))
            } catch (error) {
                // Only the "socket hang up" failure is retried; anything
                // else is rethrown. Narrow first: the caught value is not
                // guaranteed to be an Error, and reading `.message` on an
                // un-narrowed catch variable is rejected by strict compilers.
                if (
                    !(error instanceof Error) ||
                    error.message !== "Error: socket hang up"
                ) {
                    throw error
                }
                logger.log("Failed: %s", error)
                // Wait two seconds before the next attempt.
                await new Promise<void>(resolve => setTimeout(resolve, 2000))
            }
        } while (window == null)

        logger.log("Parsing tables")
        datum.binProperties = collectValues(
            window,
            binProperties,
            existing.binProperties,
        )
        datum.gcValues = collectValues(window, gcValues, existing.gcValues)
        datum.scValues = collectValues(window, scValues, existing.scValues)

        logger.log("Done")
    }

    logger.log("Generating code...")

    // One lazily-filled RegExp slot per scraped version. The same field list
    // is emitted into all three pattern-holder objects of the generated file.
    const patternSlots = Object.keys(data)
        .map(version => `es${version}: null as RegExp | null,`)
        .join("\n")

    // Emits the per-version checks for one table of values.
    const verification = (
        key: keyof Datum,
        patternVar: string,
        maxLen: number,
    ): string =>
        Object.entries(data)
            .map(([version, datum]) =>
                makeVerificationCode(version, patternVar, datum[key], maxLen),
            )
            .join("\n")

    let code = `/* This file was generated with ECMAScript specifications. */

const gcNamePattern = /^(?:General_Category|gc)$/u
const scNamePattern = /^(?:Script(?:_Extensions)?|scx?)$/u
const gcValuePatterns = {
    ${patternSlots}
}
const scValuePatterns = {
    ${patternSlots}
}
const binPropertyPatterns = {
    ${patternSlots}
}

export function isValidUnicodeProperty(version: number, name: string, value: string): boolean {
    if (gcNamePattern.test(name)) {
        ${verification("gcValues", "gcValuePatterns", 52)}
    }
    if (scNamePattern.test(name)) {
        ${verification("scValues", "scValuePatterns", 52)}
    }
    return false
}

export function isValidLoneUnicodeProperty(version: number, value: string): boolean {
    ${verification("binProperties", "binPropertyPatterns", 56)}
    return false
}
`

    logger.log("Formatting code...")
    const engine = new CLIEngine({ fix: true })
    const result = engine.executeOnText(code, "properties.ts").results[0]
    // `output` is only present when the fixer changed something; otherwise
    // keep the generated text as is.
    code = result.output || code

    logger.log("Writing '%s'...", FILE_PATH)
    await save(code)

    logger.log("Completed!")
})().catch(error => {
    // A rejection that is not an Error has no stack; log the value itself so
    // the failure reason is never printed as "undefined".
    logger.error(error instanceof Error && error.stack ? error.stack : error)
    process.exitCode = 1
})
146
+
147
+
/**
 * Reads the text of every `<code>` element found in the first column of the
 * table matched by `id` and returns the entries not seen before.
 *
 * @param window Window whose document contains the table.
 * @param id CSS selector of the table (prepended to `td:nth-child(1) code`).
 * @param existingSet Values collected so far. Every value returned from this
 * call is added to it, so the set is mutated.
 * @returns The newly seen values in default (code-unit) sort order. An
 * element without text content contributes an empty string.
 */
function collectValues(
    window: Window,
    id: string,
    existingSet: Set<string>,
): string[] {
    const cells = window.document.querySelectorAll(
        `${id} td:nth-child(1) code`,
    )
    const fresh: string[] = []

    for (const cell of Array.from(cells)) {
        const text = cell.textContent || ""
        if (!existingSet.has(text)) {
            existingSet.add(text)
            fresh.push(text)
        }
    }

    return fresh.sort()
}
165
+
166
+
/**
 * Produces the source of one `if (version >= N) { ... }` check for the
 * generated module. The emitted code builds the RegExp for that version on
 * first use, stores it in `<patternVar>.es<version>`, and returns `true`
 * when `value` matches it.
 *
 * @param version ECMAScript version the values belong to.
 * @param patternVar Name of the generated object that holds the RegExp slots.
 * @param values Values accepted from this version on.
 * @param maxLen Length limit passed on to `makeRegExpPatternCode`.
 * @returns The generated source, or an empty string when `values` is empty.
 */
function makeVerificationCode(
    version: string,
    patternVar: string,
    values: string[],
    maxLen: number,
): string {
    // Nothing to verify for this version, so emit no code at all.
    if (!values.length) {
        return ""
    }

    const slot = `${patternVar}.es${version}`
    const patternSource = makeRegExpPatternCode(values, maxLen)

    return `
        if (version >= ${version}) {
            if (!${slot}) {
                ${slot} = new RegExp(
                    ${patternSource},
                    "u"
                )
            }
            if (${slot}.test(value)) {
                return true
            }
        }
    `
}
190
+
191
+
/**
 * Builds the source text of a string expression for an anchored alternation
 * (`^(?:a|b|...)$`) over `names`, split into several double-quoted literals
 * joined by `+`.
 *
 * A name starts a new literal when appending it (plus its `|`) would make the
 * current literal longer than `maxLen`; a single name longer than `maxLen`
 * still gets a literal of its own. Names are inserted verbatim, without any
 * escaping.
 *
 * @param names Alternatives to match.
 * @param maxLen Maximum length of one literal's content.
 * @returns Source text such as `"^(?:a|"+"b)$"`.
 */
function makeRegExpPatternCode(names: string[], maxLen: number): string {
    const chunks: string[] = []
    let current = "^(?:"

    for (const name of names) {
        const piece = `${name}|`
        if (current.length + piece.length > maxLen) {
            chunks.push(current)
            current = piece
        } else {
            current += piece
        }
    }

    // Drop the separator left behind by the last name before closing the group.
    const tail = current.endsWith("|") ? current.slice(0, -1) : current
    chunks.push(`${tail})$`)

    return chunks.map(chunk => `"${chunk}"`).join("+")
}
206
+
207
+
/**
 * Writes `content` to `FILE_PATH`.
 *
 * @param content Text to write.
 * @returns A promise that resolves once the write has finished and rejects
 * with the error reported by `fs.writeFile` if it fails.
 */
function save(content: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        fs.writeFile(FILE_PATH, content, error => {
            if (error) {
                reject(error)
                return
            }
            resolve()
        })
    })
}
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4