-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathtranslate_content.php
More file actions
171 lines (135 loc) · 6.23 KB
/
translate_content.php
File metadata and controls
171 lines (135 loc) · 6.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
<?php
// Translates the English markdown documentation into every configured target language.
// Bring your own API key: it is read from the CHATGPT_KEY environment variable and is
// sent as a bearer token by the translation requests further down in this script.
$chatgpt_key = getenv('CHATGPT_KEY');

// Nothing can be translated without a key, so stop right away with a non-zero exit code.
if (!$chatgpt_key) {
    echo "You need to set the CHATGPT_KEY environment variable to run this script", PHP_EOL;
    exit(1);
}

// Base names of English files that must never be translated.
// Uncomment an entry (or add a new one) to exclude that file.
$filenames_to_skip = [
    // 'awesome_plugins.md',
    // 'session.md',
    // 'about.md',
];
// Two-letter codes of the languages to translate into. Each code is also the name of the
// directory that receives the translated files (content/v3/<code>/).
$languages = [
    'es', // Spanish
    'fr', // French
    'lv', // Latvian
    'pt', // Portuguese
    'de', // German
    'ru', // Russian
    'zh', // Chinese
    'ja', // Japanese
    'ko', // Korean
    'uk', // Ukrainian
    'id', // Indonesian
];
/**
 * Turns the parsed CLI options into the cut-off timestamp used to skip old files.
 *
 * `--date-from` is accepted as an alias of `--from-date` and wins when both are given.
 * When an option is repeated getopt() reports an array; the last value is used.
 *
 * @param array $opts Result of getopt() (may be empty).
 * @return int|null Unix timestamp of midnight on the requested day, 0 when no date was
 *                  requested, or null when the value cannot be parsed as a date.
 */
function parseFromDateOption(array $opts)
{
    $raw = $opts['date-from'] ?? $opts['from-date'] ?? '';
    if (is_array($raw)) {
        $raw = end($raw);
    }
    $raw = (string) $raw;

    // No value (or a literal 0) means "no cut-off": every file is eligible.
    if ($raw === '' || $raw === '0') {
        return 0;
    }

    // strtotime() returns false for unparseable input; report that as null instead of
    // letting it silently disable the date filter.
    $timestamp = strtotime($raw . ' 00:00:00');

    return $timestamp === false ? null : $timestamp;
}

// pull from-date from cli args (getopt() returns false when parsing fails)
$opts = getopt('', ['from-date:', 'date-from:']);
$fromDate = parseFromDateOption(is_array($opts) ? $opts : []);

if ($fromDate === null) {
    fwrite(STDERR, "Invalid --from-date value; expected a date such as 2024-01-31" . PHP_EOL);
    exit(1);
}

echo "Translating content from " . date('Y-m-d', $fromDate) . PHP_EOL;
/**
 * Returns every markdown file below $root, at any depth, as a sorted list of paths.
 *
 * PHP's glob() has no recursive `**` wildcard, so a pattern such as `en/**` + `/*.md`
 * only reaches one directory level down; this walks the whole tree instead.
 * Hidden files and hidden directories (names starting with a dot) are ignored, which
 * matches what the `*` wildcard of glob() does.
 *
 * @param string $root Directory to scan.
 * @return array List of "<root>/<relative path>" strings using forward slashes after
 *               the root; empty when $root is not a directory.
 */
function findMarkdownFiles(string $root): array
{
    $root = rtrim($root, '/\\');
    if (!is_dir($root)) {
        return [];
    }

    // UNIX_PATHS keeps forward slashes on every platform so the paths look like the
    // ones glob() produced for a forward-slash pattern.
    $directory = new RecursiveDirectoryIterator(
        $root,
        FilesystemIterator::SKIP_DOTS | FilesystemIterator::UNIX_PATHS
    );
    $visibleOnly = new RecursiveCallbackFilterIterator(
        $directory,
        function ($current) {
            return strpos($current->getFilename(), '.') !== 0;
        }
    );

    $found = [];
    foreach (new RecursiveIteratorIterator($visibleOnly) as $entry) {
        if ($entry->isFile() && $entry->getExtension() === 'md') {
            $found[] = $entry->getPathname();
        }
    }

    // Directory iteration order is not defined; sort for a stable processing order.
    sort($found, SORT_STRING);

    return $found;
}

// All English markdown files: the top level of content/v3/en plus every subdirectory.
$files = findMarkdownFiles(__DIR__ . '/content/v3/en');
if (count($files) === 0) {
    echo "No English markdown files were found in " . __DIR__ . "/content/v3/en" . PHP_EOL;
}
// Translate every eligible English markdown file into each target language and store the
// result at the same relative path under content/v3/<language>/.

/**
 * Sends one chat-completion request and returns the decoded JSON response.
 *
 * @param string $apiKey   Bearer token placed in the Authorization header.
 * @param array  $messages Conversation so far, as a list of role/content pairs.
 * @return array|null Decoded response body, or null when the request could not be sent
 *                    or the body did not decode to an array.
 */
function requestChatCompletion(string $apiKey, array $messages)
{
    $ch = curl_init('https://api.x.ai/v1/chat/completions');
    if ($ch === false) {
        echo " **Could not initialise the HTTP request**" . PHP_EOL;
        return null;
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode([
        "model" => "grok-4-fast-non-reasoning",
        "messages" => $messages
    ]));
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'Authorization: Bearer ' . $apiKey,
        'Content-Type: application/json'
    ]);

    $body = curl_exec($ch);
    // Read the error before the handle is closed.
    $transportError = $body === false ? curl_error($ch) : '';
    curl_close($ch);

    if ($body === false) {
        echo " **Request failed: " . $transportError . "**" . PHP_EOL;
        return null;
    }

    $decoded = json_decode($body, true);

    return is_array($decoded) ? $decoded : null;
}

// Prefix shared by every English source file; what follows it is the path that gets
// mirrored into each language directory.
$enContentPrefix = __DIR__ . '/content/v3/en/';

foreach ($files as $file) {
    echo "Processing " . $file . PHP_EOL;

    if (filemtime($file) < $fromDate) {
        echo " **Skipping file because it's older than the from-date**" . PHP_EOL;
        continue;
    }

    if (in_array(basename($file), $filenames_to_skip, true)) {
        echo " **Skipping file because it's in the skip list**" . PHP_EOL;
        continue;
    }

    // Derive the target from the known prefix. Replacing every "/en/" in the absolute
    // path would also rewrite an unrelated "en" directory (e.g. /home/en/...).
    if (strpos($file, $enContentPrefix) !== 0) {
        echo " **Skipping file because it is outside the English content directory**" . PHP_EOL;
        continue;
    }
    $relativePath = substr($file, strlen($enContentPrefix));

    // The source text is the same for every language, so read it only once.
    $sourceMarkdown = file_get_contents($file);
    if ($sourceMarkdown === false) {
        echo " **Skipping file because it could not be read**" . PHP_EOL;
        continue;
    }

    foreach ($languages as $languageAbbreviation) {
        $messages = [
            [
                "role" => "system",
                "content" => "You are a gifted translator focusing on the tech space. Today you are translating documentation for a PHP Framework called Flight (so please never translate the word 'Flight' as it's the name of the framework). You are going to receive content that is a markdown file. When you receive the content you'll translate it from english to the two letter language code that is specified. When you generate a response, you are going to ONLY send back the translated markdown content, no other replies or 'here is your translated markdown' type statements back, only the translated markdown content in markdown format. If you get a follow up response, you need to continue to markdown translation from the very character you left off at and complete the translation until the full page is done. THIS NEXT ITEM IS VERY IMPORTANT! Make sure that when you are translating any code in the markdown file that you ONLY translate the comments of the code and not the classes/methods/variables/links/urls/etc. This next part is also incredibly important or it will break the entire page!!!! Please don't translate any URLs or you will break my app and I will lose my job if this is not done correctly!!!!"
            ],
            [
                "role" => "user",
                "content" => "Translate the following text from English to the two letter language code of {$languageAbbreviation}:\n\n" . $sourceMarkdown
            ]
        ];

        $full_response = '';
        $translationComplete = false;

        do {
            $response = requestChatCompletion($chatgpt_key, $messages);
            $content = $response['choices'][0]['message']['content'] ?? '';

            if (!is_string($content) || $content === '') {
                // Whatever was collected so far is incomplete; it must not be saved.
                break;
            }

            $full_response .= $content;
            $messages[] = [
                'role' => 'assistant',
                'content' => $content
            ];

            // A reply that used exactly 4096 completion tokens is treated as cut off
            // (presumably the output cap - confirm against the API), so ask again.
            $wasCutOff = isset($response['usage']['completion_tokens'])
                && $response['usage']['completion_tokens'] === 4096;
            $translationComplete = !$wasCutOff;
        } while ($wasCutOff);

        if (!$translationComplete) {
            echo " **Skipping file because it received an empty response**" . PHP_EOL;
            continue;
        }

        // save the translated content to the appropriate file
        $translatedFilePath = __DIR__ . '/content/v3/' . $languageAbbreviation . '/' . $relativePath;
        $directory = dirname($translatedFilePath);
        if (!is_dir($directory) && !mkdir($directory, 0775, true) && !is_dir($directory)) {
            echo " **Could not create directory " . $directory . "**" . PHP_EOL;
            continue;
        }

        if (file_put_contents($translatedFilePath, $full_response) === false) {
            echo " **Could not write " . $translatedFilePath . "**" . PHP_EOL;
            continue;
        }

        echo " Updated: " . $translatedFilePath . PHP_EOL;
    }
}
// --- Begin: Remove orphaned translated files ---
// A translated file is an orphan when no English file exists at the same relative path.

// Build a set (relative path => true) of all files in /en/ for constant-time lookups.
$enRoot = realpath(__DIR__ . '/content/v3/en');
$enFiles = [];
if ($enRoot !== false) {
    $enPrefix = $enRoot . DIRECTORY_SEPARATOR;
    foreach ($files as $file) {
        $realFile = realpath($file);
        if ($realFile !== false && strpos($realFile, $enPrefix) === 0) {
            // Path relative to /content/v3/en/
            $enFiles[substr($realFile, strlen($enPrefix))] = true;
        }
    }
}

if (count($enFiles) === 0) {
    // With an empty reference set every translation would look orphaned and be deleted.
    echo "Skipping orphan cleanup because no English source files were found" . PHP_EOL;
} else {
    // For each language, scan its directory and remove files not present in $enFiles
    foreach ($languages as $languageAbbreviation) {
        $langDir = __DIR__ . "/content/v3/{$languageAbbreviation}/";
        if (!is_dir($langDir)) {
            continue;
        }

        // Canonical form without a trailing slash, so it is comparable with realpath()
        // and dirname() results.
        $langRoot = realpath($langDir);
        if ($langRoot === false) {
            continue;
        }
        $langPrefix = $langRoot . DIRECTORY_SEPARATOR;

        // Collect first and delete afterwards, so the tree is not modified while the
        // directory iterator is still walking it.
        $orphans = [];
        $iterator = new RecursiveIteratorIterator(
            new RecursiveDirectoryIterator($langDir, RecursiveDirectoryIterator::SKIP_DOTS)
        );
        foreach ($iterator as $translatedFile) {
            if ($translatedFile->getExtension() !== 'md') {
                continue;
            }
            $realPath = $translatedFile->getRealPath();
            // Ignore anything that resolves outside this language directory.
            if ($realPath === false || strpos($realPath, $langPrefix) !== 0) {
                continue;
            }
            // Path relative to /content/v3/{lang}/
            $relativePath = substr($realPath, strlen($langPrefix));
            if (!isset($enFiles[$relativePath])) {
                $orphans[] = $realPath;
            }
        }

        foreach ($orphans as $orphanPath) {
            echo "Deleting orphaned file: {$orphanPath}" . PHP_EOL;
            if (!unlink($orphanPath)) {
                echo " **Could not delete {$orphanPath}**" . PHP_EOL;
                continue;
            }

            // Remove directories that became empty, walking upwards but never reaching
            // the language directory itself or anything above it.
            $dir = dirname($orphanPath);
            while (strpos($dir, $langPrefix) === 0 && is_dir($dir)) {
                $entries = scandir($dir);
                if ($entries === false || count(array_diff($entries, ['.', '..'])) > 0) {
                    break;
                }
                if (!rmdir($dir)) {
                    break;
                }
                $dir = dirname($dir);
            }
        }
    }
}
// --- End: Remove orphaned translated files ---