Kôd:
// Convert both data and database tables to UTF-8 character set.
//
// Without a valid submission ($_POST['proceed'] plus a $_POST['src_charset']
// that both SMF and the MySQL server know) this only prepares $context for the
// 'convert_utf8' sub template and returns. Otherwise it checks the session,
// converts every table matching $db_prefix, stores the new character set in
// the settings and redirects to the maintenance screen.
function ConvertUtf8()
{
	global $scripturl, $context, $txt, $language, $db_prefix, $db_character_set;
	global $modSettings, $user_info, $sourcedir;

	// Show me your badge!
	isAllowedTo('admin_forum');

	// The character sets used in SMF's language files with their db equivalent.
	$charsets = array(
		// Chinese-traditional.
		'big5' => 'big5',
		// Chinese-simplified.
		'gbk' => 'gbk',
		// West European.
		'ISO-8859-1' => 'latin1',
		// Romanian.
		'ISO-8859-2' => 'latin2',
		// Turkish.
		'ISO-8859-9' => 'latin5',
		// Thai.
		'tis-620' => 'tis620',
		// Persian, Chinese, etc.
		'UTF-8' => 'utf8',
		// Russian.
		'windows-1251' => 'cp1251',
		// Greek.
		'windows-1253' => 'utf8',
		// Hebrew.
		'windows-1255' => 'utf8',
		// Arabic.
		'windows-1256' => 'cp1256',
	);

	// Get a list of character sets supported by your MySQL server.
	$request = db_query("
		SHOW CHARACTER SET", __FILE__, __LINE__);
	$db_charsets = array();
	while ($row = mysql_fetch_assoc($request))
		$db_charsets[] = $row['Charset'];
	mysql_free_result($request);

	// Character sets supported by both MySQL and SMF's language files.
	$charsets = array_intersect($charsets, $db_charsets);

	// The source character set is used as an array index and ends up in ALTER
	// statements, so only accept a value that is a key of the supported list.
	$src_charset = '';
	if (isset($_POST['src_charset']) && is_string($_POST['src_charset']) && isset($charsets[$_POST['src_charset']]))
		$src_charset = $_POST['src_charset'];

	// This is for the first screen telling backups is good. It is also shown
	// again when the submitted character set is missing or not supported,
	// instead of starting a conversion that would fail half way through.
	if (!isset($_POST['proceed']) || $src_charset === '')
	{
		adminIndex('maintain_forum');

		// Character set conversions are only supported as of MySQL 4.1.2.
		if (version_compare('4.1.2', preg_replace('~\-.+?$~', '', mysql_get_server_info())) > 0)
			fatal_lang_error('utf8_db_version_too_low');

		// Use the messages.body column as indicator for the database charset.
		$request = db_query("
			SHOW FULL COLUMNS
			FROM {$db_prefix}messages
			LIKE 'body'", __FILE__, __LINE__);
		$column_info = mysql_fetch_assoc($request);
		mysql_free_result($request);

		// A collation looks like latin1_swedish. We only need the character set.
		list($context['database_charset']) = explode('_', $column_info['Collation']);
		$context['database_charset'] = in_array($context['database_charset'], $charsets) ? array_search($context['database_charset'], $charsets) : $context['database_charset'];

		// No need to convert to UTF-8 if it already is.
		if ($db_character_set === 'utf8' && !empty($modSettings['global_character_set']) && $modSettings['global_character_set'] === 'UTF-8')
			fatal_lang_error('utf8_already_utf8');

		// Grab the character set from the default language file.
		loadLanguage('index', $language, true);
		$context['charset_detected'] = $txt['lang_character_set'];
		$context['charset_about_detected'] = sprintf($txt['utf8_detected_charset'], $language, $context['charset_detected']);

		// Go back to your own language.
		loadLanguage('index', $user_info['language'], true);

		// Show a warning if the character set seems not to be supported.
		if (!isset($charsets[$context['charset_detected']]))
		{
			$context['charset_warning'] = sprintf($txt['utf8_charset_not_supported'], $txt['lang_character_set']);

			// Default to ISO-8859-1.
			$context['charset_detected'] = 'ISO-8859-1';
		}

		$context['charset_list'] = array_keys($charsets);

		$context['page_title'] = $txt['utf8_title'];
		$context['sub_template'] = 'convert_utf8';
		return;
	}

	// After this point we're starting the conversion. But first: session check.
	checkSession();

	// The MySQL name of the character set the stored bytes are declared as
	// before the final conversion to utf8.
	$db_src_charset = $charsets[$src_charset];

	// Translation table for the character sets not native for MySQL.
	// Each entry becomes one nested SQL REPLACE(field, from, to) call, applied
	// in array order while the column is in binary form.
	// NOTE(review): some keys occur twice below (0xFB, 0xFC and 0xFF for
	// windows-1255; 0xD2 and 0xFF for windows-1253). PHP keeps only the last
	// value given for a duplicated key in an array literal, so the earlier ''
	// entries are overwritten - confirm the resulting REPLACE order is the
	// intended one before touching these tables.
	$translation_tables = array(
		'windows-1255' => array(
			'0x81' => '\'\'', '0x8A' => '\'\'', '0x8C' => '\'\'',
			'0x8D' => '\'\'', '0x8E' => '\'\'', '0x8F' => '\'\'',
			'0x90' => '\'\'', '0x9A' => '\'\'', '0x9C' => '\'\'',
			'0x9D' => '\'\'', '0x9E' => '\'\'', '0x9F' => '\'\'',
			'0xCA' => '\'\'', '0xD9' => '\'\'', '0xDA' => '\'\'',
			'0xDB' => '\'\'', '0xDC' => '\'\'', '0xDD' => '\'\'',
			'0xDE' => '\'\'', '0xDF' => '\'\'', '0xFB' => '\'\'',
			'0xFC' => '\'\'', '0xFF' => '\'\'', '0xC2' => '0xFF',
			'0x80' => '0xFC', '0xE2' => '0xFB', '0xA0' => '0xC2A0',
			'0xA1' => '0xC2A1', '0xA2' => '0xC2A2', '0xA3' => '0xC2A3',
			'0xA5' => '0xC2A5', '0xA6' => '0xC2A6', '0xA7' => '0xC2A7',
			'0xA8' => '0xC2A8', '0xA9' => '0xC2A9', '0xAB' => '0xC2AB',
			'0xAC' => '0xC2AC', '0xAD' => '0xC2AD', '0xAE' => '0xC2AE',
			'0xAF' => '0xC2AF', '0xB0' => '0xC2B0', '0xB1' => '0xC2B1',
			'0xB2' => '0xC2B2', '0xB3' => '0xC2B3', '0xB4' => '0xC2B4',
			'0xB5' => '0xC2B5', '0xB6' => '0xC2B6', '0xB7' => '0xC2B7',
			'0xB8' => '0xC2B8', '0xB9' => '0xC2B9', '0xBB' => '0xC2BB',
			'0xBC' => '0xC2BC', '0xBD' => '0xC2BD', '0xBE' => '0xC2BE',
			'0xBF' => '0xC2BF', '0xD7' => '0xD7B3', '0xD1' => '0xD781',
			'0xD4' => '0xD7B0', '0xD5' => '0xD7B1', '0xD6' => '0xD7B2',
			'0xE0' => '0xD790', '0xEA' => '0xD79A', '0xEC' => '0xD79C',
			'0xED' => '0xD79D', '0xEE' => '0xD79E', '0xEF' => '0xD79F',
			'0xF0' => '0xD7A0', '0xF1' => '0xD7A1', '0xF2' => '0xD7A2',
			'0xF3' => '0xD7A3', '0xF5' => '0xD7A5', '0xF6' => '0xD7A6',
			'0xF7' => '0xD7A7', '0xF8' => '0xD7A8', '0xF9' => '0xD7A9',
			'0x82' => '0xE2809A', '0x84' => '0xE2809E', '0x85' => '0xE280A6',
			'0x86' => '0xE280A0', '0x87' => '0xE280A1', '0x89' => '0xE280B0',
			'0x8B' => '0xE280B9', '0x93' => '0xE2809C', '0x94' => '0xE2809D',
			'0x95' => '0xE280A2', '0x97' => '0xE28094', '0x99' => '0xE284A2',
			'0xC0' => '0xD6B0', '0xC1' => '0xD6B1', '0xC3' => '0xD6B3',
			'0xC4' => '0xD6B4', '0xC5' => '0xD6B5', '0xC6' => '0xD6B6',
			'0xC7' => '0xD6B7', '0xC8' => '0xD6B8', '0xC9' => '0xD6B9',
			'0xCB' => '0xD6BB', '0xCC' => '0xD6BC', '0xCD' => '0xD6BD',
			'0xCE' => '0xD6BE', '0xCF' => '0xD6BF', '0xD0' => '0xD780',
			'0xD2' => '0xD782', '0xE3' => '0xD793', '0xE4' => '0xD794',
			'0xE5' => '0xD795', '0xE7' => '0xD797', '0xE9' => '0xD799',
			'0xFD' => '0xE2808E', '0xFE' => '0xE2808F', '0x92' => '0xE28099',
			'0x83' => '0xC692', '0xD3' => '0xD783', '0x88' => '0xCB86',
			'0x98' => '0xCB9C', '0x91' => '0xE28098', '0x96' => '0xE28093',
			'0xBA' => '0xC3B7', '0x9B' => '0xE280BA', '0xAA' => '0xC397',
			'0xA4' => '0xE282AA', '0xE1' => '0xD791', '0xE6' => '0xD796',
			'0xE8' => '0xD798', '0xEB' => '0xD79B', '0xF4' => '0xD7A4',
			'0xFA' => '0xD7AA', '0xFF' => '0xD6B2', '0xFC' => '0xE282AC',
			'0xFB' => '0xD792',
		),
		'windows-1253' => array(
			'0x81' => "''", '0x88' => "''", '0x8A' => "''",
			'0x8C' => "''", '0x8D' => "''", '0x8E' => "''",
			'0x8F' => "''", '0x90' => "''", '0x98' => "''",
			'0x9A' => "''", '0x9C' => "''", '0x9D' => "''",
			'0x9E' => "''", '0x9F' => "''", '0xAA' => "''",
			'0xD2' => "''", '0xFF' => "''", '0xCE' => '0xCE9E',
			'0xB8' => '0xCE88', '0xBA' => '0xCE8A', '0xBC' => '0xCE8C',
			'0xBE' => '0xCE8E', '0xBF' => '0xCE8F', '0xC0' => '0xCE90',
			'0xC8' => '0xCE98', '0xCA' => '0xCE9A', '0xCC' => '0xCE9C',
			'0xCD' => '0xCE9D', '0xCF' => '0xCE9F', '0xDA' => '0xCEAA',
			'0xE8' => '0xCEB8', '0xEA' => '0xCEBA', '0xEC' => '0xCEBC',
			'0xEE' => '0xCEBE', '0xEF' => '0xCEBF', '0xC2' => '0xFF',
			'0xBD' => '0xC2BD', '0xED' => '0xCEBD', '0xB2' => '0xC2B2',
			'0xA0' => '0xC2A0', '0xA3' => '0xC2A3', '0xA4' => '0xC2A4',
			'0xA5' => '0xC2A5', '0xA6' => '0xC2A6', '0xA7' => '0xC2A7',
			'0xA8' => '0xC2A8', '0xA9' => '0xC2A9', '0xAB' => '0xC2AB',
			'0xAC' => '0xC2AC', '0xAD' => '0xC2AD', '0xAE' => '0xC2AE',
			'0xB0' => '0xC2B0', '0xB1' => '0xC2B1', '0xB3' => '0xC2B3',
			'0xB5' => '0xC2B5', '0xB6' => '0xC2B6', '0xB7' => '0xC2B7',
			'0xBB' => '0xC2BB', '0xE2' => '0xCEB2', '0x80' => '0xD2',
			'0x82' => '0xE2809A', '0x84' => '0xE2809E', '0x85' => '0xE280A6',
			'0x86' => '0xE280A0', '0xA1' => '0xCE85', '0xA2' => '0xCE86',
			'0x87' => '0xE280A1', '0x89' => '0xE280B0', '0xB9' => '0xCE89',
			'0x8B' => '0xE280B9', '0x91' => '0xE28098', '0x99' => '0xE284A2',
			'0x92' => '0xE28099', '0x93' => '0xE2809C', '0x94' => '0xE2809D',
			'0x95' => '0xE280A2', '0x96' => '0xE28093', '0x97' => '0xE28094',
			'0x9B' => '0xE280BA', '0xAF' => '0xE28095', '0xB4' => '0xCE84',
			'0xC1' => '0xCE91', '0xC3' => '0xCE93', '0xC4' => '0xCE94',
			'0xC5' => '0xCE95', '0xC6' => '0xCE96', '0x83' => '0xC692',
			'0xC7' => '0xCE97', '0xC9' => '0xCE99', '0xCB' => '0xCE9B',
			'0xD0' => '0xCEA0', '0xD1' => '0xCEA1', '0xD3' => '0xCEA3',
			'0xD4' => '0xCEA4', '0xD5' => '0xCEA5', '0xD6' => '0xCEA6',
			'0xD7' => '0xCEA7', '0xD8' => '0xCEA8', '0xD9' => '0xCEA9',
			'0xDB' => '0xCEAB', '0xDC' => '0xCEAC', '0xDD' => '0xCEAD',
			'0xDE' => '0xCEAE', '0xDF' => '0xCEAF', '0xE0' => '0xCEB0',
			'0xE1' => '0xCEB1', '0xE3' => '0xCEB3', '0xE4' => '0xCEB4',
			'0xE5' => '0xCEB5', '0xE6' => '0xCEB6', '0xE7' => '0xCEB7',
			'0xE9' => '0xCEB9', '0xEB' => '0xCEBB', '0xF0' => '0xCF80',
			'0xF1' => '0xCF81', '0xF2' => '0xCF82', '0xF3' => '0xCF83',
			'0xF4' => '0xCF84', '0xF5' => '0xCF85', '0xF6' => '0xCF86',
			'0xF7' => '0xCF87', '0xF8' => '0xCF88', '0xF9' => '0xCF89',
			'0xFA' => '0xCF8A', '0xFB' => '0xCF8B', '0xFC' => '0xCF8C',
			'0xFD' => '0xCF8D', '0xFE' => '0xCF8E', '0xFF' => '0xCE92',
			'0xD2' => '0xE282AC',
		),
	);

	// Make some preparations: build the nested REPLACE() expression once.
	// It stays empty when MySQL can handle the source character set natively.
	$replace = '';
	if (isset($translation_tables[$src_charset]))
	{
		$replace = '%field%';
		foreach ($translation_tables[$src_charset] as $from => $to)
			$replace = "REPLACE($replace, $from, $to)";
	}

	// Grab a list of tables.
	if (preg_match('~^`(.+?)`\.(.+?)$~', $db_prefix, $match) === 1)
		$queryTables = db_query("
			SHOW TABLE STATUS
			FROM `" . strtr($match[1], array('`' => '')) . "`
			LIKE '" . str_replace('_', '\_', $match[2]) . "%'", __FILE__, __LINE__);
	else
		$queryTables = db_query("
			SHOW TABLE STATUS
			LIKE '" . str_replace('_', '\_', $db_prefix) . "%'", __FILE__, __LINE__);

	while ($table_info = mysql_fetch_assoc($queryTables))
	{
		// Just to make sure it doesn't time out.
		if (function_exists('apache_reset_timeout'))
			apache_reset_timeout();

		$table_charsets = array();

		// Loop through each column and group the text columns by character set.
		$queryColumns = db_query("
			SHOW FULL COLUMNS
			FROM $table_info[Name]", __FILE__, __LINE__);
		while ($column_info = mysql_fetch_assoc($queryColumns))
		{
			// Only text'ish columns have a character set and need converting.
			if (strpos($column_info['Type'], 'text') !== false || strpos($column_info['Type'], 'char') !== false)
			{
				// Fall back to the table's collation when the column reports none.
				$collation = empty($column_info['Collation']) || $column_info['Collation'] === 'NULL' ? $table_info['Collation'] : $column_info['Collation'];
				if (!empty($collation) && $collation !== 'NULL')
				{
					list($charset) = explode('_', $collation);

					if (!isset($table_charsets[$charset]))
						$table_charsets[$charset] = array();

					$table_charsets[$charset][] = $column_info;
				}
			}
		}
		mysql_free_result($queryColumns);

		// Only change the columns whose declared charset doesn't match the
		// charset the data is really in.
		$updates_blob = '';
		$updates_text = '';
		$updates_replace = '';
		foreach ($table_charsets as $charset => $columns)
		{
			if ($charset === $db_src_charset)
				continue;

			foreach ($columns as $column)
			{
				// Only the char types carry a default here; escape it so a
				// quote inside the default cannot break the ALTER statement.
				$default = strpos($column['Type'], 'char') === false ? '' : " default '" . addslashes($column['Default']) . "'";

				$updates_blob .= "
					CHANGE COLUMN $column[Field] $column[Field] " . strtr($column['Type'], array('text' => 'blob', 'char' => 'binary')) . ($column['Null'] === 'YES' ? ' NULL' : ' NOT NULL') . $default . ',';
				$updates_text .= "
					CHANGE COLUMN $column[Field] $column[Field] $column[Type] CHARACTER SET " . $db_src_charset . ($column['Null'] === 'YES' ? '' : ' NOT NULL') . $default . ',';

				// The byte translation may only touch columns that go through
				// the binary form; columns already in the right charset are
				// left alone, just like they are by the ALTER statements.
				if ($replace !== '')
					$updates_replace .= "
						$column[Field] = " . strtr($replace, array('%field%' => $column['Field'])) . ',';
			}
		}

		if ($updates_blob !== '')
		{
			// Change the columns to binary form.
			db_query("
				ALTER TABLE $table_info[Name]" . substr($updates_blob, 0, -1), __FILE__, __LINE__);

			// Convert the character set if MySQL has no native support for it.
			if ($updates_replace !== '')
				db_query("
					UPDATE $table_info[Name]
					SET " . substr($updates_replace, 0, -1), __FILE__, __LINE__);

			// Change the columns back, but with the proper character set.
			db_query("
				ALTER TABLE $table_info[Name]" . substr($updates_text, 0, -1), __FILE__, __LINE__);
		}

		// Now do the actual conversion (if still needed).
		if ($db_src_charset !== 'utf8')
			db_query("
				ALTER TABLE $table_info[Name]
				CONVERT TO CHARACTER SET utf8", __FILE__, __LINE__);
	}
	mysql_free_result($queryTables);

	// Let the settings know we have a new character set.
	updateSettings(array('global_character_set' => 'UTF-8'));
	updateSettingsFile(array('db_character_set' => '\'utf8\''));

	// The conversion might have messed up some serialized strings. Fix them!
	require_once($sourcedir . '/Subs-Charset.php');
	fix_serialized_columns();

	redirectExit('action=maintain');
}
// Convert HTML-entities to their UTF-8 character equivalents.
//
// Works through a fixed list of tables in steps of 500 primary key values and
// interrupts itself after about ten seconds, leaving the URL data needed to
// continue in $context['continue_get_data']. The position is taken from
// $_REQUEST['table'] and $_REQUEST['start']; without $_REQUEST['sesc'] only
// the explanation screen is prepared.
function ConvertEntities()
{
	global $db_prefix, $db_character_set, $modSettings, $context, $sourcedir;

	isAllowedTo('admin_forum');

	// Show the maintenance highlighted on the admin bar.
	adminIndex('maintain_forum');

	// Check to see if UTF-8 is currently the default character set.
	// The setting may not exist at all, so test for that first.
	if (empty($modSettings['global_character_set']) || $modSettings['global_character_set'] !== 'UTF-8' || !isset($db_character_set) || $db_character_set !== 'utf8')
		fatal_lang_error('entity_convert_only_utf8');

	// Select the sub template from the Admin template.
	$context['sub_template'] = 'convert_entities';

	// Some starting values.
	$context['table'] = empty($_REQUEST['table']) ? 0 : (int) $_REQUEST['table'];
	$context['start'] = empty($_REQUEST['start']) ? 0 : (int) $_REQUEST['start'];
	$context['start_time'] = time();
	$context['first_step'] = !isset($_REQUEST['sesc']);
	$context['last_step'] = false;

	// The first step is just a text screen with some explanation.
	if ($context['first_step'])
		return;

	// Now we're actually going to convert...
	checkSession('get');

	// A list of tables ready for conversion.
	$tables = array(
		'ban_groups',
		'ban_items',
		'boards',
		'calendar',
		'calendar_holidays',
		'categories',
		'log_errors',
		'log_search_subjects',
		'membergroups',
		'members',
		'message_icons',
		'messages',
		'package_servers',
		'personal_messages',
		'pm_recipients',
		'polls',
		'poll_choices',
		'smileys',
		'themes',
	);
	$context['num_tables'] = count($tables);

	// Loop through all tables that need converting.
	for (; $context['table'] < $context['num_tables']; $context['table']++)
	{
		$cur_table = $tables[$context['table']];
		$primary_key = '';

		if (function_exists('apache_reset_timeout'))
			apache_reset_timeout();

		// Get a list of text columns.
		$columns = array();
		$request = db_query("
			SHOW FULL COLUMNS
			FROM {$db_prefix}$cur_table", __FILE__, __LINE__);
		while ($column_info = mysql_fetch_assoc($request))
			if (strpos($column_info['Type'], 'text') !== false || strpos($column_info['Type'], 'char') !== false)
				$columns[] = $column_info['Field'];
		mysql_free_result($request);

		// Without text columns there is nothing to convert, and the queries
		// below would not even be valid SQL.
		if (empty($columns))
		{
			// The start offset belongs to this table only.
			$context['start'] = 0;
			continue;
		}

		// Get the column with the (first) primary key.
		$request = db_query("
			SHOW KEYS
			FROM {$db_prefix}$cur_table", __FILE__, __LINE__);
		while ($row = mysql_fetch_assoc($request))
		{
			if ($row['Key_name'] === 'PRIMARY' && $row['Seq_in_index'] == 1)
			{
				$primary_key = $row['Column_name'];
				break;
			}
		}
		mysql_free_result($request);

		// No primary key, no glory.
		if (empty($primary_key))
		{
			$context['start'] = 0;
			continue;
		}

		// Get the maximum value for the primary key.
		$request = db_query("
			SELECT MAX($primary_key)
			FROM {$db_prefix}$cur_table", __FILE__, __LINE__);
		list($max_value) = mysql_fetch_row($request);
		mysql_free_result($request);

		// Nothing in this table (or nothing usable as a range).
		if (empty($max_value))
		{
			$context['start'] = 0;
			continue;
		}

		while ($context['start'] <= $max_value)
		{
			// Retrieve a list of rows that has at least one entity to convert.
			$request = db_query("
				SELECT $primary_key, " . implode(', ', $columns) . "
				FROM {$db_prefix}$cur_table
				WHERE $primary_key BETWEEN $context[start] AND $context[start] + 499
					AND (" . implode(" LIKE '%&#%' OR ", $columns) . " LIKE '%&#%')
				LIMIT 500", __FILE__, __LINE__);
			while ($row = mysql_fetch_assoc($request))
			{
				$changes = array();
				foreach ($row as $column_name => $column_value)
					if ($column_name !== $primary_key && strpos($column_value, '&#') !== false)
						$changes[] = "$column_name = '" . addslashes(preg_replace_callback('~(&#(\d{1,7}|x[0-9a-fA-F]{1,6});)~', 'convert_entities__callback', $column_value)) . "'";

				// Update the row.
				if (!empty($changes))
					db_query("
						UPDATE {$db_prefix}$cur_table
						SET
							" . implode(",
							", $changes) . "
						WHERE $primary_key = " . $row[$primary_key] . "
						LIMIT 1", __FILE__, __LINE__);
			}
			mysql_free_result($request);
			$context['start'] += 500;

			// After ten seconds interrupt.
			if (time() - $context['start_time'] > 10)
			{
				// Calculate an approximation of the percentage done. The start
				// offset moves in steps of 500 and can pass the highest key, so
				// cap this table's share at one whole table.
				$context['percent_done'] = round(100 * ($context['table'] + min(1, $context['start'] / $max_value)) / $context['num_tables'], 1);
				$context['continue_get_data'] = '?action=convertentities;table=' . $context['table'] . ';start=' . $context['start'] . ';sesc=' . $context['session_id'];
				return;
			}
		}
		$context['start'] = 0;
	}

	// Make sure all serialized strings are all right.
	require_once($sourcedir . '/Subs-Charset.php');
	fix_serialized_columns();

	// If we're here, we must be done.
	$context['percent_done'] = 100;
	$context['continue_get_data'] = '?action=maintain';
	$context['last_step'] = true;
}

// Callback for preg_replace_callback() in ConvertEntities().
// $matches[2] holds the numeric part of an entity: decimal digits, or 'x'
// followed by hexadecimal digits. Returns the replacement for the entity.
function convert_entities__callback($matches)
{
	$string = $matches[2];
	$num = substr($string, 0, 1) === 'x' ? hexdec(substr($string, 1)) : (int) $string;

	// Control characters, values past U+10FFFF and the surrogate range are dropped.
	if ($num < 0x20 || $num > 0x10FFFF || ($num >= 0xD800 && $num <= 0xDFFF))
		return '';

	// Anything in the ASCII range is kept as a (decimal) entity.
	if ($num < 0x80)
		return '&#' . $num . ';';

	// Two, three or four byte UTF-8 sequence, depending on the code point.
	if ($num < 0x800)
		return chr(192 | $num >> 6) . chr(128 | $num & 63);
	if ($num < 0x10000)
		return chr(224 | $num >> 12) . chr(128 | $num >> 6 & 63) . chr(128 | $num & 63);

	return chr(240 | $num >> 18) . chr(128 | $num >> 12 & 63) . chr(128 | $num >> 6 & 63) . chr(128 | $num & 63);
}