// http://akelpad.sourceforge.net/forum/viewtopic.php?p=27664#27664
// Version: 1.0
// Author: Shengalts Aleksander aka Instructor
//
//
// Description(1033): Detect codepage by extension or by file content.
// Description(1049): Определить кодировку по расширению или по содержимому файла.
//
// Arguments:
// -DetectArray='[["ext1,ext2","ExpContent","ExpFlags","CodePage",BOM,DetectLang],["ext3",...]]'
//
//              "ext1,ext2"  -extensions separated by comma. If "", any extension is matched.
//              "ExpContent" -search file content with regular expression. If "", not used.
//              "ExpFlags"   -regular expression flags.
//              "CodePage"   -default codepage specified as number, as IANA charset (e.g. "utf-8") or as regular expression backreference (e.g. "$1" or "\\1").
//              BOM          -byte order mark. If -1, it will be autodetected.
//              DetectLang   -detection language number (see LANGID_* defines). Special values:
//                              -1  don't change current detection language.
//                              -2  force to use specified "CodePage" without any detection.
// -ContentBuffer=1024  Content buffer size for regular expression search. Special values:
//                              0   use codepage recognition buffer size (default).
//                              -1  read entire file.
//
// Usage (add to "CmdLineBegin=" manual parameter):
//   /Call("Scripts::Main", 2, "AutoScript-DetectEx.js", `-DetectArray='[["cmd,bat","","","866",-1,0x0419],["xml,htm,html",".*?(encoding|charset)=([a-z\\d_\\-]+)","i","$2",-1,-2]]'`)

//Arguments
//Raw text of the -DetectArray argument; it is evaluated into aDetectArray at startup.
var pDetectArray=AkelPad.GetArgValue("DetectArray", "");
//Value of -ContentBuffer; passed to AkelPad.ReadFile when a rule has a content expression.
//When it is 0 the startup code replaces it with the MI_CODEPAGERECOGNITIONBUFFER setting.
var nContentBuffer=AkelPad.GetArgValue("ContentBuffer", 0);

//Defines
//Reference values for the DetectLang field of a rule; the visible code does not read these variables.
var LANGID_NONE     =0;      //None
var LANGID_RUSSIAN  =0x0419; //Cyrillic (1251, OEM, KOI8-R, UTF-8)
var LANGID_POLISH   =0x0415; //Eastern European (1250, OEM, UTF-8)
var LANGID_GERMAN   =0x0407; //Western European (1252, OEM, UTF-8)
var LANGID_TURKISH  =0x041F; //Turkish (1254, OEM, UTF-8)
var LANGID_CHINESE  =0x0404; //Chinese (ANSI, UTF-8)
var LANGID_JAPANESE =0x0411; //Japanese (932, UTF-8)
var LANGID_KOREAN   =0x0412; //Korean (949, UTF-8)

//Variables
//All of these are file-level globals: MainCallback reads and writes them between notifications.
var hMainWnd=AkelPad.GetMainWnd();  //Main window handle, target of every SendMessage call
var hScript;                        //Result of ScriptHandle(SH_FINDSCRIPT) for this script name
var lpdwFlags;                      //Value stored in NOPENDOCUMENT.dwFlags; used as the address of the flags
var dwFlags;                        //Open flags read from lpdwFlags
var lpFile;                         //Value stored in NOPENDOCUMENT.wszFile; used as the address of the file name
var pFile;                          //File name read from lpFile
var pExt="";                        //Lowercased extension of pFile
var lpnCodePage;                    //Value stored in NOPENDOCUMENT.nCodePage; the forced codepage is written there
var lpbBOM;                         //Value stored in NOPENDOCUMENT.bBOM; the forced BOM is written there
var nDefaultCodepage;               //Saved MI_DEFAULTCODEPAGE to restore on FINISH; 0 means nothing to restore
var nLangCodepageRecognition;       //Saved MI_LANGCODEPAGERECOGNITION to restore on FINISH; -1 means nothing to restore
var nNewDefaultCodepage;            //Codepage number resolved from the matched rule; 0 if none
var nNewBOM;                        //BOM field (index 4) of the matched rule
var nNewLangCodepageRecognition;    //DetectLang field (index 5) of the matched rule
var pContent;                       //File content returned by AkelPad.ReadFile for content expressions
var pCharset;                       //Codepage text taken from a rule or from a regular expression group
var oPattern;                       //RegExp built from a rule's ExpContent and ExpFlags
var aDetectArray=[];                //Parsed rules; empty until -DetectArray is evaluated
var aMatch=[];                      //Result of matching oPattern against pContent
var nOffset;                        //Scratch offset used by MainCallback
var i;                              //Loop index shared by the startup loop and MainCallback

//Entry point. The first call installs the hook and stays in a message loop;
//a call made while that instance is still looping asks it to close instead.
if ((hScript=AkelPad.ScriptHandle(WScript.ScriptName, 3 /*SH_FINDSCRIPT*/)) && AkelPad.ScriptHandle(hScript, 13 /*SH_GETMESSAGELOOP*/))
{
  //Script is running, second call close it.
  AkelPad.ScriptHandle(hScript, 33 /*SH_CLOSESCRIPT*/);
}
else
{
  //NOTE(review): eval runs the argument text as script code, so -DetectArray must come
  //from a trusted source; malformed text raises a script error here.
  if (pDetectArray)
    eval("aDetectArray=" + pDetectArray + ";");
  //-ContentBuffer=0 (or omitted) falls back to the program's codepage recognition buffer size.
  if (!nContentBuffer)
    nContentBuffer=AkelPad.SendMessage(hMainWnd, 1222 /*AKD_GETMAININFO*/, 184 /*MI_CODEPAGERECOGNITIONBUFFER*/, 0);
  //Without at least one rule there is nothing to do.
  if (!aDetectArray.length)
  {
    AkelPad.MessageBox(hMainWnd, "Too few parameters", WScript.ScriptName, 16 /*MB_ICONERROR*/);
    WScript.Quit();
  }
  //Extension lists are lowercased once because MainCallback compares them with a lowercased extension.
  for (i=0; i < aDetectArray.length; ++i)
  {
    aDetectArray[i][0]=aDetectArray[i][0].toLowerCase();
  }

  //Route the two open-document notifications of the main window to MainCallback.
  if (AkelPad.WindowSubClass(1 /*WSC_MAINPROC*/, MainCallback, 0x435 /*AKDN_OPENDOCUMENT_START*/,
                                                               0x436 /*AKDN_OPENDOCUMENT_FINISH*/))
  {
    //Allow other scripts running and unlock main thread from waiting this script.
    AkelPad.ScriptNoMutex(0x3 /*ULT_UNLOCKSCRIPTSQUEUE|ULT_UNLOCKPROGRAMTHREAD*/);

    //Message loop
    //Statements placed after this call do not run until the loop ends.
    AkelPad.WindowGetMessage();

    //Remove the hook once the loop has ended.
    AkelPad.WindowUnsubClass(1 /*WSC_MAINPROC*/);
  }
}

//Main window hook registered through AkelPad.WindowSubClass.
//
//AKDN_OPENDOCUMENT_START: unless the document is being reopened (OD_REOPEN), walks aDetectArray
//and applies the first rule whose extension list and content expression both accept the file:
//  - DetectLang == -2: the codepage (and the BOM, unless BOM == -1) is written straight into
//    NOPENDOCUMENT and the codepage detection flags are cleared;
//  - otherwise the default codepage and, unless DetectLang == -1, the recognition language
//    are replaced and the previous values are remembered.
//AKDN_OPENDOCUMENT_FINISH: restores the values remembered by the START handler.
//
//Arguments:
//  hWnd   -window handle supplied by the host (unused).
//  uMsg   -notification code.
//  wParam -unused.
//  lParam -on AKDN_OPENDOCUMENT_START, address of the NOPENDOCUMENT structure.
function MainCallback(hWnd, uMsg, wParam, lParam)
{
  if (uMsg == 0x435 /*AKDN_OPENDOCUMENT_START*/)
  {
    //Nothing to restore on FINISH until a rule replaces a setting.
    nDefaultCodepage=0;
    nLangCodepageRecognition=-1;
    //NOPENDOCUMENT.dwFlags holds an address (it is dereferenced on the next line), so it is read
    //with DT_QWORD like the other pointer members; DT_DWORD would drop the upper half on x64.
    lpdwFlags=AkelPad.MemRead(lParam + (_X64?40:20) /*offsetof(NOPENDOCUMENT, dwFlags)*/, 2 /*DT_QWORD*/);
    dwFlags=AkelPad.MemRead(lpdwFlags, 3 /*DT_DWORD*/);

    if (!(dwFlags & 0x100 /*OD_REOPEN*/))
    {
      lpFile=AkelPad.MemRead(lParam + (_X64?16:8) /*offsetof(NOPENDOCUMENT, wszFile)*/, 2 /*DT_QWORD*/);
      pFile=AkelPad.MemRead(lpFile, 1 /*DT_UNICODE*/);
      pExt=AkelPad.GetFilePath(pFile, 4 /*CPF_FILEEXT*/).toLowerCase();
      pContent="";
      pCharset="";
      nNewDefaultCodepage=0;

      for (i=0; i < aDetectArray.length; ++i)
      {
        //Every rule starts clean, a rejected rule must not leak its codepage into the next one.
        pCharset="";

        //Empty extension list accepts any file.
        if (aDetectArray[i][0] && !IsExtensionListed(aDetectArray[i][0], pExt))
          continue;

        if (aDetectArray[i][1])
        {
          //Content is read lazily, only when some rule actually needs it.
          if (!pContent)
            pContent=AkelPad.ReadFile(pFile, 0x1C /*ADT_DETECTCODEPAGE|ADT_DETECTBOM|ADT_NOMESSAGES*/, 0, 0, nContentBuffer);
          oPattern=new RegExp(aDetectArray[i][1], aDetectArray[i][2]);
          if (!(aMatch=pContent.match(oPattern)))
            continue;

          //"$N" or "\N" takes the codepage from the N-th captured group.
          if (aDetectArray[i][3].substr(0, 1) == "$" || aDetectArray[i][3].substr(0, 1) == "\\")
            pCharset=aMatch[parseInt(aDetectArray[i][3].substr(1))];
          else
            pCharset=aDetectArray[i][3];
        }
        else
        {
          //No content expression: the rule's codepage applies as is (also for a catch-all rule).
          pCharset=aDetectArray[i][3];
        }

        if (pCharset)
        {
          //Numeric text is used directly, anything else is looked up as a charset name.
          nNewDefaultCodepage=parseInt(pCharset);
          if (isNaN(nNewDefaultCodepage))
          {
            pCharset=pCharset.toLowerCase();
            nNewDefaultCodepage=GetCodepageByName(pCharset);
          }
          break;
        }
      }
      //i still indexes the rule that supplied the codepage.
      if (i < aDetectArray.length && nNewDefaultCodepage)
      {
        nNewBOM=aDetectArray[i][4];
        nNewLangCodepageRecognition=aDetectArray[i][5];

        if (nNewLangCodepageRecognition != -1)
        {
          if (nNewLangCodepageRecognition == -2)
          {
            //Force the codepage: write it into the notification data and switch detection off.
            if (nNewBOM == -1)
              dwFlags|=0x8 /*OD_ADT_DETECTBOM*/;
            else
            {
              lpbBOM=AkelPad.MemRead(lParam + (_X64?32:16) /*offsetof(NOPENDOCUMENT, bBOM)*/, 2 /*DT_QWORD*/);
              AkelPad.MemCopy(lpbBOM, nNewBOM, 3 /*DT_DWORD*/);
              dwFlags&=~0x8 /*OD_ADT_DETECTBOM*/;
            }
            lpnCodePage=AkelPad.MemRead(lParam + (_X64?24:12) /*offsetof(NOPENDOCUMENT, nCodePage)*/, 2 /*DT_QWORD*/);
            AkelPad.MemCopy(lpnCodePage, nNewDefaultCodepage, 3 /*DT_DWORD*/);
            AkelPad.MemCopy(lpdwFlags, dwFlags & ~0x6 /*OD_ADT_REGCODEPAGE|OD_ADT_DETECTCODEPAGE*/, 3 /*DT_DWORD*/);
            //Codepage is already applied, skip the default codepage replacement below.
            nNewDefaultCodepage=0;
          }
          else
          {
            //Remember the current recognition language, then replace it for this open operation.
            nLangCodepageRecognition=AkelPad.SendMessage(hMainWnd, 1222 /*AKD_GETMAININFO*/, 183 /*MI_LANGCODEPAGERECOGNITION*/, 0);
            AkelPad.SendMessage(hMainWnd, 1219 /*AKD_SETMAININFO*/, 183 /*MIS_LANGCODEPAGERECOGNITION*/, nNewLangCodepageRecognition);
          }
        }
        if (nNewDefaultCodepage)
        {
          //Remember the current default codepage, then replace it for this open operation.
          nDefaultCodepage=AkelPad.SendMessage(hMainWnd, 1222 /*AKD_GETMAININFO*/, 177 /*MI_DEFAULTCODEPAGE*/, 0);
          AkelPad.SendMessage(hMainWnd, 1219 /*AKD_SETMAININFO*/, 177 /*MIS_DEFAULTCODEPAGE*/, nNewDefaultCodepage);
        }
      }
    }
  }
  else if (uMsg == 0x436 /*AKDN_OPENDOCUMENT_FINISH*/)
  {
    //Put back whatever the START handler replaced.
    if (nDefaultCodepage)
      AkelPad.SendMessage(hMainWnd, 1219 /*AKD_SETMAININFO*/, 177 /*MIS_DEFAULTCODEPAGE*/, nDefaultCodepage);
    if (nLangCodepageRecognition != -1)
      AkelPad.SendMessage(hMainWnd, 1219 /*AKD_SETMAININFO*/, 183 /*MIS_LANGCODEPAGERECOGNITION*/, nLangCodepageRecognition);
  }
}

//Tell whether pExt equals one of the comma-separated entries of pExtList.
//Entries are compared as a whole, so "htm" is found in "html,htm" and "tml" is not found in "html".
//Surrounding spaces and a leading "*." or "." in an entry are ignored.
function IsExtensionListed(pExtList, pExt)
{
  var aEntries=pExtList.split(",");
  var pEntry;
  var n;

  for (n=0; n < aEntries.length; ++n)
  {
    pEntry=aEntries[n].replace(/^\s*\*?\.?/, "").replace(/\s+$/, "");
    if (pEntry == pExt)
      return true;
  }
  return false;
}

//Translate a charset name (e.g. "utf-8", "windows-1251") into a codepage number.
//
//Arguments:
//  pName -charset name. Letter case and surrounding whitespace are ignored.
//
//Return value:
//  Codepage number, or 0 if the name is not in the table.
//
//Two names used to be listed twice ("euc-jp" for 20932 and 51932, "iso-2022-jp" for 50220 and 50222).
//A switch always takes the first matching label, so the second entries could never be returned;
//they are removed here and the effective mapping (20932 and 50220) is unchanged.
function GetCodepageByName(pName)
{
  //Charset names are case-insensitive, normalize here so that every caller gets the same result.
  var pKey=String(pName).replace(/^\s+/, "").replace(/\s+$/, "").toLowerCase();

  switch (pKey)
  {
    case "ibm037": return 37;
    case "ibm437": return 437;
    case "ibm500": return 500;
    case "asmo-708": return 708;
    case "dos-720": return 720;
    case "ibm737": return 737;
    case "ibm775": return 775;
    case "ibm850": return 850;
    case "ibm852": return 852;
    case "ibm855": return 855;
    case "ibm857": return 857;
    case "ibm00858": return 858;
    case "ibm860": return 860;
    case "ibm861": return 861;
    case "dos-862": return 862;
    case "ibm863": return 863;
    case "ibm864": return 864;
    case "ibm865": return 865;
    case "cp866": return 866;
    case "ibm869": return 869;
    case "ibm870": return 870;
    case "windows-874": return 874;
    case "cp875": return 875;
    case "shift_jis": return 932;
    case "gb2312": return 936;
    case "ks_c_5601-1987": return 949;
    case "big5": return 950;
    case "ibm1026": return 1026;
    case "ibm01047": return 1047;
    case "ibm01140": return 1140;
    case "ibm01141": return 1141;
    case "ibm01142": return 1142;
    case "ibm01143": return 1143;
    case "ibm01144": return 1144;
    case "ibm01145": return 1145;
    case "ibm01146": return 1146;
    case "ibm01147": return 1147;
    case "ibm01148": return 1148;
    case "ibm01149": return 1149;
    case "utf-16le": return 1200;
    case "utf-16be": return 1201;
    case "windows-1250": return 1250;
    case "windows-1251": return 1251;
    case "windows-1252": return 1252;
    case "windows-1253": return 1253;
    case "windows-1254": return 1254;
    case "windows-1255": return 1255;
    case "windows-1256": return 1256;
    case "windows-1257": return 1257;
    case "windows-1258": return 1258;
    case "johab": return 1361;
    case "macintosh": return 10000;
    case "x-mac-japanese": return 10001;
    case "x-mac-chinesetrad": return 10002;
    case "x-mac-korean": return 10003;
    case "x-mac-arabic": return 10004;
    case "x-mac-hebrew": return 10005;
    case "x-mac-greek": return 10006;
    case "x-mac-cyrillic": return 10007;
    case "x-mac-chinesesimp": return 10008;
    case "x-mac-romanian": return 10010;
    case "x-mac-ukrainian": return 10017;
    case "x-mac-thai": return 10021;
    case "x-mac-ce": return 10029;
    case "x-mac-icelandic": return 10079;
    case "x-mac-turkish": return 10081;
    case "x-mac-croatian": return 10082;
    case "utf-32le": return 12000;
    case "utf-32be": return 12001;
    case "x-chinese_cns": return 20000;
    case "x-cp20001": return 20001;
    case "x_chinese-eten": return 20002;
    case "x-cp20003": return 20003;
    case "x-cp20004": return 20004;
    case "x-cp20005": return 20005;
    case "x-ia5": return 20105;
    case "x-ia5-german": return 20106;
    case "x-ia5-swedish": return 20107;
    case "x-ia5-norwegian": return 20108;
    case "us-ascii": return 20127;
    case "x-cp20261": return 20261;
    case "x-cp20269": return 20269;
    case "ibm273": return 20273;
    case "ibm277": return 20277;
    case "ibm278": return 20278;
    case "ibm280": return 20280;
    case "ibm284": return 20284;
    case "ibm285": return 20285;
    case "ibm290": return 20290;
    case "ibm297": return 20297;
    case "ibm420": return 20420;
    case "ibm423": return 20423;
    case "ibm424": return 20424;
    case "x-ebcdic-koreanextended": return 20833;
    case "ibm-thai": return 20838;
    case "koi8-r": return 20866;
    case "ibm871": return 20871;
    case "ibm880": return 20880;
    case "ibm905": return 20905;
    case "ibm00924": return 20924;
    case "euc-jp": return 20932;
    case "x-cp20936": return 20936;
    case "x-cp20949": return 20949;
    case "cp1025": return 21025;
    case "koi8-u": return 21866;
    case "iso-8859-1": return 28591;
    case "iso-8859-2": return 28592;
    case "iso-8859-3": return 28593;
    case "iso-8859-4": return 28594;
    case "iso-8859-5": return 28595;
    case "iso-8859-6": return 28596;
    case "iso-8859-7": return 28597;
    case "iso-8859-8": return 28598;
    case "iso-8859-9": return 28599;
    case "iso-8859-13": return 28603;
    case "iso-8859-15": return 28605;
    case "x-europa": return 29001;
    case "iso-8859-8-i": return 38598;
    case "iso-2022-jp": return 50220;
    case "csiso2022jp": return 50221;
    //50222 shares the name "iso-2022-jp" with 50220 above and therefore has no label of its own.
    case "iso-2022-kr": return 50225;
    case "x-cp50227": return 50227;
    //51932 shares the name "euc-jp" with 20932 above and therefore has no label of its own.
    case "euc-cn": return 51936;
    case "euc-kr": return 51949;
    case "hz-gb-2312": return 52936;
    case "gb18030": return 54936;
    case "x-iscii-de": return 57002;
    case "x-iscii-be": return 57003;
    case "x-iscii-ta": return 57004;
    case "x-iscii-te": return 57005;
    case "x-iscii-as": return 57006;
    case "x-iscii-or": return 57007;
    case "x-iscii-ka": return 57008;
    case "x-iscii-ma": return 57009;
    case "x-iscii-gu": return 57010;
    case "x-iscii-pa": return 57011;
    case "utf-7": return 65000;
    case "utf-8": return 65001;
  }
  return 0;
}
