MgAsmBase.cpp 65 KB


  1. // MgAsmBase.cpp: implementation of the CMgAsmBase class.
  2. //
  3. //////////////////////////////////////////////////////////////////////
  4. #include "stdafx.h"
  5. #include "MgAsmCom.h"
  6. #include "MgAsmComDef.h"
  7. //-------------------------------------------------------------------------------------------------------------------------
  // Global variable declarations:
  9. //-------------------------------------------------------------------------------------------------------------------------
  10. //
  11. //////////////////////////////////////////////////////////////////////
  12. // Construction/Destruction
  13. //////////////////////////////////////////////////////////////////////
  14. CMgAsmBase::CMgAsmBase()
  15. {
  16. //
  17. m_pAsmCmd = NULL; // Pointer to 0-terminated source line
  18. m_nScan = 0; // Type of last scanned element
  19. m_nPrio = 0; // Priority of operation (0: highest)
  20. //
  21. memset(m_sdata, 0, TEXTLEN); // Last scanned name (depends on type)
  22. //
  23. m_idata = 0; // Last scanned value
  24. m_fdata = 0; // Floating-point number
  25. m_pAsmError = NULL; // Explanation of last error, or NULL
  26. //
  27. m_nIDEAL = 0; // Force IDEAL decoding mode
  28. m_nSizeSens = 0; // How to decode size-sensitive mnemonics
  29. }
  30. CMgAsmBase::~CMgAsmBase()
  31. {
  32. }
  33. //--------------------------------------------------------------------------------
  34. // Simple and slightly recursive scanner shared by Assemble(). The scanner is
  35. // straightforward and ineffective, but high speed is not a must here. As
  36. // input, it uses global pointer to source line asmcmd. On exit, it fills in
  37. // global variables scan, prio, sdata, idata and/or fdata. If some error is
  38. // detected, asmerror points to error message, otherwise asmerror remains
  39. // unchanged.
  40. //--------------------------------------------------------------------------------
  41. void CMgAsmBase::Scanasm(int mode)
  42. {
  43. int i, j, base, maxdigit;
  44. long decimal, hex;
  45. long double floating, divisor;
  46. char s[TEXTLEN], * pcmd;
  47. m_sdata[0] = '\0';
  48. m_idata = 0;
  49. //
  50. if (m_pAsmCmd == NULL)
  51. {
  52. m_pAsmError = ("NULL input line");
  53. m_nScan = SCAN_ERR;
  54. return;
  55. }
  56. //
  57. while (*m_pAsmCmd == ' ' || *m_pAsmCmd == '\t')
  58. {
  59. m_pAsmCmd++; // Skip leading spaces
  60. }
  61. //
  62. if (*m_pAsmCmd == '\0' || *m_pAsmCmd == ';')
  63. {
  64. m_nScan = SCAN_EOL; // Empty line
  65. return;
  66. }
  67. //
  68. if (isalpha(*m_pAsmCmd) || *m_pAsmCmd == '_' || *m_pAsmCmd == '@')
  69. {
  70. m_sdata[0] = *m_pAsmCmd++;
  71. i = 1; // Some keyword or identifier
  72. while ((isalnum(*m_pAsmCmd) || *m_pAsmCmd == '_' || *m_pAsmCmd == '@') && i < sizeof(m_sdata))
  73. {
  74. m_sdata[i++] = *m_pAsmCmd++;
  75. }
  76. if (i >= sizeof(m_sdata))
  77. {
  78. m_pAsmError = ("Too long identifier");
  79. m_nScan = SCAN_ERR;
  80. return;
  81. }
  82. m_sdata[i] = '\0';
  83. while (*m_pAsmCmd == ' ' || *m_pAsmCmd == '\t')
  84. {
  85. m_pAsmCmd++; // Skip trailing spaces
  86. }
  87. strcpy(s, m_sdata);
  88. strupr(s);
  89. for (j = 0; j <= 8; j++)
  90. {
  91. // j==8 means "any register"
  92. if (strcmp(s, g_szRegName[0][j]) != 0)
  93. {
  94. continue;
  95. }
  96. m_idata = j;
  97. m_nScan = SCAN_REG8; // 8-bit register
  98. return;
  99. }
  100. for (j = 0; j <= 8; j++)
  101. {
  102. if (strcmp(s, g_szRegName[1][j]) != 0)
  103. {
  104. continue;
  105. }
  106. m_idata = j;
  107. m_nScan = SCAN_REG16; // 16-bit register
  108. return;
  109. }
  110. for (j = 0; j <= 8; j++)
  111. {
  112. if (strcmp(s, g_szRegName[2][j]) != 0)
  113. {
  114. continue;
  115. }
  116. m_idata = j;
  117. m_nScan = SCAN_REG32; // 32-bit register
  118. return;
  119. }
  120. for (j = 0; j < 6; j++)
  121. {
  122. if (strcmp(s, g_szSegName[j]) != 0)
  123. {
  124. continue;
  125. }
  126. m_idata = j;
  127. m_nScan = SCAN_SEG; // Segment register
  128. while (*m_pAsmCmd == ' ' || *m_pAsmCmd == '\t')
  129. {
  130. m_pAsmCmd++; // Skip trailing spaces
  131. }
  132. return;
  133. }
  134. if (strcmp(s, "ST") == 0)
  135. {
  136. pcmd = m_pAsmCmd;
  137. Scanasm(SA_NAME); // FPU register
  138. if (m_nScan != SCAN_SYMB || m_idata != '(')
  139. {
  140. m_pAsmCmd = pcmd; // Undo last scan
  141. m_idata = 0;
  142. m_nScan = SCAN_FPU;
  143. return;
  144. }
  145. Scanasm(SA_NAME);
  146. j = m_idata;
  147. if ((m_nScan != SCAN_ICONST && m_nScan != SCAN_DCONST) || m_idata < 0 || m_idata > 7)
  148. {
  149. m_pAsmError = ("FPU registers have indexes 0 to 7");
  150. m_nScan = SCAN_ERR;
  151. return;
  152. }
  153. Scanasm(SA_NAME);
  154. if (m_nScan != SCAN_SYMB || m_idata != ')')
  155. {
  156. m_pAsmError = ("Closing parenthesis expected");
  157. m_nScan = SCAN_ERR;
  158. return;
  159. }
  160. m_idata = j;
  161. m_nScan = SCAN_FPU;
  162. return;
  163. }
  164. for (j = 0; j <= 8; j++)
  165. {
  166. if (strcmp(s, g_szFPUName[j]) != 0)
  167. {
  168. continue;
  169. }
  170. m_idata = j;
  171. m_nScan = SCAN_FPU; // FPU register (alternative coding)
  172. return;
  173. }
  174. for (j = 0; j <= 8; j++)
  175. {
  176. if (strcmp(s, g_szMMXName[j]) != 0)
  177. {
  178. continue;
  179. }
  180. m_idata = j;
  181. m_nScan = SCAN_MMX; // MMX register
  182. return;
  183. }
  184. for (j = 0; j <= 8; j++)
  185. {
  186. if (strcmp(s, g_szCRName[j]) != 0)
  187. {
  188. continue;
  189. }
  190. m_idata = j;
  191. m_nScan = SCAN_CR; // Control register
  192. return;
  193. }
  194. for (j = 0; j <= 8; j++)
  195. {
  196. if (strcmp(s, g_szDRName[j]) != 0) continue;
  197. m_idata = j;
  198. m_nScan = SCAN_DR; // Debug register
  199. return;
  200. }
  201. for (j = 0; j < sizeof(g_szSizeName) / sizeof(g_szSizeName[0]); j++)
  202. {
  203. if (strcmp(s, g_szSizeName[j]) != 0)
  204. {
  205. continue;
  206. }
  207. pcmd = m_pAsmCmd;
  208. Scanasm(SA_NAME);
  209. if (m_nScan != SCAN_PTR) // Fetch non-functional "PTR"
  210. {
  211. m_pAsmCmd = pcmd;
  212. }
  213. m_idata = j;
  214. m_nScan = SCAN_OPSIZE; // Operand (data) size in bytes
  215. return;
  216. }
  217. if (strcmp(s, ("EIP")) == 0)
  218. {
  219. // Register EIP
  220. m_nScan = SCAN_EIP;
  221. m_idata = 0;
  222. return;
  223. }
  224. if (strcmp(s, ("SHORT")) == 0)
  225. {
  226. // Relative jump has 1-byte offset
  227. m_nScan = SCAN_JMPSIZE;
  228. m_idata = 1;
  229. return;
  230. }
  231. if (strcmp(s, ("LONG")) == 0)
  232. {
  233. // Relative jump has 4-byte offset
  234. m_nScan = SCAN_JMPSIZE;
  235. m_idata = 2;
  236. return;
  237. }
  238. if (strcmp(s, ("NEAR")) == 0)
  239. {
  240. // Jump within same code segment
  241. m_nScan = SCAN_JMPSIZE;
  242. m_idata = 4;
  243. return;
  244. }
  245. if (strcmp(s, ("FAR")) == 0)
  246. {
  247. // Jump to different code segment
  248. m_nScan = SCAN_JMPSIZE;
  249. m_idata = 8;
  250. return;
  251. }
  252. if (strcmp(s, ("LOCAL")) == 0 && *m_pAsmCmd == '.')
  253. {
  254. m_pAsmCmd++;
  255. while (*m_pAsmCmd == ' ' || *m_pAsmCmd == '\t')
  256. {
  257. m_pAsmCmd++; // Skip trailing spaces
  258. }
  259. if (!isdigit(*m_pAsmCmd))
  260. {
  261. m_pAsmError = ("Integer number expected");
  262. m_nScan = SCAN_ERR;
  263. return;
  264. }
  265. while (isdigit(*m_pAsmCmd)) // LOCAL index is decimal number!
  266. {
  267. m_idata = m_idata * 10 + (*m_pAsmCmd++) - '0';
  268. }
  269. m_nScan = SCAN_LOCAL;
  270. return;
  271. }
  272. if (strcmp(s, ("ARG")) == 0 && *m_pAsmCmd == '.')
  273. {
  274. m_pAsmCmd++;
  275. while (*m_pAsmCmd == ' ' || *m_pAsmCmd == '\t')
  276. {
  277. m_pAsmCmd++; // Skip trailing spaces
  278. }
  279. if (!isdigit(*m_pAsmCmd))
  280. {
  281. m_pAsmError = ("Integer number expected");
  282. m_nScan = SCAN_ERR;
  283. return;
  284. }
  285. while (isdigit(*m_pAsmCmd)) // ARG index is decimal number!
  286. {
  287. m_idata = m_idata * 10 + (*m_pAsmCmd++) - '0';
  288. }
  289. m_nScan = SCAN_ARG;
  290. return;
  291. }
  292. if (strcmp(s, ("REP")) == 0)
  293. {
  294. m_nScan = SCAN_REP; // REP prefix
  295. return;
  296. }
  297. if (strcmp(s, ("REPE")) == 0 || strcmp(s, ("REPZ")) == 0)
  298. {
  299. m_nScan = SCAN_REPE; // REPE prefix
  300. return;
  301. }
  302. if (strcmp(s, ("REPNE")) == 0 || strcmp(s, ("REPNZ")) == 0)
  303. {
  304. m_nScan = SCAN_REPNE; // REPNE prefix
  305. return;
  306. }
  307. if (strcmp(s, ("LOCK")) == 0)
  308. {
  309. m_nScan = SCAN_LOCK; // LOCK prefix
  310. return;
  311. }
  312. if (strcmp(s, ("PTR")) == 0)
  313. {
  314. m_nScan = SCAN_PTR; // PTR in MASM addressing statements
  315. return;
  316. }
  317. if (strcmp(s, ("CONST")) == 0 || strcmp(s, ("OFFSET")) == 0)
  318. {
  319. m_nScan = SCAN_OFS; // Present but undefined offset/constant
  320. return;
  321. }
  322. if (strcmp(s, ("SIGNED")) == 0)
  323. {
  324. m_nScan = SCAN_SIGNED; // Keyword "SIGNED" (in expressions)
  325. return;
  326. }
  327. if (strcmp(s, ("UNSIGNED")) == 0)
  328. {
  329. m_nScan = SCAN_UNSIGNED; // Keyword "UNSIGNED" (in expressions)
  330. return;
  331. }
  332. if (strcmp(s, ("CHAR")) == 0)
  333. {
  334. m_nScan = SCAN_CHAR; // Keyword "CHAR" (in expressions)
  335. return;
  336. }
  337. if (strcmp(s, ("FLOAT")) == 0)
  338. {
  339. m_nScan = SCAN_FLOAT; // Keyword "FLOAT" (in expressions)
  340. return;
  341. }
  342. if (strcmp(s, ("DOUBLE")) == 0)
  343. {
  344. m_nScan = SCAN_DOUBLE; // Keyword "DOUBLE" (in expressions)
  345. return;
  346. }
  347. if (strcmp(s, ("FLOAT10")) == 0)
  348. {
  349. m_nScan = SCAN_FLOAT10; // Keyword "FLOAT10" (in expressions)
  350. return;
  351. }
  352. if (strcmp(s, ("STRING")) == 0)
  353. {
  354. m_nScan = SCAN_STRING; // Keyword "STRING" (in expressions)
  355. return;
  356. }
  357. if (strcmp(s, ("UNICODE")) == 0)
  358. {
  359. m_nScan = SCAN_UNICODE; // Keyword "UNICODE" (in expressions)
  360. return;
  361. }
  362. if (strcmp(s, ("MSG")) == 0)
  363. {
  364. m_nScan = SCAN_MSG; // Pseudovariable MSG (in expressions)
  365. return;
  366. }
  367. if (mode & SA_NAME)
  368. {
  369. m_idata = i;
  370. m_nScan = SCAN_NAME; // Don't try to decode symbolic label
  371. return;
  372. }
  373. m_pAsmError = ("Unknown identifier");
  374. m_nScan = SCAN_ERR;
  375. return;
  376. }
  377. else if (isdigit(*m_pAsmCmd)) // Constant
  378. {
  379. base = 0;
  380. maxdigit = 0;
  381. decimal = hex = 0L;
  382. floating = 0.0;
  383. if (m_pAsmCmd[0] == '0' && toupper(m_pAsmCmd[1]) == 'X')
  384. {
  385. base = 16;
  386. m_pAsmCmd += 2;
  387. } // Force hexadecimal number
  388. while (1)
  389. {
  390. if (isdigit(*m_pAsmCmd))
  391. {
  392. decimal = decimal * 10 + (*m_pAsmCmd) - '0';
  393. floating = floating * 10.0 + (*m_pAsmCmd) - '0';
  394. hex = hex * 16 + (*m_pAsmCmd) - '0';
  395. if (maxdigit == 0)
  396. {
  397. maxdigit = 9;
  398. }
  399. m_pAsmCmd++;
  400. }
  401. else if (isxdigit(*m_pAsmCmd))
  402. {
  403. hex = hex * 16 + toupper(*m_pAsmCmd++) - 'A' + 10;
  404. maxdigit = 15;
  405. }
  406. else
  407. {
  408. break;
  409. }
  410. }
  411. if (maxdigit == 0)
  412. {
  413. m_pAsmError = ("Hexadecimal digits after 0x... expected");
  414. m_nScan = SCAN_ERR;
  415. return;
  416. }
  417. if (toupper(*m_pAsmCmd) == 'H')
  418. {
  419. // Force hexadecimal number
  420. if (base == 16)
  421. {
  422. m_pAsmError = ("Please don't mix 0xXXXX and XXXXh forms");
  423. m_nScan = SCAN_ERR;
  424. return;
  425. }
  426. m_pAsmCmd++;
  427. m_idata = hex;
  428. m_nScan = SCAN_ICONST;
  429. while (*m_pAsmCmd == ' ' || *m_pAsmCmd == '\t')
  430. {
  431. m_pAsmCmd++;
  432. }
  433. return;
  434. }
  435. if (*m_pAsmCmd == '.')
  436. {
  437. // Force decimal number
  438. if (base == 16 || maxdigit > 9)
  439. {
  440. m_pAsmError = ("Not a decimal number");
  441. m_nScan = SCAN_ERR;
  442. return;
  443. }
  444. m_pAsmCmd++;
  445. if (isdigit(*m_pAsmCmd) || toupper(*m_pAsmCmd) == 'E')
  446. {
  447. divisor = 1.0;
  448. while (isdigit(*m_pAsmCmd))
  449. {
  450. // Floating-point number
  451. divisor /= 10.0;
  452. floating += divisor * (*m_pAsmCmd - '0');
  453. m_pAsmCmd++;
  454. }
  455. if (toupper(*m_pAsmCmd) == 'E')
  456. {
  457. m_pAsmCmd++;
  458. if (*m_pAsmCmd == '-')
  459. {
  460. base = -1;
  461. m_pAsmCmd++;
  462. }
  463. else
  464. {
  465. base = 1;
  466. }
  467. if (!isdigit(*m_pAsmCmd))
  468. {
  469. m_pAsmError = ("Invalid exponent");
  470. m_nScan = SCAN_ERR;
  471. return;
  472. }
  473. decimal = 0;
  474. while (isdigit(*m_pAsmCmd))
  475. {
  476. if (decimal < 65536L) decimal = decimal * 10 + (*m_pAsmCmd++) - '0';
  477. }
  478. //floating*=pow10l(decimal*base);
  479. floating *= powl(10, decimal * base);
  480. }
  481. m_fdata = floating;
  482. m_nScan = SCAN_FCONST;
  483. return;
  484. }
  485. else
  486. {
  487. m_idata = decimal;
  488. m_nScan = SCAN_DCONST;
  489. while (*m_pAsmCmd == ' ' || *m_pAsmCmd == '\t')
  490. {
  491. m_pAsmCmd++;
  492. }
  493. return;
  494. }
  495. }
  496. m_idata = hex;
  497. m_nScan = SCAN_ICONST; // Default is hexadecimal
  498. while (*m_pAsmCmd == ' ' || *m_pAsmCmd == '\t')
  499. {
  500. m_pAsmCmd++;
  501. }
  502. return;
  503. }
  504. else if (*m_pAsmCmd == '\'') // Character constant
  505. {
  506. m_pAsmCmd++;
  507. if (*m_pAsmCmd == '\0' || (*m_pAsmCmd == '\\' && m_pAsmCmd[1] == '\0'))
  508. {
  509. m_pAsmError = ("Unterminated character constant");
  510. m_nScan = SCAN_ERR;
  511. return;
  512. }
  513. if (*m_pAsmCmd == '\'')
  514. {
  515. m_pAsmError = ("Empty character constant");
  516. m_nScan = SCAN_ERR;
  517. return;
  518. }
  519. if (*m_pAsmCmd == '\\')
  520. {
  521. m_pAsmCmd++;
  522. }
  523. m_idata = *m_pAsmCmd++;
  524. if (*m_pAsmCmd != '\'')
  525. {
  526. m_pAsmError = ("Unterminated character constant");
  527. m_nScan = SCAN_ERR;
  528. return;
  529. }
  530. m_pAsmCmd++;
  531. while (*m_pAsmCmd == ' ' || *m_pAsmCmd == '\t')
  532. {
  533. m_pAsmCmd++;
  534. }
  535. m_nScan = SCAN_ICONST;
  536. return;
  537. }
  538. else // Any other character or combination
  539. {
  540. m_idata = m_sdata[0] = *m_pAsmCmd++;
  541. m_sdata[1] = m_sdata[2] = '\0';
  542. if (m_idata == '|' && *m_pAsmCmd == '|')
  543. {
  544. m_idata = '||';
  545. m_nPrio = 10; // '||'
  546. m_sdata[1] = *m_pAsmCmd++;
  547. }
  548. else if (m_idata == '&' && *m_pAsmCmd == '&')
  549. {
  550. m_idata = '&&';
  551. m_nPrio = 9; // '&&'
  552. m_sdata[1] = *m_pAsmCmd++;
  553. }
  554. else if (m_idata == '=' && *m_pAsmCmd == '=')
  555. {
  556. m_idata = '==';
  557. m_nPrio = 5; // '=='
  558. m_sdata[1] = *m_pAsmCmd++;
  559. }
  560. else if (m_idata == '!' && *m_pAsmCmd == '=')
  561. {
  562. m_idata = '!=';
  563. m_nPrio = 5; // '!='
  564. m_sdata[1] = *m_pAsmCmd++;
  565. }
  566. else if (m_idata == '<' && *m_pAsmCmd == '=')
  567. {
  568. m_idata = '<=';
  569. m_nPrio = 4; // '<='
  570. m_sdata[1] = *m_pAsmCmd++;
  571. }
  572. else if (m_idata == '>' && *m_pAsmCmd == '=')
  573. {
  574. m_idata = '>=';
  575. m_nPrio = 4; // '>='
  576. m_sdata[1] = *m_pAsmCmd++;
  577. }
  578. else if (m_idata == '<' && *m_pAsmCmd == '<')
  579. {
  580. m_idata = '<<';
  581. m_nPrio = 3; // '<<'
  582. m_sdata[1] = *m_pAsmCmd++;
  583. }
  584. else if (m_idata == '>' && *m_pAsmCmd == '>')
  585. {
  586. m_idata = '>>';
  587. m_nPrio = 3; // '>>'
  588. m_sdata[1] = *m_pAsmCmd++;
  589. }
  590. else if (m_idata == '|')
  591. {
  592. m_nPrio = 8; // '|'
  593. }
  594. else if (m_idata == '^')
  595. {
  596. m_nPrio = 7; // '^'
  597. }
  598. else if (m_idata == '&')
  599. {
  600. m_nPrio = 6; // '&'
  601. }
  602. else if (m_idata == '<')
  603. {
  604. if (*m_pAsmCmd == '&')
  605. {
  606. // Import pseudolabel (for internal use)
  607. if ((mode & SA_IMPORT) == 0)
  608. {
  609. m_pAsmError = ("Syntax error");
  610. m_nScan = SCAN_ERR;
  611. return;
  612. }
  613. m_pAsmCmd++;
  614. i = 0;
  615. while (*m_pAsmCmd != '\0' && *m_pAsmCmd != '>')
  616. {
  617. m_sdata[i++] = *m_pAsmCmd++;
  618. if (i >= sizeof(m_sdata))
  619. {
  620. m_pAsmError = ("Too long import name");
  621. m_nScan = SCAN_ERR;
  622. return;
  623. }
  624. }
  625. if (*m_pAsmCmd != '>')
  626. {
  627. m_pAsmError = ("Unterminated import name");
  628. m_nScan = SCAN_ERR;
  629. return;
  630. }
  631. m_pAsmCmd++;
  632. m_sdata[i] = '\0';
  633. m_nScan = SCAN_IMPORT;
  634. return;
  635. }
  636. else
  637. {
  638. m_nPrio = 4; // '<'
  639. }
  640. }
  641. else if (m_idata == '>')
  642. {
  643. m_nPrio = 4; // '>'
  644. }
  645. else if (m_idata == '+')
  646. {
  647. m_nPrio = 2; // '+'
  648. }
  649. else if (m_idata == '-')
  650. {
  651. m_nPrio = 2; // '-'
  652. }
  653. else if (m_idata == '*')
  654. {
  655. m_nPrio = 1; // '*'
  656. }
  657. else if (m_idata == '/')
  658. {
  659. m_nPrio = 1; // '/'
  660. }
  661. else if (m_idata == '%')
  662. {
  663. m_nPrio = 1; // '%'
  664. }
  665. else if (m_idata == ']')
  666. {
  667. pcmd = m_pAsmCmd;
  668. Scanasm(SA_NAME);
  669. if (m_nScan != SCAN_SYMB || m_idata != '[')
  670. {
  671. m_idata = ']';
  672. m_pAsmCmd = pcmd;
  673. m_nPrio = 0;
  674. }
  675. else
  676. {
  677. m_idata = '+'; // Translate '][' to '+'
  678. m_nPrio = 2;
  679. }
  680. }
  681. else
  682. {
  683. m_nPrio = 0; // Any other character
  684. }
  685. m_nScan = SCAN_SYMB;
  686. return;
  687. }
  688. }
  689. // Fetches one complete operand from the input line and fills in structure op with operand's data
  690. // Expects that first token of the operand is already
  691. // scanned. Supports operands in generalized form (for example, R32 means any
  692. // of general-purpose 32-bit integer registers).
  693. void CMgAsmBase::Parseasmoperand(t_asmoperand* op)
  694. {
  695. int i, j, bracket, sign, xlataddr;
  696. int reg, r[9];
  697. long offset;
  698. if (m_nScan == SCAN_EOL || m_nScan == SCAN_ERR)
  699. {
  700. return; // No or bad operand
  701. }
  702. // Jump or call address may begin with address size modifier(s) SHORT, LONG,
  703. // NEAR and/or FAR. Not all combinations are allowed. After operand is
  704. // completely parsed, this function roughly checks whether modifier is
  705. // allowed. Exact check is done in Assemble().
  706. if (m_nScan == SCAN_JMPSIZE)
  707. {
  708. j = 0;
  709. while (m_nScan == SCAN_JMPSIZE)
  710. {
  711. j |= m_idata; // Fetch all size modifiers
  712. Scanasm(0);
  713. }
  714. if (
  715. ((j & 0x03) == 0x03) || // Mixed SHORT and LONG
  716. ((j & 0x0C) == 0x0C) || // Mixed NEAR and FAR
  717. ((j & 0x09) == 0x09) // Mixed FAR and SHORT
  718. )
  719. {
  720. m_pAsmError = ("Invalid combination of jump address modifiers");
  721. m_nScan = SCAN_ERR;
  722. return;
  723. }
  724. if ((j & 0x08) == 0)
  725. {
  726. j |= 0x04; // Force NEAR if not FAR
  727. }
  728. op->jmpmode = j;
  729. }
  730. // Simple operands are either register or constant, their processing is
  731. //简单的操作数为寄存器或常数,它们的处理
  732. // obvious and straightforward.
  733. if (m_nScan == SCAN_REG8 || m_nScan == SCAN_REG16 || m_nScan == SCAN_REG32)
  734. {
  735. op->type = REG;
  736. op->index = m_idata; // Integer general-purpose register 整数通用寄存器
  737. if (m_nScan == SCAN_REG8)
  738. {
  739. op->size = 1;
  740. }
  741. else if (m_nScan == SCAN_REG16)
  742. {
  743. op->size = 2;
  744. }
  745. else
  746. {
  747. op->size = 4;
  748. }
  749. }
  750. else if (m_nScan == SCAN_FPU) // FPU register
  751. {
  752. op->type = RST;
  753. op->index = m_idata;
  754. }
  755. else if (m_nScan == SCAN_MMX) // MMX or 3DNow! register
  756. {
  757. op->type = RMX;
  758. op->index = m_idata;
  759. }
  760. else if (m_nScan == SCAN_CR) // Control register
  761. {
  762. op->type = CRX;
  763. op->index = m_idata;
  764. }
  765. else if (m_nScan == SCAN_DR) // Debug register
  766. {
  767. op->type = DRX;
  768. op->index = m_idata;
  769. }
  770. else if (m_nScan == SCAN_SYMB && m_idata == '-')
  771. {
  772. Scanasm(0); // Negative constant
  773. if (m_nScan != SCAN_ICONST && m_nScan != SCAN_DCONST && m_nScan != SCAN_OFS)
  774. {
  775. m_pAsmError = ("Integer number expected");
  776. m_nScan = SCAN_ERR;
  777. return;
  778. }
  779. op->type = IMM;
  780. op->offset = -m_idata;
  781. if (m_nScan == SCAN_OFS)
  782. {
  783. op->anyoffset = 1;
  784. }
  785. }
  786. else if (m_nScan == SCAN_SYMB && m_idata == '+')
  787. {
  788. Scanasm(0); // Positive constant
  789. if (m_nScan != SCAN_ICONST && m_nScan != SCAN_DCONST && m_nScan != SCAN_OFS)
  790. {
  791. m_pAsmError = ("Integer number expected");
  792. m_nScan = SCAN_ERR;
  793. return;
  794. }
  795. op->type = IMM;
  796. op->offset = m_idata;
  797. if (m_nScan == SCAN_OFS)
  798. {
  799. op->anyoffset = 1;
  800. }
  801. }
  802. else if (m_nScan == SCAN_ICONST || m_nScan == SCAN_DCONST || m_nScan == SCAN_OFS)
  803. {
  804. j = m_idata;
  805. if (m_nScan == SCAN_OFS)
  806. {
  807. op->anyoffset = 1;
  808. }
  809. Scanasm(0);
  810. if (m_nScan == SCAN_SYMB && m_idata == ':')
  811. {
  812. Scanasm(0); // Absolute long address (seg:offset)
  813. if (m_nScan != SCAN_ICONST && m_nScan != SCAN_DCONST && m_nScan != SCAN_OFS)
  814. {
  815. m_pAsmError = ("Integer address expected");
  816. m_nScan = SCAN_ERR;
  817. return;
  818. }
  819. op->type = JMF;
  820. op->offset = m_idata;
  821. op->segment = j;
  822. if (m_nScan == SCAN_OFS) op->anyoffset = 1;
  823. }
  824. else
  825. {
  826. op->type = IMM;
  827. op->offset = j; // Constant without sign
  828. return; // Next token already scanned
  829. }
  830. }
  831. else if (m_nScan == SCAN_FCONST)
  832. {
  833. m_pAsmError = ("Floating-point numbers are not allowed in command");
  834. m_nScan = SCAN_ERR;
  835. return;
  836. }
  837. else if (m_nScan == SCAN_SEG || m_nScan == SCAN_OPSIZE || (m_nScan == SCAN_SYMB && m_idata == '[')) // Segment register or address
  838. {
  839. bracket = 0;
  840. if (m_nScan == SCAN_SEG)
  841. {
  842. j = m_idata;
  843. Scanasm(0);
  844. if (m_nScan != SCAN_SYMB || m_idata != ':')
  845. {
  846. op->type = SGM;
  847. op->index = j; // Segment register as operand
  848. return; // Next token already scanned
  849. }
  850. op->segment = j;
  851. Scanasm(0);
  852. }
  853. // Scan 32-bit address. This parser does not support 16-bit addresses.
  854. // First of all, get size of operand (optional), segment register (optional)
  855. // and opening bracket (required).
  856. while (1)
  857. {
  858. if (m_nScan == SCAN_SYMB && m_idata == '[')
  859. {
  860. if (bracket)
  861. {
  862. // Bracket
  863. m_pAsmError = ("Only one opening bracket allowed");
  864. m_nScan = SCAN_ERR;
  865. return;
  866. }
  867. bracket = 1;
  868. }
  869. else if (m_nScan == SCAN_OPSIZE)
  870. {
  871. if (op->size != 0)
  872. {
  873. // Size of operand
  874. m_pAsmError = ("Duplicated size modifier");
  875. m_nScan = SCAN_ERR;
  876. return;
  877. }
  878. op->size = m_idata;
  879. }
  880. else if (m_nScan == SCAN_SEG)
  881. {
  882. if (op->segment != SEG_UNDEF)
  883. {
  884. // Segment register
  885. m_pAsmError = ("Duplicated segment register");
  886. m_nScan = SCAN_ERR;
  887. return;
  888. }
  889. op->segment = m_idata;
  890. Scanasm(0);
  891. if (m_nScan != SCAN_SYMB || m_idata != ':')
  892. {
  893. m_pAsmError = ("Semicolon expected");
  894. m_nScan = SCAN_ERR;
  895. return;
  896. }
  897. }
  898. else if (m_nScan == SCAN_ERR)
  899. {
  900. return;
  901. }
  902. else
  903. {
  904. break; // None of expected address elements
  905. }
  906. Scanasm(0);
  907. }
  908. if (bracket == 0)
  909. {
  910. m_pAsmError = ("Address expression requires brackets");
  911. m_nScan = SCAN_ERR;
  912. return;
  913. }
  914. // Assembling a 32-bit address may be a kind of nigthmare, due to a large
  915. // number of allowed forms. Parser collects immediate offset in op->offset
  916. // and count for each register in array r[]. Then it decides whether this
  917. // combination is valid and determines scale, index and base. Assemble()
  918. // will use these numbers to select address form (with or without SIB byte,
  919. // 8- or 32-bit offset, use segment prefix or not). As a useful side effect
  920. // of this technique, one may specify, for example, [EAX*5] which will
  921. // correctly assemble to [EAX*4+EAX].
  922. for (i = 0; i <= 8; i++)
  923. {
  924. r[i] = 0;
  925. }
  926. sign = '+'; // Default sign for the first operand
  927. xlataddr = 0;
  928. while (1)
  929. {
  930. // Get SIB and offset
  931. if (m_nScan == SCAN_SYMB && (m_idata == '+' || m_idata == '-'))
  932. {
  933. sign = m_idata;
  934. Scanasm(0);
  935. }
  936. if (m_nScan == SCAN_ERR)
  937. {
  938. return;
  939. }
  940. if (sign == '?')
  941. {
  942. m_pAsmError = ("Syntax error");
  943. m_nScan = SCAN_ERR;
  944. return;
  945. }
  946. // Register AL appears as part of operand of (seldom used) command XLAT.
  947. if (m_nScan == SCAN_REG8 && m_idata == REG_EAX)
  948. {
  949. if (sign == '-')
  950. {
  951. m_pAsmError = ("Unable to subtract register");
  952. m_nScan = SCAN_ERR;
  953. return;
  954. }
  955. if (xlataddr != 0)
  956. {
  957. m_pAsmError = ("Too many registers");
  958. m_nScan = SCAN_ERR;
  959. return;
  960. }
  961. xlataddr = 1;
  962. Scanasm(0);
  963. }
  964. else if (m_nScan == SCAN_REG16)
  965. {
  966. m_pAsmError = ("Sorry, 16-bit addressing is not supported");
  967. m_nScan = SCAN_ERR;
  968. return;
  969. }
  970. else if (m_nScan == SCAN_REG32)
  971. {
  972. if (sign == '-')
  973. {
  974. m_pAsmError = ("Unable to subtract register");
  975. m_nScan = SCAN_ERR;
  976. return;
  977. }
  978. reg = m_idata;
  979. Scanasm(0);
  980. if (m_nScan == SCAN_SYMB && m_idata == '*')
  981. {
  982. Scanasm(0); // Try index*scale
  983. if (m_nScan == SCAN_ERR)
  984. {
  985. return;
  986. }
  987. if (m_nScan == SCAN_OFS)
  988. {
  989. m_pAsmError = ("Undefined scale is not allowed");
  990. m_nScan = SCAN_ERR;
  991. return;
  992. }
  993. if (m_nScan != SCAN_ICONST && m_nScan != SCAN_DCONST)
  994. {
  995. m_pAsmError = ("Syntax error");
  996. m_nScan = SCAN_ERR;
  997. return;
  998. }
  999. if (m_idata == 6 || m_idata == 7 || m_idata > 9)
  1000. {
  1001. m_pAsmError = ("Invalid scale");
  1002. m_nScan = SCAN_ERR;
  1003. return;
  1004. }
  1005. r[reg] += m_idata;
  1006. Scanasm(0);
  1007. }
  1008. else
  1009. {
  1010. r[reg]++; // Simple register
  1011. }
  1012. }
  1013. else if (m_nScan == SCAN_LOCAL)
  1014. {
  1015. r[REG_EBP]++;
  1016. op->offset -= m_idata * 4;
  1017. Scanasm(0);
  1018. }
  1019. else if (m_nScan == SCAN_ARG)
  1020. {
  1021. r[REG_EBP]++;
  1022. op->offset += (m_idata + 1) * 4;
  1023. Scanasm(0);
  1024. }
  1025. else if (m_nScan == SCAN_ICONST || m_nScan == SCAN_DCONST)
  1026. {
  1027. offset = m_idata;
  1028. Scanasm(0);
  1029. if (m_nScan == SCAN_SYMB && m_idata == '*')
  1030. {
  1031. Scanasm(0); // Try scale*index
  1032. if (m_nScan == SCAN_ERR)
  1033. {
  1034. return;
  1035. }
  1036. if (sign == '-')
  1037. {
  1038. m_pAsmError = ("Unable to subtract register");
  1039. m_nScan = SCAN_ERR;
  1040. return;
  1041. }
  1042. if (m_nScan == SCAN_REG16)
  1043. {
  1044. m_pAsmError = ("Sorry, 16-bit addressing is not supported");
  1045. m_nScan = SCAN_ERR;
  1046. return;
  1047. }
  1048. if (m_nScan != SCAN_REG32)
  1049. {
  1050. m_pAsmError = ("Syntax error");
  1051. m_nScan = SCAN_ERR;
  1052. return;
  1053. }
  1054. if (offset == 6 || offset == 7 || offset > 9)
  1055. {
  1056. m_pAsmError = ("Invalid scale");
  1057. m_nScan = SCAN_ERR;
  1058. return;
  1059. }
  1060. r[m_idata] += offset;
  1061. Scanasm(0);
  1062. }
  1063. else
  1064. {
  1065. if (sign == '-')
  1066. {
  1067. op->offset -= offset;
  1068. }
  1069. else
  1070. {
  1071. op->offset += offset;
  1072. }
  1073. }
  1074. }
  1075. else if (m_nScan == SCAN_OFS)
  1076. {
  1077. Scanasm(0);
  1078. if (m_nScan == SCAN_SYMB && m_idata == '*')
  1079. {
  1080. m_pAsmError = ("Undefined scale is not allowed");
  1081. m_nScan = SCAN_ERR;
  1082. return;
  1083. }
  1084. else
  1085. {
  1086. op->anyoffset = 1;
  1087. }
  1088. }
  1089. else
  1090. {
  1091. break; // None of expected address elements
  1092. }
  1093. if (m_nScan == SCAN_SYMB && m_idata == ']')
  1094. {
  1095. break;
  1096. }
  1097. sign = '?';
  1098. }
  1099. if (m_nScan == SCAN_ERR)
  1100. {
  1101. return;
  1102. }
  1103. if (m_nScan != SCAN_SYMB || m_idata != ']')
  1104. {
  1105. m_pAsmError = ("Syntax error");
  1106. m_nScan = SCAN_ERR;
  1107. return;
  1108. }
  1109. // Process XLAT address separately.
  1110. if (xlataddr != 0)
  1111. {
  1112. // XLAT address in form [EBX+AX]
  1113. for (i = 0; i <= 8; i++)
  1114. {
  1115. // Check which registers used
  1116. if (i == REG_EBX)
  1117. {
  1118. continue;
  1119. }
  1120. if (r[i] != 0) break;
  1121. }
  1122. if (i <= 8 || r[REG_EBX] != 1 || op->offset != 0 || op->anyoffset != 0)
  1123. {
  1124. m_pAsmError = ("Invalid address");
  1125. m_nScan = SCAN_ERR;
  1126. return;
  1127. }
  1128. op->type = MXL;
  1129. }
  1130. else // Determine scale, index and base.
  1131. {
  1132. j = 0; // Number of used registers
  1133. for (i = 0; i <= 8; i++)
  1134. {
  1135. if (r[i] == 0)
  1136. {
  1137. continue; // Unused register
  1138. }
  1139. if (r[i] == 3 || r[i] == 5 || r[i] == 9)
  1140. {
  1141. if (op->index >= 0 || op->base >= 0)
  1142. {
  1143. if (j == 0)
  1144. {
  1145. m_pAsmError = ("Invalid scale");
  1146. }
  1147. else
  1148. {
  1149. m_pAsmError = ("Too many registers");
  1150. }
  1151. m_nScan = SCAN_ERR;
  1152. return;
  1153. }
  1154. op->index = op->base = i;
  1155. op->scale = r[i] - 1;
  1156. }
  1157. else if (r[i] == 2 || r[i] == 4 || r[i] == 8)
  1158. {
  1159. if (op->index >= 0)
  1160. {
  1161. if (j <= 1)
  1162. {
  1163. m_pAsmError = ("Only one register may be scaled");
  1164. }
  1165. else
  1166. {
  1167. m_pAsmError = ("Too many registers");
  1168. }
  1169. m_nScan = SCAN_ERR;
  1170. return;
  1171. }
  1172. op->index = i;
  1173. op->scale = r[i];
  1174. }
  1175. else if (r[i] == 1)
  1176. {
  1177. if (op->base < 0)
  1178. {
  1179. op->base = i;
  1180. }
  1181. else if (op->index < 0)
  1182. {
  1183. op->index = i;
  1184. op->scale = 1;
  1185. }
  1186. else
  1187. {
  1188. m_pAsmError = ("Too many registers");
  1189. m_nScan = SCAN_ERR;
  1190. return;
  1191. }
  1192. }
  1193. else
  1194. {
  1195. m_pAsmError = ("Invalid scale");
  1196. m_nScan = SCAN_ERR;
  1197. return;
  1198. }
  1199. j++;
  1200. }
  1201. op->type = MRG;
  1202. }
  1203. }
  1204. else
  1205. {
  1206. m_pAsmError = ("Unrecognized operand");
  1207. m_nScan = SCAN_ERR;
  1208. return;
  1209. }
  1210. // In general, address modifier is allowed only with address expression which
  1211. // is a constant, a far address or a memory expression. More precise check
  1212. // will be done later in Assemble().
  1213. if (op->jmpmode != 0 && op->type != IMM && op->type != JMF && op->type != MRG)
  1214. {
  1215. m_pAsmError = ("Jump address modifier is not allowed");
  1216. m_nScan = SCAN_ERR;
  1217. return;
  1218. }
  1219. Scanasm(0); // Fetch next token from input line
  1220. }
  1221. //--------------------------------------------------------------------------------
  // Function assembles text into 32-bit 80x86 machine code. It supports imprecise
  // operands (for example, R32 stands for any general-purpose 32-bit register).
  // This allows searching for incomplete commands. A command is precise when all
  // significant bytes in model.mask are 0xFF. Some commands have more than one
  // decoding. By calling Assemble() with attempt=0,1... and constsize=0,1,2,3 one
  // also gets the alternative variants (bit 0x1 of constsize is responsible for
  // the size of the address constant and bit 0x2 - for immediate data). However,
  // only one address form is generated ([EAX*2], but not [EAX+EAX]; [EBX+EAX] but
  // not [EAX+EBX]; [EAX] will not use a SIB byte; no DS: prefix and so on).
  // Returns the number of bytes in the assembled code, or a non-positive number
  // if an error is detected. This number is the negation of the offset in the
  // input text where the error was encountered. Unfortunately, BC 4.52 is unable
  // to compile the switch (arg) in this code when any common subexpression
  // optimization is on. The next #pragma statement disables all optimizations.
  1236. //--------------------------------------------------------------------------------
  1237. int CMgAsmBase::Assemble(char* cmd, ulong ip, t_asmmodel* model, int attempt, int constsize, char* errtext)
  1238. {
  1239. int i, j, k, namelen, nameok, arg, match, datasize, addrsize, bytesize, minop, maxop;
  1240. int rep, lock, segment, jmpsize, jmpmode, longjump;
  1241. int hasrm, hassib, dispsize, immsize;
  1242. int anydisp, anyimm, anyjmp;
  1243. long l, displacement, immediate, jmpoffset;
  1244. char name[32], * nameend;
  1245. unsigned char tcode[MAXCMDSIZE], tmask[MAXCMDSIZE];
  1246. t_asmoperand aop[3], * op; // 最多允许3个操作数 Allows up to three operands
  1247. t_cmddata* pd;
  1248. if (model != NULL)
  1249. {
  1250. model->length = 0;
  1251. }
  1252. if (cmd == NULL || model == NULL || errtext == NULL)
  1253. {
  1254. if (errtext != NULL) strcpy(errtext, ("Internal OLLYDBG error"));
  1255. return 0;
  1256. } // 错误的参数 Wrong parameter
  1257. //
  1258. m_pAsmCmd = cmd;
  1259. rep = lock = 0;
  1260. errtext[0] = '\0';
  1261. //
  1262. Scanasm(SA_NAME);
  1263. if (m_nScan == SCAN_EOL) // 行结束, nothing to assemble
  1264. {
  1265. return 0;
  1266. }
  1267. while (1)
  1268. {
  1269. // Fetch all REPxx and LOCK prefixes 取所有REPxx和LOCK前缀
  1270. if (m_nScan == SCAN_REP || m_nScan == SCAN_REPE || m_nScan == SCAN_REPNE)
  1271. {
  1272. if (rep != 0)
  1273. {
  1274. strcpy(errtext, ("Duplicated REP prefix(重复REP前缀)"));
  1275. goto error;
  1276. }
  1277. rep = m_nScan;
  1278. }
  1279. else if (m_nScan == SCAN_LOCK)
  1280. {
  1281. if (lock != 0)
  1282. {
  1283. strcpy(errtext, ("Duplicated LOCK prefix(重复LOCK前缀)"));
  1284. goto error;
  1285. };
  1286. lock = m_nScan;
  1287. }
  1288. else
  1289. {
  1290. break; // No more prefixes 没有更多的前缀
  1291. }
  1292. Scanasm(SA_NAME);
  1293. }
  1294. if (m_nScan != SCAN_NAME || m_idata > 16)
  1295. {
  1296. strcpy(errtext, ("Command mnemonic expected(预期指令助记符)"));
  1297. goto error;
  1298. }
  1299. //
  1300. nameend = m_pAsmCmd;
  1301. strupr(m_sdata);
  1302. // Prepare full mnemonic (including repeat prefix, if any). 获取操作符
  1303. if (rep == SCAN_REP)
  1304. {
  1305. sprintf(name, ("REP %s"), m_sdata);
  1306. }
  1307. else if (rep == SCAN_REPE)
  1308. {
  1309. sprintf(name, ("REPE %s"), m_sdata);
  1310. }
  1311. else if (rep == SCAN_REPNE)
  1312. {
  1313. sprintf(name, ("REPNE %s"), m_sdata);
  1314. }
  1315. else
  1316. {
  1317. strcpy(name, m_sdata);
  1318. }
  1319. Scanasm(0);
  1320. // Parse command operands (up to 3). Note: jump address is always the first
  1321. // (and only) operand in actual command set.
  1322. //分析指令的操作数(最多3个) 。注:跳转地址永远是第一位
  1323. for (i = 0; i < 3; i++)
  1324. {
  1325. aop[i].type = NNN; // No operand 无操作数
  1326. aop[i].size = 0; // Undefined size 未定义的大小
  1327. aop[i].index = -1; // No index 没有索引
  1328. aop[i].scale = 0; // No scale 没有形成规模
  1329. aop[i].base = -1; // No base 无基址
  1330. aop[i].offset = 0; // No offset 无偏移
  1331. aop[i].anyoffset = 0; // No offset 无偏移
  1332. aop[i].segment = SEG_UNDEF; // No segment 在段
  1333. aop[i].jmpmode = 0; // No jump size modifier
  1334. }
  1335. //
  1336. Parseasmoperand(aop + 0);
  1337. jmpmode = aop[0].jmpmode;
  1338. if (jmpmode != 0)
  1339. {
  1340. jmpmode |= 0x80;
  1341. }
  1342. if (m_nScan == SCAN_SYMB && m_idata == ',')
  1343. {
  1344. Scanasm(0);
  1345. Parseasmoperand(aop + 1);
  1346. if (m_nScan == SCAN_SYMB && m_idata == ',')
  1347. {
  1348. Scanasm(0);
  1349. Parseasmoperand(aop + 2);
  1350. }
  1351. }
  1352. if (m_nScan == SCAN_ERR)
  1353. {
  1354. strcpy(errtext, m_pAsmError);
  1355. goto error;
  1356. };
  1357. if (m_nScan != SCAN_EOL)
  1358. {
  1359. strcpy(errtext, ("Extra input after operand"));
  1360. goto error;
  1361. };
  1362. // If jump size is not specified, function tries to use short jump. If
  1363. // attempt fails, it retries with long form.
  1364. longjump = 0; // Try short jump on the first pass 在第一轮尝试短跳
  1365. //label:-------------------------------
  1366. retrylongjump:
  1367. nameok = 0;
  1368. // Some commands allow different number of operands. Variables minop and
  1369. // maxop accumulate their minimal and maximal counts. The numbers are not
  1370. // used in assembly process but allow for better error diagnostics.
  1371. minop = 3;
  1372. maxop = 0;
  1373. // Main assembly loop: try to find the command which matches all operands,
  1374. // but do not process operands yet.
  1375. namelen = strlen(name);
  1376. for (pd = g_CmdData; pd->mask != 0; pd++)
  1377. {
  1378. //匹配操作符
  1379. if (pd->name[0] == '&')
  1380. {
  1381. // Mnemonic depends on operand size
  1382. j = 1;
  1383. datasize = 2;
  1384. addrsize = 4;
  1385. while (1)
  1386. {
  1387. // Try all mnemonics (separated by ':')
  1388. for (i = 0; pd->name[j] != '\0' && pd->name[j] != ':'; j++)
  1389. {
  1390. if (pd->name[j] == '*')
  1391. {
  1392. if (name[i] == 'W')
  1393. {
  1394. datasize = 2;
  1395. i++;
  1396. }
  1397. else if (name[i] == 'D')
  1398. {
  1399. datasize = 4;
  1400. i++;
  1401. }
  1402. else if (m_nSizeSens == 0)
  1403. {
  1404. datasize = 2;
  1405. }
  1406. else
  1407. {
  1408. datasize = 4;
  1409. }
  1410. }
  1411. else if (pd->name[j] == name[i])
  1412. {
  1413. i++;
  1414. }
  1415. else
  1416. {
  1417. break;
  1418. }
  1419. }
  1420. if (name[i] == '\0' && (pd->name[j] == '\0' || pd->name[j] == ':'))
  1421. {
  1422. break; // Bingo!
  1423. }
  1424. while (pd->name[j] != '\0' && pd->name[j] != ':')
  1425. {
  1426. j++;
  1427. }
  1428. if (pd->name[j] == ':')
  1429. {
  1430. j++; // Retry with 32-bit mnenonic
  1431. datasize = 4;
  1432. }
  1433. else
  1434. {
  1435. i = 0; // Comparison failed
  1436. break;
  1437. }
  1438. }
  1439. if (i == 0)
  1440. {
  1441. continue;
  1442. }
  1443. }
  1444. else if (pd->name[0] == '$') // Mnemonic depends on address size
  1445. {
  1446. j = 1;
  1447. datasize = 0;
  1448. addrsize = 2;
  1449. while (1)
  1450. {
  1451. // Try all mnemonics (separated by ':')
  1452. for (i = 0; pd->name[j] != '\0' && pd->name[j] != ':'; j++)
  1453. {
  1454. if (pd->name[j] == '*')
  1455. {
  1456. if (name[i] == 'W')
  1457. {
  1458. addrsize = 2;
  1459. i++;
  1460. }
  1461. else if (name[i] == 'D')
  1462. {
  1463. addrsize = 4;
  1464. i++;
  1465. }
  1466. else if (m_nSizeSens == 0)
  1467. {
  1468. addrsize = 2;
  1469. }
  1470. else
  1471. {
  1472. addrsize = 4;
  1473. }
  1474. }
  1475. else if (pd->name[j] == name[i])
  1476. {
  1477. i++;
  1478. }
  1479. else
  1480. {
  1481. break;
  1482. }
  1483. }
  1484. if (name[i] == '\0' && (pd->name[j] == '\0' || pd->name[j] == ':'))
  1485. {
  1486. break; // Bingo!
  1487. }
  1488. while (pd->name[j] != '\0' && pd->name[j] != ':')
  1489. {
  1490. j++;
  1491. }
  1492. if (pd->name[j] == ':')
  1493. {
  1494. j++; // Retry with 32-bit mnenonic
  1495. addrsize = 4;
  1496. }
  1497. else
  1498. {
  1499. i = 0; // Comparison failed
  1500. break;
  1501. }
  1502. }
  1503. if (i == 0)
  1504. {
  1505. continue;
  1506. }
  1507. }
  1508. else // Compare with all synonimes
  1509. {
  1510. j = k = 0;
  1511. datasize = 0; // Default settings
  1512. addrsize = 4;
  1513. while (1)
  1514. {
  1515. while (pd->name[j] != ',' && pd->name[j] != '\0')
  1516. {
  1517. j++;
  1518. }
  1519. if (j - k == namelen && strnicmp(name, pd->name + k, namelen) == 0)//匹配操作符
  1520. {
  1521. break;
  1522. }
  1523. k = j + 1;
  1524. if (pd->name[j] == '\0')
  1525. {
  1526. break;
  1527. }
  1528. j = k;
  1529. }
  1530. if (k > j)
  1531. {
  1532. continue;
  1533. }
  1534. }
  1535. // For error diagnostics it is important to know whether mnemonic exists.
  1536. nameok++;
  1537. if (pd->arg1 == NNN || pd->arg1 >= PSEUDOOP)
  1538. {
  1539. minop = 0;
  1540. }
  1541. else if (pd->arg2 == NNN || pd->arg2 >= PSEUDOOP)
  1542. {
  1543. if (minop > 1)
  1544. {
  1545. minop = 1;
  1546. }
  1547. if (maxop < 1)
  1548. {
  1549. maxop = 1;
  1550. }
  1551. }
  1552. else if (pd->arg3 == NNN || pd->arg3 >= PSEUDOOP)
  1553. {
  1554. if (minop > 2)
  1555. {
  1556. minop = 2;
  1557. }
  1558. if (maxop < 2)
  1559. {
  1560. maxop = 2;
  1561. }
  1562. }
  1563. else
  1564. {
  1565. maxop = 3;
  1566. }
  1567. // Determine default and allowed operand size(s).
  1568. if (pd->bits == FF)
  1569. {
  1570. datasize = 2; // Forced 16-bit size
  1571. }
  1572. if (pd->bits == WW || pd->bits == WS || pd->bits == W3 || pd->bits == WP)
  1573. {
  1574. bytesize = 1; // 1-byte size allowed
  1575. }
  1576. else
  1577. {
  1578. bytesize = 0; // Word/dword size only
  1579. }
  1580. // Check whether command operands match specified. If so, variable match
  1581. // remains zero, otherwise it contains kind of mismatch. This allows for
  1582. // better error diagnostics.
  1583. match = 0;
  1584. for (j = 0; j < 3; j++)
  1585. {
  1586. // Up to 3 operands
  1587. op = aop + j;
  1588. if (j == 0)
  1589. {
  1590. arg = pd->arg1;
  1591. }
  1592. else if (j == 1)
  1593. {
  1594. arg = pd->arg2;
  1595. }
  1596. else
  1597. {
  1598. arg = pd->arg3;
  1599. }
  1600. if (arg == NNN || arg >= PSEUDOOP)
  1601. {
  1602. if (op->type != NNN) // No more arguments
  1603. {
  1604. match |= MA_NOP;
  1605. }
  1606. break;
  1607. }
  1608. if (op->type == NNN)
  1609. {
  1610. match |= MA_NOP; // No corresponding operand
  1611. break;
  1612. }
  1613. //
  1614. switch (arg)
  1615. {
  1616. case REG: // Integer register in Reg field
  1617. case RCM: // Integer register in command byte
  1618. case RAC: // Accumulator (AL/AX/EAX, implicit)
  1619. {
  1620. if (op->type != REG)
  1621. {
  1622. match |= MA_TYP;
  1623. }
  1624. if (arg == RAC && op->index != REG_EAX && op->index != 8)
  1625. {
  1626. match |= MA_TYP;
  1627. }
  1628. if (bytesize == 0 && op->size == 1)
  1629. {
  1630. match |= MA_SIZ;
  1631. }
  1632. if (datasize == 0)
  1633. {
  1634. datasize = op->size;
  1635. }
  1636. if (datasize != op->size)
  1637. {
  1638. match |= MA_DIF;
  1639. }
  1640. break;
  1641. }
  1642. case RG4: // Integer 4-byte register in Reg field
  1643. {
  1644. if (op->type != REG)
  1645. {
  1646. match |= MA_TYP;
  1647. }
  1648. if (op->size != 4)
  1649. {
  1650. match |= MA_SIZ;
  1651. }
  1652. if (datasize == 0)
  1653. {
  1654. datasize = op->size;
  1655. }
  1656. if (datasize != op->size)
  1657. {
  1658. match |= MA_DIF;
  1659. }
  1660. break;
  1661. }
  1662. case RAX: // AX (2-byte, implicit)
  1663. {
  1664. if (op->type != REG || (op->index != REG_EAX && op->index != 8))
  1665. {
  1666. match |= MA_TYP;
  1667. }
  1668. if (op->size != 2)
  1669. {
  1670. match |= MA_SIZ;
  1671. }
  1672. if (datasize == 0)
  1673. {
  1674. datasize = op->size;
  1675. }
  1676. if (datasize != op->size)
  1677. {
  1678. match |= MA_DIF;
  1679. }
  1680. break;
  1681. }
  1682. case RDX: // DX (16-bit implicit port address)
  1683. {
  1684. if (op->type != REG || (op->index != REG_EDX && op->index != 8))
  1685. {
  1686. match |= MA_TYP;
  1687. }
  1688. if (op->size != 2)
  1689. {
  1690. match |= MA_SIZ;
  1691. }
  1692. break;
  1693. }
  1694. case RCL: // Implicit CL register (for shifts)
  1695. {
  1696. if (op->type != REG || (op->index != REG_ECX && op->index != 8))
  1697. {
  1698. match |= MA_TYP;
  1699. }
  1700. if (op->size != 1)
  1701. {
  1702. match |= MA_SIZ;
  1703. }
  1704. break;
  1705. }
  1706. case RS0: // Top of FPU stack (ST(0))
  1707. {
  1708. if (op->type != RST || (op->index != 0 && op->index != 8))
  1709. {
  1710. match |= MA_TYP;
  1711. }
  1712. break;
  1713. }
  1714. case RST: // FPU register (ST(i)) in command byte
  1715. {
  1716. if (op->type != RST)
  1717. {
  1718. match |= MA_TYP;
  1719. }
  1720. break;
  1721. }
  1722. case RMX: // MMX register MMx
  1723. case R3D: // 3DNow! register MMx
  1724. {
  1725. if (op->type != RMX)
  1726. {
  1727. match |= MA_TYP;
  1728. }
  1729. break;
  1730. }
  1731. case MRG: // Memory/register in ModRM byte
  1732. {
  1733. if (op->type != MRG && op->type != REG)
  1734. {
  1735. match |= MA_TYP;
  1736. }
  1737. if (bytesize == 0 && op->size == 1)
  1738. {
  1739. match |= MA_SIZ;
  1740. }
  1741. if (datasize == 0)
  1742. {
  1743. datasize = op->size;
  1744. }
  1745. if (op->size != 0 && op->size != datasize)
  1746. {
  1747. match |= MA_DIF;
  1748. }
  1749. break;
  1750. }
  1751. case MR1: // 1-byte memory/register in ModRM byte
  1752. {
  1753. if (op->type != MRG && op->type != REG)
  1754. {
  1755. match |= MA_TYP;
  1756. }
  1757. if (op->size != 0 && op->size != 1)
  1758. {
  1759. match |= MA_SIZ;
  1760. }
  1761. break;
  1762. }
  1763. case MR2: // 2-byte memory/register in ModRM byte
  1764. {
  1765. if (op->type != MRG && op->type != REG)
  1766. {
  1767. match |= MA_TYP;
  1768. }
  1769. if (op->size != 0 && op->size != 2)
  1770. {
  1771. match |= MA_SIZ;
  1772. }
  1773. break;
  1774. }
  1775. case MR4: // 4-byte memory/register in ModRM byte
  1776. {
  1777. if (op->type != MRG && op->type != REG)
  1778. {
  1779. match |= MA_TYP;
  1780. }
  1781. if (op->size != 0 && op->size != 4)
  1782. {
  1783. match |= MA_SIZ;
  1784. }
  1785. break;
  1786. }
  1787. case RR4: // 4-byte memory/register (register only)
  1788. {
  1789. if (op->type != REG)
  1790. {
  1791. match |= MA_TYP;
  1792. }
  1793. if (op->size != 0 && op->size != 4)
  1794. {
  1795. match |= MA_SIZ;
  1796. }
  1797. break;
  1798. }
  1799. case MRJ: // Memory/reg in ModRM as JUMP target
  1800. {
  1801. if (op->type != MRG && op->type != REG)
  1802. {
  1803. match |= MA_TYP;
  1804. }
  1805. if (op->size != 0 && op->size != 4)
  1806. {
  1807. match |= MA_SIZ;
  1808. }
  1809. if ((jmpmode & 0x09) != 0)
  1810. {
  1811. match |= MA_JMP;
  1812. }
  1813. jmpmode &= 0x7F;
  1814. break;
  1815. }
  1816. case MR8: // 8-byte memory/MMX register in ModRM
  1817. case MRD: // 8-byte memory/3DNow! register in ModRM
  1818. {
  1819. if (op->type != MRG && op->type != RMX)
  1820. {
  1821. match |= MA_TYP;
  1822. }
  1823. if (op->size != 0 && op->size != 8)
  1824. {
  1825. match |= MA_SIZ;
  1826. }
  1827. break;
  1828. }
  1829. case RR8: // 8-byte MMX register only in ModRM
  1830. case RRD: // 8-byte memory/3DNow! (register only)
  1831. {
  1832. if (op->type != RMX)
  1833. {
  1834. match |= MA_TYP;
  1835. }
  1836. if (op->size != 0 && op->size != 8)
  1837. {
  1838. match |= MA_SIZ;
  1839. }
  1840. break;
  1841. }
  1842. case MMA: // Memory address in ModRM byte for LEA
  1843. {
  1844. if (op->type != MRG)
  1845. {
  1846. match |= MA_TYP;
  1847. }
  1848. break;
  1849. }
  1850. case MML: // Memory in ModRM byte (for LES)
  1851. {
  1852. if (op->type != MRG)
  1853. {
  1854. match |= MA_TYP;
  1855. }
  1856. if (op->size != 0 && op->size != 6)
  1857. {
  1858. match |= MA_SIZ;
  1859. }
  1860. if (datasize == 0)
  1861. {
  1862. datasize = 4;
  1863. }
  1864. else if (datasize != 4)
  1865. {
  1866. match |= MA_DIF;
  1867. }
  1868. break;
  1869. }
  1870. case MMS: // Memory in ModRM byte (as SEG:OFFS)
  1871. {
  1872. if (op->type != MRG)
  1873. {
  1874. match |= MA_TYP;
  1875. }
  1876. if (op->size != 0 && op->size != 6)
  1877. {
  1878. match |= MA_SIZ;
  1879. }
  1880. if ((jmpmode & 0x07) != 0)
  1881. {
  1882. match |= MA_JMP;
  1883. }
  1884. jmpmode &= 0x7F;
  1885. break;
  1886. }
  1887. case MM6: // Memory in ModRm (6-byte descriptor)
  1888. {
  1889. if (op->type != MRG)
  1890. {
  1891. match |= MA_TYP;
  1892. }
  1893. if (op->size != 0 && op->size != 6)
  1894. {
  1895. match |= MA_SIZ;
  1896. }
  1897. break;
  1898. }
  1899. case MMB: // Two adjacent memory locations (BOUND)
  1900. {
  1901. if (op->type != MRG)
  1902. {
  1903. match |= MA_TYP;
  1904. }
  1905. k = op->size;
  1906. if (m_nIDEAL == 0 && k > 1)
  1907. {
  1908. k /= 2;
  1909. }
  1910. if (k != 0 && k != datasize)
  1911. {
  1912. match |= MA_DIF;
  1913. }
  1914. break;
  1915. }
  1916. case MD2: // Memory in ModRM byte (16-bit integer)
  1917. case MB2: // Memory in ModRM byte (16-bit binary)
  1918. {
  1919. if (op->type != MRG)
  1920. {
  1921. match |= MA_TYP;
  1922. }
  1923. if (op->size != 0 && op->size != 2)
  1924. {
  1925. match |= MA_SIZ;
  1926. }
  1927. break;
  1928. }
  1929. case MD4: // Memory in ModRM byte (32-bit integer)
  1930. case MF4: // Memory in ModRM byte (32-bit float)
  1931. {
  1932. if (op->type != MRG)
  1933. {
  1934. match |= MA_TYP;
  1935. }
  1936. if (op->size != 0 && op->size != 4)
  1937. {
  1938. match |= MA_SIZ;
  1939. }
  1940. break;
  1941. }
  1942. case MD8: // Memory in ModRM byte (64-bit integer)
  1943. case MF8: // Memory in ModRM byte (64-bit float)
  1944. {
  1945. if (op->type != MRG)
  1946. {
  1947. match |= MA_TYP;
  1948. }
  1949. if (op->size != 0 && op->size != 8)
  1950. {
  1951. match |= MA_SIZ;
  1952. }
  1953. break;
  1954. }
  1955. case MDA: // Memory in ModRM byte (80-bit BCD)
  1956. case MFA: // Memory in ModRM byte (80-bit float)
  1957. {
  1958. if (op->type != MRG)
  1959. {
  1960. match |= MA_TYP;
  1961. }
  1962. if (op->size != 0 && op->size != 10)
  1963. {
  1964. match |= MA_SIZ;
  1965. }
  1966. break;
  1967. }
  1968. case MFE: // Memory in ModRM byte (FPU environment)
  1969. case MFS: // Memory in ModRM byte (FPU state)
  1970. case MFX: // Memory in ModRM byte (ext. FPU state)
  1971. {
  1972. if (op->type != MRG)
  1973. {
  1974. match |= MA_TYP;
  1975. }
  1976. if (op->size != 0)
  1977. {
  1978. match |= MA_SIZ;
  1979. }
  1980. break;
  1981. }
  1982. case MSO: // Source in string operands ([ESI])
  1983. {
  1984. if (op->type != MRG || op->base != REG_ESI || op->index != -1 ||
  1985. op->offset != 0 || op->anyoffset != 0)
  1986. {
  1987. match |= MA_TYP;
  1988. }
  1989. if (datasize == 0)
  1990. {
  1991. datasize = op->size;
  1992. }
  1993. if (op->size != 0 && op->size != datasize)
  1994. {
  1995. match |= MA_DIF;
  1996. }
  1997. break;
  1998. }
  1999. case MDE: // Destination in string operands ([EDI])
  2000. {
  2001. if (op->type != MRG || op->base != REG_EDI ||
  2002. op->index != -1 || op->offset != 0 || op->anyoffset != 0)
  2003. {
  2004. match |= MA_TYP;
  2005. }
  2006. if (op->segment != SEG_UNDEF && op->segment != SEG_ES)
  2007. {
  2008. match |= MA_SEG;
  2009. }
  2010. if (datasize == 0)
  2011. {
  2012. datasize = op->size;
  2013. }
  2014. if (op->size != 0 && op->size != datasize)
  2015. {
  2016. match |= MA_DIF;
  2017. }
  2018. break;
  2019. }
  2020. case MXL: // XLAT operand ([EBX+AL])
  2021. {
  2022. if (op->type != MXL)
  2023. {
  2024. match |= MA_TYP;
  2025. }
  2026. break;
  2027. }
  2028. case IMM: // Immediate data (8 or 16/32)
  2029. case IMU: // Immediate unsigned data (8 or 16/32)
  2030. {
  2031. if (op->type != IMM)
  2032. {
  2033. match |= MA_TYP;
  2034. }
  2035. break;
  2036. }
  2037. case VXD: // VxD service (32-bit only)
  2038. {
  2039. if (op->type != IMM)
  2040. {
  2041. match |= MA_TYP;
  2042. }
  2043. if (datasize == 0)
  2044. {
  2045. datasize = 4;
  2046. }
  2047. if (datasize != 4)
  2048. {
  2049. match |= MA_SIZ;
  2050. }
  2051. break;
  2052. }
  2053. case JMF: // Immediate absolute far jump/call addr
  2054. {
  2055. if (op->type != JMF)
  2056. {
  2057. match |= MA_TYP;
  2058. }
  2059. if ((jmpmode & 0x05) != 0)
  2060. {
  2061. match |= MA_JMP;
  2062. }
  2063. jmpmode &= 0x7F;
  2064. break;
  2065. }
  2066. case JOB: // Immediate byte offset (for jumps)
  2067. {
  2068. if (op->type != IMM || longjump)
  2069. {
  2070. match |= MA_TYP;
  2071. }
  2072. if ((jmpmode & 0x0A) != 0)
  2073. {
  2074. match |= MA_JMP;
  2075. }
  2076. jmpmode &= 0x7F;
  2077. break;
  2078. }
  2079. case JOW: // Immediate full offset (for jumps)
  2080. {
  2081. if (op->type != IMM)
  2082. {
  2083. match |= MA_TYP;
  2084. }
  2085. if ((jmpmode & 0x09) != 0)
  2086. {
  2087. match |= MA_JMP;
  2088. }
  2089. jmpmode &= 0x7F;
  2090. break;
  2091. }
  2092. case IMA: // Immediate absolute near data address
  2093. {
  2094. if (op->type != MRG || op->base >= 0 || op->index >= 0)
  2095. {
  2096. match |= MA_TYP;
  2097. }
  2098. break;
  2099. }
  2100. case IMX: // Immediate sign-extendable byte
  2101. {
  2102. if (op->type != IMM)
  2103. {
  2104. match |= MA_TYP;
  2105. }
  2106. if (op->offset < -128 || op->offset > 127)
  2107. {
  2108. match |= MA_RNG;
  2109. }
  2110. break;
  2111. }
  2112. case C01: // Implicit constant 1 (for shifts)
  2113. {
  2114. if (op->type != IMM || (op->offset != 1 && op->anyoffset == 0))
  2115. {
  2116. match |= MA_TYP;
  2117. }
  2118. break;
  2119. }
  2120. case IMS: // Immediate byte (for shifts)
  2121. case IM1: // Immediate byte
  2122. {
  2123. if (op->type != IMM)
  2124. {
  2125. match |= MA_TYP;
  2126. }
  2127. if (op->offset < -128 || op->offset > 255)
  2128. {
  2129. match |= MA_RNG;
  2130. }
  2131. break;
  2132. }
  2133. case IM2: // Immediate word (ENTER/RET)
  2134. {
  2135. if (op->type != IMM)
  2136. {
  2137. match |= MA_TYP;
  2138. }
  2139. if (op->offset < 0 || op->offset > 65535)
  2140. {
  2141. match |= MA_RNG;
  2142. }
  2143. break;
  2144. }
  2145. case SGM: // Segment register in ModRM byte
  2146. {
  2147. if (op->type != SGM)
  2148. {
  2149. match |= MA_TYP;
  2150. }
  2151. if (datasize == 0)
  2152. {
  2153. datasize = 2;
  2154. }
  2155. if (datasize != 2)
  2156. {
  2157. match |= MA_DIF;
  2158. }
  2159. break;
  2160. }
  2161. case SCM: // Segment register in command byte
  2162. {
  2163. if (op->type != SGM)
  2164. {
  2165. match |= MA_TYP;
  2166. }
  2167. break;
  2168. }
  2169. case CRX: // Control register CRx
  2170. case DRX: // Debug register DRx
  2171. {
  2172. if (op->type != arg)
  2173. {
  2174. match |= MA_TYP;
  2175. }
  2176. if (datasize == 0)
  2177. {
  2178. datasize = 4;
  2179. }
  2180. if (datasize != 4)
  2181. {
  2182. match |= MA_DIF;
  2183. }
  2184. break;
  2185. }
  2186. case PRN: // Near return address (pseudooperand)
  2187. case PRF: // Far return address (pseudooperand)
  2188. case PAC: // Accumulator (AL/AX/EAX, pseudooperand)
  2189. case PAH: // AH (in LAHF/SAHF, pseudooperand)
  2190. case PFL: // Lower byte of flags (pseudooperand)
  2191. case PS0: // Top of FPU stack (pseudooperand)
  2192. case PS1: // ST(1) (pseudooperand)
  2193. case PCX: // CX/ECX (pseudooperand)
  2194. case PDI: // EDI (pseudooperand in MMX extentions)
  2195. {
  2196. break;
  2197. }
  2198. default: // Undefined type of operand
  2199. {
  2200. strcpy(errtext, ("Internal Assembler error"));
  2201. goto error;
  2202. }
  2203. } // End of switch (arg)
  2204. if ((jmpmode & 0x80) != 0)
  2205. {
  2206. match |= MA_JMP;
  2207. }
  2208. if (match != 0)
  2209. {
  2210. break; // Some of the operands doesn't match
  2211. }
  2212. } // End of operand matching loop
  2213. if (match == 0)
  2214. {
  2215. // Exact match found
  2216. if (attempt > 0)
  2217. {
  2218. --attempt; // Well, try to find yet another match
  2219. nameok = 0;
  2220. }
  2221. else
  2222. {
  2223. break;
  2224. }
  2225. }
  2226. } // End of command search loop
  2227. // Check whether some error was detected. If several errors were found
  2228. // simultaneously, report one (roughly in order of significance).
  2229. if (nameok == 0)
  2230. {
  2231. // Mnemonic unavailable
  2232. strcpy(errtext, ("Unrecognized command"));
  2233. m_pAsmCmd = nameend;
  2234. goto error;
  2235. }
  2236. if (match != 0)
  2237. {
  2238. // Command not found
  2239. if (minop > 0 && aop[minop - 1].type == NNN)
  2240. {
  2241. strcpy(errtext, ("Too few operands"));
  2242. }
  2243. else if (maxop < 3 && aop[maxop].type != NNN)
  2244. {
  2245. strcpy(errtext, ("Too many operands"));
  2246. }
  2247. else if (nameok > 1) // More that 1 command
  2248. {
  2249. strcpy(errtext, ("Command does not support given operands"));
  2250. }
  2251. else if (match & MA_JMP)
  2252. {
  2253. strcpy(errtext, ("Invalid jump size modifier"));
  2254. }
  2255. else if (match & MA_NOP)
  2256. {
  2257. strcpy(errtext, ("Wrong number of operands"));
  2258. }
  2259. else if (match & MA_TYP)
  2260. {
  2261. strcpy(errtext, ("Command does not support given operands"));
  2262. }
  2263. else if (match & MA_NOS)
  2264. {
  2265. strcpy(errtext, ("Please specify operand size"));
  2266. }
  2267. else if (match & MA_SIZ)
  2268. {
  2269. strcpy(errtext, ("Bad operand size"));
  2270. }
  2271. else if (match & MA_DIF)
  2272. {
  2273. strcpy(errtext, ("Different size of operands"));
  2274. }
  2275. else if (match & MA_SEG)
  2276. {
  2277. strcpy(errtext, ("Invalid segment register"));
  2278. }
  2279. else if (match & MA_RNG)
  2280. {
  2281. strcpy(errtext, ("Constant out of expected range"));
  2282. }
  2283. else
  2284. {
  2285. strcpy(errtext, ("Erroneous command"));
  2286. }
  2287. //
  2288. goto error;
  2289. }
  2290. // Exact match found. Now construct the code.
  2291. hasrm = 0; // Whether command has ModR/M byte
  2292. hassib = 0; // Whether command has SIB byte
  2293. dispsize = 0; // Size of displacement (if any)
  2294. immsize = 0; // Size of immediate data (if any)
  2295. segment = SEG_UNDEF; // Necessary segment prefix
  2296. jmpsize = 0; // No relative jumps
  2297. //
  2298. memset(tcode, 0, sizeof(tcode));
  2299. *(ulong*)tcode = pd->code & pd->mask;
  2300. memset(tmask, 0, sizeof(tmask));
  2301. *(ulong*)tmask = pd->mask;
  2302. //
  2303. i = pd->len - 1; // Last byte of command itself
  2304. if (rep)
  2305. {
  2306. i++; // REPxx prefixes count as extra byte
  2307. }
  2308. // In some cases at least one operand must have explicit size declaration (as
  2309. // in MOV [EAX],1). This preliminary check does not include all cases.
  2310. if (pd->bits == WW || pd->bits == WS || pd->bits == WP)
  2311. {
  2312. if (datasize == 0)
  2313. {
  2314. strcpy(errtext, ("Please specify operand size"));
  2315. goto error;
  2316. }
  2317. else if (datasize > 1)
  2318. {
  2319. tcode[i] |= 0x01; // WORD or DWORD size of operands
  2320. //message(tcode[i]);
  2321. }
  2322. tmask[i] |= 0x01;
  2323. }
  2324. else if (pd->bits == W3)
  2325. {
  2326. if (datasize == 0)
  2327. {
  2328. strcpy(errtext, ("Please specify operand size"));
  2329. goto error;
  2330. }
  2331. else if (datasize > 1)
  2332. {
  2333. tcode[i] |= 0x08; // WORD or DWORD size of operands
  2334. }
  2335. tmask[i] |= 0x08;
  2336. }
  2337. // Present suffix of 3DNow! command as immediate byte operand.
  2338. if ((pd->type & C_TYPEMASK) == C_NOW)
  2339. {
  2340. immsize = 1;
  2341. immediate = (pd->code >> 16) & 0xFF;
  2342. }
  2343. // Process operands again, this time constructing the code.
  2344. anydisp = anyimm = anyjmp = 0;
  2345. for (j = 0; j < 3; j++)
  2346. {
  2347. // Up to 3 operands
  2348. op = aop + j;
  2349. if (j == 0)
  2350. {
  2351. arg = pd->arg1;
  2352. }
  2353. else if (j == 1)
  2354. {
  2355. arg = pd->arg2;
  2356. }
  2357. else
  2358. {
  2359. arg = pd->arg3;
  2360. }
  2361. if (arg == NNN)
  2362. {
  2363. break; // All operands processed
  2364. }
  2365. switch (arg)
  2366. {
  2367. case REG: // Integer register in Reg field
  2368. case RG4: // Integer 4-byte register in Reg field
  2369. case RMX: // MMX register MMx
  2370. case R3D: // 3DNow! register MMx
  2371. case CRX: // Control register CRx
  2372. case DRX: // Debug register DRx
  2373. {
  2374. hasrm = 1;
  2375. if (op->index < 8)
  2376. {
  2377. tcode[i + 1] |= (char)(op->index << 3);
  2378. tmask[i + 1] |= 0x38;
  2379. }
  2380. break;
  2381. }
  2382. case RCM: // Integer register in command byte
  2383. case RST: // FPU register (ST(i)) in command byte
  2384. {
  2385. if (op->index < 8)
  2386. {
  2387. tcode[i] |= (char)op->index;
  2388. tmask[i] |= 0x07;
  2389. }
  2390. break;
  2391. }
  2392. case RAC: // Accumulator (AL/AX/EAX, implicit)
  2393. case RAX: // AX (2-byte, implicit)
  2394. case RDX: // DX (16-bit implicit port address)
  2395. case RCL: // Implicit CL register (for shifts)
  2396. case RS0: // Top of FPU stack (ST(0))
  2397. case MDE: // Destination in string op's ([EDI])
  2398. case C01: // Implicit constant 1 (for shifts)
  2399. {
  2400. break; // Simply skip implicit operands
  2401. }
  2402. case MSO: // Source in string op's ([ESI])
  2403. case MXL: // XLAT operand ([EBX+AL])
  2404. {
  2405. if (op->segment != SEG_UNDEF && op->segment != SEG_DS)
  2406. {
  2407. segment = op->segment;
  2408. }
  2409. break;
  2410. }
  2411. case MRG: // Memory/register in ModRM byte
  2412. case MRJ: // Memory/reg in ModRM as JUMP target
  2413. case MR1: // 1-byte memory/register in ModRM byte
  2414. case MR2: // 2-byte memory/register in ModRM byte
  2415. case MR4: // 4-byte memory/register in ModRM byte
  2416. case RR4: // 4-byte memory/register (register only)
  2417. case MR8: // 8-byte memory/MMX register in ModRM
  2418. case RR8: // 8-byte MMX register only in ModRM
  2419. case MRD: // 8-byte memory/3DNow! register in ModRM
  2420. case RRD: // 8-byte memory/3DNow! (register only)
  2421. {
  2422. hasrm = 1;
  2423. if (op->type != MRG)
  2424. {
  2425. // Register in ModRM byte
  2426. tcode[i + 1] |= 0xC0;
  2427. tmask[i + 1] |= 0xC0;
  2428. if (op->index < 8)
  2429. {
  2430. tcode[i + 1] |= (char)op->index;
  2431. tmask[i + 1] |= 0x07;
  2432. }
  2433. break;
  2434. }
  2435. } // Note: NO BREAK, continue with address
  2436. case MMA: // Memory address in ModRM byte for LEA
  2437. case MML: // Memory in ModRM byte (for LES)
  2438. case MMS: // Memory in ModRM byte (as SEG:OFFS)
  2439. case MM6: // Memory in ModRm (6-byte descriptor)
  2440. case MMB: // Two adjacent memory locations (BOUND)
  2441. case MD2: // Memory in ModRM byte (16-bit integer)
  2442. case MB2: // Memory in ModRM byte (16-bit binary)
  2443. case MD4: // Memory in ModRM byte (32-bit integer)
  2444. case MD8: // Memory in ModRM byte (64-bit integer)
  2445. case MDA: // Memory in ModRM byte (80-bit BCD)
  2446. case MF4: // Memory in ModRM byte (32-bit float)
  2447. case MF8: // Memory in ModRM byte (64-bit float)
  2448. case MFA: // Memory in ModRM byte (80-bit float)
  2449. case MFE: // Memory in ModRM byte (FPU environment)
  2450. case MFS: // Memory in ModRM byte (FPU state)
  2451. case MFX: // Memory in ModRM byte (ext. FPU state)
  2452. {
  2453. hasrm = 1;
  2454. displacement = op->offset;
  2455. anydisp = op->anyoffset;
  2456. if (op->base < 0 && op->index < 0)
  2457. {
  2458. dispsize = 4; // Special case of immediate address
  2459. if (op->segment != SEG_UNDEF && op->segment != SEG_DS)
  2460. {
  2461. segment = op->segment;
  2462. }
  2463. tcode[i + 1] |= 0x05;
  2464. tmask[i + 1] |= 0xC7;
  2465. }
  2466. else if (op->index < 0 && op->base != REG_ESP)
  2467. {
  2468. tmask[i + 1] |= 0xC0; // SIB byte unnecessary
  2469. if (op->offset == 0 && op->anyoffset == 0 && op->base != REG_EBP)
  2470. {
  2471. }// [EBP] always requires offset
  2472. else if ((constsize & 1) != 0 && ((op->offset >= -128 && op->offset < 128) || op->anyoffset != 0))
  2473. {
  2474. tcode[i + 1] |= 0x40; // Disp8
  2475. dispsize = 1;
  2476. }
  2477. else
  2478. {
  2479. tcode[i + 1] |= 0x80; // Disp32
  2480. dispsize = 4;
  2481. }
  2482. if (op->base < 8)
  2483. {
  2484. if (op->segment != SEG_UNDEF && op->segment != g_addr32[op->base].defseg)
  2485. {
  2486. segment = op->segment;
  2487. }
  2488. tcode[i + 1] |= (char)op->base; // Note that case [ESP] has base<0.
  2489. tmask[i + 1] |= 0x07;
  2490. }
  2491. else
  2492. {
  2493. segment = op->segment;
  2494. }
  2495. }
  2496. else // SIB byte necessary
  2497. {
  2498. hassib = 1;
  2499. if (op->base == REG_EBP && // EBP as base requires offset, optimize
  2500. op->index >= 0 && op->scale == 1 && op->offset == 0 && op->anyoffset == 0)
  2501. {
  2502. op->base = op->index;
  2503. op->index = REG_EBP;
  2504. }
  2505. if (op->index == REG_ESP && // ESP cannot be an index, reorder
  2506. op->scale <= 1)
  2507. {
  2508. op->index = op->base;
  2509. op->base = REG_ESP;
  2510. op->scale = 1;
  2511. }
  2512. if (op->base < 0 && // No base means 4-byte offset, optimize
  2513. op->index >= 0 && op->scale == 2 && op->offset >= -128 && op->offset < 128 && op->anyoffset == 0)
  2514. {
  2515. op->base = op->index;
  2516. op->scale = 1;
  2517. }
  2518. if (op->index == REG_ESP)
  2519. {
  2520. // Reordering was unsuccessful
  2521. strcpy(errtext, ("Invalid indexing mode"));
  2522. goto error;
  2523. }
  2524. if (op->base < 0)
  2525. {
  2526. tcode[i + 1] |= 0x04;
  2527. dispsize = 4;
  2528. }
  2529. else if (op->offset == 0 && op->anyoffset == 0 && op->base != REG_EBP)
  2530. {
  2531. tcode[i + 1] |= 0x04; // No displacement
  2532. }
  2533. else if ((constsize & 1) != 0 && ((op->offset >= -128 && op->offset < 128) || op->anyoffset != 0))
  2534. {
  2535. tcode[i + 1] |= 0x44; // Disp8
  2536. dispsize = 1;
  2537. }
  2538. else
  2539. {
  2540. tcode[i + 1] |= 0x84; // Disp32
  2541. dispsize = 4;
  2542. }
  2543. tmask[i + 1] |= 0xC7; // ModRM completed, proceed with SIB
  2544. if (op->scale == 2)
  2545. {
  2546. tcode[i + 2] |= 0x40;
  2547. }
  2548. else if (op->scale == 4)
  2549. {
  2550. tcode[i + 2] |= 0x80;
  2551. }
  2552. else if (op->scale == 8)
  2553. {
  2554. tcode[i + 2] |= 0xC0;
  2555. }
  2556. tmask[i + 2] |= 0xC0;
  2557. if (op->index < 8)
  2558. {
  2559. if (op->index < 0)
  2560. {
  2561. op->index = 0x04;
  2562. }
  2563. tcode[i + 2] |= (char)(op->index << 3);
  2564. tmask[i + 2] |= 0x38;
  2565. }
  2566. if (op->base < 8)
  2567. {
  2568. if (op->base < 0)
  2569. {
  2570. op->base = 0x05;
  2571. }
  2572. if (op->segment != SEG_UNDEF && op->segment != g_addr32[op->base].defseg)
  2573. {
  2574. segment = op->segment;
  2575. }
  2576. tcode[i + 2] |= (char)op->base;
  2577. tmask[i + 2] |= 0x07;
  2578. }
  2579. else
  2580. {
  2581. segment = op->segment;
  2582. }
  2583. }
  2584. break;
  2585. }
  2586. case IMM: // Immediate data (8 or 16/32)
  2587. case IMU: // Immediate unsigned data (8 or 16/32)
  2588. case VXD: // VxD service (32-bit only)
  2589. {
  2590. if (datasize == 0 && pd->arg2 == NNN && (pd->bits == SS || pd->bits == WS))
  2591. {
  2592. datasize = 4;
  2593. }
  2594. if (datasize == 0)
  2595. {
  2596. strcpy(errtext, ("Please specify operand size"));
  2597. goto error;
  2598. }
  2599. immediate = op->offset;
  2600. anyimm = op->anyoffset;
  2601. if (pd->bits == SS || pd->bits == WS)
  2602. {
  2603. if (datasize > 1 && (constsize & 2) != 0 &&
  2604. ((immediate >= -128 && immediate < 128) || op->anyoffset != 0))
  2605. {
  2606. immsize = 1;
  2607. tcode[i] |= 0x02;
  2608. }
  2609. else
  2610. {
  2611. immsize = datasize;
  2612. }
  2613. tmask[i] |= 0x02;
  2614. }
  2615. else
  2616. {
  2617. immsize = datasize;
  2618. }
  2619. break;
  2620. }
  2621. case IMX: // Immediate sign-extendable byte
  2622. case IMS: // Immediate byte (for shifts)
  2623. case IM1: // Immediate byte
  2624. {
  2625. if (immsize == 2) // To accomodate ENTER instruction
  2626. {
  2627. immediate = (immediate & 0xFFFF) | (op->offset << 16);
  2628. }
  2629. else
  2630. {
  2631. immediate = op->offset;
  2632. }
  2633. anyimm |= op->anyoffset;
  2634. immsize++;
  2635. break;
  2636. }
  2637. case IM2: // Immediate word (ENTER/RET)
  2638. {
  2639. immediate = op->offset;
  2640. anyimm = op->anyoffset;
  2641. immsize = 2;
  2642. break;
  2643. }
  2644. case IMA: // Immediate absolute near data address
  2645. {
  2646. if (op->segment != SEG_UNDEF && op->segment != SEG_DS)
  2647. {
  2648. segment = op->segment;
  2649. }
  2650. displacement = op->offset;
  2651. anydisp = op->anyoffset;
  2652. dispsize = 4;
  2653. break;
  2654. }
  2655. case JOB: // Immediate byte offset (for jumps)
  2656. {
  2657. jmpoffset = op->offset;
  2658. anyjmp = op->anyoffset;
  2659. jmpsize = 1;
  2660. break;
  2661. }
  2662. case JOW: // Immediate full offset (for jumps)
  2663. {
  2664. jmpoffset = op->offset;
  2665. anyjmp = op->anyoffset;
  2666. jmpsize = 4;
  2667. break;
  2668. }
  2669. case JMF: // Immediate absolute far jump/call addr
  2670. {
  2671. displacement = op->offset;
  2672. anydisp = op->anyoffset;
  2673. dispsize = 4;
  2674. immediate = op->segment;
  2675. anyimm = op->anyoffset;
  2676. immsize = 2;
  2677. break;
  2678. }
  2679. case SGM: // Segment register in ModRM byte
  2680. {
  2681. hasrm = 1;
  2682. if (op->index < 6)
  2683. {
  2684. tcode[i + 1] |= (char)(op->index << 3);
  2685. tmask[i + 1] |= 0x38;
  2686. }
  2687. break;
  2688. }
  2689. case SCM: // Segment register in command byte
  2690. {
  2691. if (op->index == SEG_FS || op->index == SEG_GS)
  2692. {
  2693. tcode[0] = (char)(0x0F);
  2694. tmask[0] = (char)(0xFF);
  2695. i = 1;
  2696. if (strcmp(name, ("PUSH")) == 0)
  2697. {
  2698. tcode[i] = (char)((op->index << 3) | 0x80);
  2699. }
  2700. else
  2701. {
  2702. tcode[i] = (char)((op->index << 3) | 0x81);
  2703. }
  2704. tmask[i] = (char)(0xFF);
  2705. }
  2706. else if (op->index < 6)
  2707. {
  2708. if (op->index == SEG_CS && strcmp(name, ("POP")) == 0)
  2709. {
  2710. strcpy(errtext, ("Unable to POP CS"));
  2711. goto error;
  2712. }
  2713. tcode[i] = (char)((tcode[i] & 0xC7) | (op->index << 3));
  2714. }
  2715. else
  2716. {
  2717. tcode[i] &= 0xC7;
  2718. tmask[i] &= 0xC7;
  2719. }
  2720. break;
  2721. }
  2722. case PRN: // Near return address (pseudooperand)
  2723. case PRF: // Far return address (pseudooperand)
  2724. case PAC: // Accumulator (AL/AX/EAX, pseudooperand)
  2725. case PAH: // AH (in LAHF/SAHF, pseudooperand)
  2726. case PFL: // Lower byte of flags (pseudooperand)
  2727. case PS0: // Top of FPU stack (pseudooperand)
  2728. case PS1: // ST(1) (pseudooperand)
  2729. case PCX: // CX/ECX (pseudooperand)
  2730. case PDI: // EDI (pseudooperand in MMX extentions)
  2731. {
  2732. break; // Simply skip preudooperands
  2733. }
  2734. default: // Undefined type of operand
  2735. {
  2736. strcpy(errtext, ("Internal Assembler error"));
  2737. goto error;
  2738. }
  2739. }
  2740. }
  2741. // Gather parts of command together in the complete command.
  2742. j = 0;
  2743. if (lock != 0)
  2744. {
  2745. // Lock prefix specified
  2746. model->code[j] = (char)0xF0;
  2747. model->mask[j] = (char)0xFF;
  2748. j++;
  2749. }
  2750. if (datasize == 2 && pd->bits != FF)
  2751. {
  2752. // Data size prefix necessary
  2753. model->code[j] = (char)0x66;
  2754. model->mask[j] = (char)0xFF;
  2755. j++;
  2756. }
  2757. if (addrsize == 2)
  2758. {
  2759. // Address size prefix necessary
  2760. model->code[j] = (char)0x67;
  2761. model->mask[j] = (char)0xFF;
  2762. j++;
  2763. }
  2764. if (segment != SEG_UNDEF)
  2765. {
  2766. // Segment prefix necessary
  2767. if (segment == SEG_ES)
  2768. {
  2769. model->code[j] = 0x26;
  2770. }
  2771. else if (segment == SEG_CS)
  2772. {
  2773. model->code[j] = 0x2E;
  2774. }
  2775. else if (segment == SEG_SS)
  2776. {
  2777. model->code[j] = 0x36;
  2778. }
  2779. else if (segment == SEG_DS)
  2780. {
  2781. model->code[j] = 0x3E;
  2782. }
  2783. else if (segment == SEG_FS)
  2784. {
  2785. model->code[j] = 0x64;
  2786. }
  2787. else if (segment == SEG_GS)
  2788. {
  2789. model->code[j] = 0x65;
  2790. }
  2791. else
  2792. {
  2793. strcpy(errtext, ("Internal Assembler error"));
  2794. goto error;
  2795. }
  2796. model->mask[j] = (char)0xFF;
  2797. j++;
  2798. }
  2799. if (dispsize > 0)
  2800. {
  2801. memcpy(tcode + i + 1 + hasrm + hassib, &displacement, dispsize);
  2802. if (anydisp == 0)
  2803. {
  2804. memset(tmask + i + 1 + hasrm + hassib, 0xFF, dispsize);
  2805. }
  2806. }
  2807. if (immsize > 0)
  2808. {
  2809. if (immsize == 1)
  2810. {
  2811. l = 0xFFFFFF00L;
  2812. }
  2813. else if (immsize == 2)
  2814. {
  2815. l = 0xFFFF0000L;
  2816. }
  2817. else
  2818. {
  2819. l = 0L;
  2820. }
  2821. if ((immediate & l) != 0 && (immediate & l) != l)
  2822. {
  2823. strcpy(errtext, ("Constant does not fit into operand"));
  2824. goto error;
  2825. }
  2826. memcpy(tcode + i + 1 + hasrm + hassib + dispsize, &immediate, immsize);
  2827. if (anyimm == 0)
  2828. {
  2829. memset(tmask + i + 1 + hasrm + hassib + dispsize, 0xFF, immsize);
  2830. }
  2831. }
  2832. //
  2833. i = i + 1 + hasrm + hassib + dispsize + immsize;
  2834. jmpoffset = jmpoffset - (i + j + jmpsize);
  2835. model->jmpsize = jmpsize;
  2836. model->jmpoffset = jmpoffset;
  2837. model->jmppos = i + j;
  2838. //
  2839. if (jmpsize != 0)
  2840. {
  2841. if (ip != 0)
  2842. {
  2843. jmpoffset = jmpoffset - ip;
  2844. if (jmpsize == 1 && anyjmp == 0 && (jmpoffset < -128 || jmpoffset >= 128))
  2845. {
  2846. if (longjump == 0 && (jmpmode & 0x03) == 0)
  2847. {
  2848. longjump = 1;
  2849. goto retrylongjump;
  2850. }
  2851. sprintf(errtext, ("Relative jump out of range, use %s LONG form"), name);
  2852. goto error;
  2853. }
  2854. memcpy(tcode + i, &jmpoffset, jmpsize);
  2855. }
  2856. if (anyjmp == 0)
  2857. {
  2858. memset(tmask + i, 0xFF, jmpsize);
  2859. }
  2860. i += jmpsize;
  2861. }
  2862. //
  2863. memcpy(model->code + j, tcode, i);
  2864. memcpy(model->mask + j, tmask, i);
  2865. i += j;
  2866. model->length = i;
  2867. return i; // Positive value: length of code
  2868. error:
  2869. model->length = 0;
  2870. return cmd - m_pAsmCmd; // Negative value: position of error
  2871. }