如何提取特定字符前后嵌套括号的内容？

发布于 2025-01-11 19:41:15 字数 368 浏览 1 评论 0原文

在以下字符串中：

(10+10)*2*((1+1)*1)√(16)+(12*12)+2

我尝试将 ((1+1)*1)√(16) 替换为 nthroot(16,(1+1)*1)。
具体来说，我想提取 √ 两侧第一组括号中的所有内容。
括号本身可以包含多层括号和许多不同的符号。
语言是JavaScript。

我尝试了一些类似 <str>.replace(/\((.+)\)√\((.+)\)/g, 'nthroot($1,$2)') 的写法，
但我学习正则表达式的每一次尝试都失败了，我无法弄清楚这一点。

原文

In the following string:

(10+10)*2*((1+1)*1)√(16)+(12*12)+2

I am trying to replace ((1+1)*1)√(16) with nthroot(16,(1+1)*1).
Specifically, I want to extract everything in the first sets of brackets on each side of the √.
The brackets themselves could contain many layers of brackets and many different symbols.
Language is JavaScript.

I tried a couple of things like <str>.replace(/\((.+)\)√\((.+)\)/g, 'nthroot($1,$2)')
but every one of my attempts at learning RegEx fails and I can't figure this out.

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

反目相谮 2025-01-18 19:41:15

我认为您目前无法使用 Javascript 中的正则表达式以通用方式解决此问题，因为您无法递归地匹配平衡括号。

就我个人而言，我会通过将文本拆分为其组成字符，构建括号组，然后用某种逻辑将所有内容重新连接在一起来解决此问题。例如：

/**
 * Splits an expression into top-level groups: every balanced parenthesized
 * run becomes one group, every other character becomes its own group.
 *
 * @param {string} source - The expression to split.
 * @returns {string[]} The groups, in source order; joining them yields `source`.
 */
function splitIntoGroups(source) {
    const groups = [];
    // Must start at 0 (not null): `null === 0` is false, which would glue any
    // leading characters onto the first parenthesized group.
    let depth = 0;
    let group = '';

    for (const char of source) {
        if (char === '(') {
            depth++;
        } else if (char === ')' && depth > 0) {
            // A stray ')' at depth 0 is kept as an ordinary character so the
            // nesting counter never goes negative.
            depth--;
        }

        group += char;

        // Depth 0 means the current group is a single character or a
        // completely closed parenthesized run.
        if (depth === 0) {
            groups.push(group);
            group = '';
        }
    }

    // An unclosed '(' leaves characters behind. Emit them one by one so that
    // nothing is dropped and they are never mistaken for a balanced group.
    groups.push(...group);

    return groups;
}

/**
 * Rewrites every `(a)√(b)` occurrence (both sides being balanced
 * parenthesized groups) as `nthroot(b, (a))`. Everything else is copied
 * through unchanged.
 *
 * @param {string} source - The expression to convert.
 * @returns {string} The converted expression.
 */
function convertRootsToNthroot(source) {
    const groups = splitIntoGroups(source);
    // Balanced groups longer than one character always start with '('.
    const isParGroup = (candidate) => candidate.length > 1 && candidate[0] === '(';
    let result = '';

    for (let i = 0; i < groups.length; i++) {
        // Bounds check first, so groups[i + 2] is only read when it exists.
        const isRoot = i + 2 < groups.length && groups[i + 1] === '√';

        if (isRoot && isParGroup(groups[i]) && isParGroup(groups[i + 2])) {
            // Drop the outer parentheses of the group right of the root sign.
            const stripped = groups[i + 2].replace(/^\(|\)$/g, '');
            result += `nthroot(${stripped}, ${groups[i]})`;
            // Skip the root sign and the group that were just consumed.
            i += 2;
        } else {
            // Append non-root groups as they are.
            result += groups[i];
        }
    }

    return result;
}

const text = '(10+10)*2*((1+1)*1)√(16)+(12*12)+2';
const changedText = convertRootsToNthroot(text);

console.log('Before:', text, '\n', 'After:', changedText);

不过也不敢说它漂亮。 ;)

I don't think you can currently solve this in a general way with a regular expression in Javascript, since you can't match balanced parentheses recursively.

Personally, I'd approach this by splitting the text into its constituent characters, building groups of parentheses, and joining all back together with some logic. For example:

/**
 * Splits an expression into top-level groups: every balanced parenthesized
 * run becomes one group, every other character becomes its own group.
 *
 * @param {string} source - The expression to split.
 * @returns {string[]} The groups, in source order; joining them yields `source`.
 */
function splitIntoGroups(source) {
    const groups = [];
    // Must start at 0 (not null): `null === 0` is false, which would glue any
    // leading characters onto the first parenthesized group.
    let depth = 0;
    let group = '';

    for (const char of source) {
        if (char === '(') {
            depth++;
        } else if (char === ')' && depth > 0) {
            // A stray ')' at depth 0 is kept as an ordinary character so the
            // nesting counter never goes negative.
            depth--;
        }

        group += char;

        // Depth 0 means the current group is a single character or a
        // completely closed parenthesized run.
        if (depth === 0) {
            groups.push(group);
            group = '';
        }
    }

    // An unclosed '(' leaves characters behind. Emit them one by one so that
    // nothing is dropped and they are never mistaken for a balanced group.
    groups.push(...group);

    return groups;
}

/**
 * Rewrites every `(a)√(b)` occurrence (both sides being balanced
 * parenthesized groups) as `nthroot(b, (a))`. Everything else is copied
 * through unchanged.
 *
 * @param {string} source - The expression to convert.
 * @returns {string} The converted expression.
 */
function convertRootsToNthroot(source) {
    const groups = splitIntoGroups(source);
    // Balanced groups longer than one character always start with '('.
    const isParGroup = (candidate) => candidate.length > 1 && candidate[0] === '(';
    let result = '';

    for (let i = 0; i < groups.length; i++) {
        // Bounds check first, so groups[i + 2] is only read when it exists.
        const isRoot = i + 2 < groups.length && groups[i + 1] === '√';

        if (isRoot && isParGroup(groups[i]) && isParGroup(groups[i + 2])) {
            // Drop the outer parentheses of the group right of the root sign.
            const stripped = groups[i + 2].replace(/^\(|\)$/g, '');
            result += `nthroot(${stripped}, ${groups[i]})`;
            // Skip the root sign and the group that were just consumed.
            i += 2;
        } else {
            // Append non-root groups as they are.
            result += groups[i];
        }
    }

    return result;
}

const text = '(10+10)*2*((1+1)*1)√(16)+(12*12)+2';
const changedText = convertRootsToNthroot(text);

console.log('Before:', text, '\n', 'After:', changedText);

Not saying it's pretty, though. ;)

回复收藏 0 原文

冷︶言冷语的世界 2025-01-18 19:41:15

解析任务，就像OP所要求的那样，不能仅用正则表达式来涵盖。

尤其是，要正确解析标记（token）中的嵌套括号，需要一个简单的、不依赖正则表达式的自定义解析过程。更进一步，对于 OP 的用例，需要分别从左侧和右侧的标记（即由 √ 分隔开的标记）中解析出正确/有效的括号表达式。

一种可能的方法是基于单个 split/reduce 任务，并配合一些专门的辅助函数……

/**
 * Extracts the balanced parenthesized expression a token starts with,
 * found by counting parentheses from the token's left side.
 *
 * @param {string} token - Text expected to begin with '('.
 * @returns {string} The leading balanced expression (outer parentheses
 *   included), or '' when the token does not start with '(' or the opening
 *   parenthesis is never closed.
 */
function createFirstValidParenthesizedExpression(token) {
  if (token[0] !== '(') {
    return '';
  }
  let depth = 0;

  for (let pos = 0; pos < token.length; pos += 1) {
    const current = token[pos];

    if (current === '(') {
      depth += 1;
    } else if (current === ')') {
      depth -= 1;
    }
    // Back at depth 0: the parenthesis opened at position 0 is closed here.
    if (depth === 0) {
      return token.slice(0, pos + 1);
    }
  }
  // Ran out of characters while still nested.
  return '';
}
/**
 * Extracts the balanced parenthesized expression a token ends with,
 * found by counting parentheses from the token's right side.
 *
 * @param {string} token - Text expected to end with ')'.
 * @returns {string} The trailing balanced expression (outer parentheses
 *   included), or '' when the token does not end with ')' or the closing
 *   parenthesis has no matching opening one.
 */
function createFirstValidParenthesizedExpressionFromRight(token) {
  if (token.slice(-1) !== ')') {
    return '';
  }
  let depth = 0;

  for (let pos = token.length - 1; pos >= 0; pos -= 1) {
    const current = token[pos];

    if (current === ')') {
      depth += 1;
    } else if (current === '(') {
      depth -= 1;
    }
    // Back at depth 0: the final ')' has found its matching '('.
    if (depth === 0) {
      return token.slice(pos);
    }
  }
  // Reached the start of the token while still nested.
  return '';
}

/**
 * Escapes every character of an expression that has a special meaning in a
 * regular expression, so the result can be embedded in a `RegExp` pattern
 * and match the expression literally.
 *
 * @param {string} expression - Raw (math) expression text.
 * @returns {string} The expression with each regex metacharacter prefixed
 *   by a backslash.
 */
function escapeExpressionChars(expression) {
  // `$&` in the replacement string stands for the matched character itself,
  // so each match is rewritten as a backslash followed by that character.
  return expression.replace(/[-+*()/.?^${}|[\]\\]/g, '\\$&');
}

/**
 * Reducer for the parts of an expression that was split at '√': merges the
 * text left and right of one root sign into a single string in which the
 * root is written as `nthroot(<right operand>,<left operand>)`.
 *
 * @param {string} leftHandToken - Text to the left of the '√' (for later
 *   reduce steps, the already converted text accumulated so far).
 * @param {string} rightHandToken - Text to the right of the '√'.
 * @returns {string} The trimmed left remainder, the `nthroot(...)` call and
 *   the trimmed right remainder, concatenated.
 */
function createNthRootExpression(leftHandToken, rightHandToken) {
  const leftToken = leftHandToken.trim();
  const rightToken = rightHandToken.trim();

  // patterns that match partial 'nthroot' expressions
  // which are free of parentheses.
  const regXSimpleLeftHandExpression = /[\d*/]+$/;
  const regXSimpleRightHandExpression = /^[\d*/]+|^\([^+-]*\)/;

  // operand taken from the end of the token to the left of '√';
  // falls back to counting parentheses when the simple pattern fails.
  const leftHandExpression =
    leftToken.match(regXSimpleLeftHandExpression)?.[0] ||
    createFirstValidParenthesizedExpressionFromRight(leftToken);

  // operand taken from the start of the token to the right of '√';
  // falls back to counting parentheses when the simple pattern fails.
  const rightHandExpression =
    rightToken.match(regXSimpleRightHandExpression)?.[0] ||
    createFirstValidParenthesizedExpression(rightToken);

  // The left operand is always a suffix of its token and the right operand
  // always a prefix, so both are removed by length. This is equivalent to
  // replacing an escaped, anchored pattern, without building a RegExp.
  const leftRemainder =
    leftToken.slice(0, leftToken.length - leftHandExpression.length);
  const rightRemainder = rightToken.slice(rightHandExpression.length);

  return [

    leftRemainder,
    `nthroot(${ rightHandExpression },${ leftHandExpression })`,
    rightRemainder,

  ].join('');
}

// The expression from the question.
const sampleExpressionOriginal =
  '(10+10)*2*((1+1)*1)√(16)+(12*12)+2';
// A variant with two roots, one of them without parentheses on its right.
const sampleExpressionEdgeCase =
  '(10+10)*2*((1+1)*1)√16+(12*12)+2√(4*(1+2))+3';

// Logs a headline, the raw expression, its parts after splitting at '√',
// and the result of reducing those parts with `createNthRootExpression`.
const logNthRootDemo = (headline, expression) => {
  console.log(headline);
  console.log('original value ...\n', expression);
  console.log('original value, after split ...', expression.split('√'));
  console.log(
    'value, after "nthroot" creation ...\n',
    expression.split('√').reduce(createNthRootExpression)
  );
};

logNthRootDemo(
  "+++ processing the OP's expression +++",
  sampleExpressionOriginal
);
console.log('\n');

logNthRootDemo(
  "+++ processing a more edge case like expression +++",
  sampleExpressionEdgeCase
);

Parsing tasks, like what the OP is asking for, can not be covered by a regular expression alone.

Especially a token's correct parsing for nested parentheses needs a simple and regex free custom parsing process. Even more, as for the OP's use case one needs to parse a correct/valid parenthesized expression each from a left and a right hand-side token (the ones that are/were separated by √).

A possible approach could be based on a single split/reduce task with the collaboration of some specialized helper functions ...

/**
 * Extracts the balanced parenthesized expression a token starts with,
 * found by counting parentheses from the token's left side.
 *
 * @param {string} token - Text expected to begin with '('.
 * @returns {string} The leading balanced expression (outer parentheses
 *   included), or '' when the token does not start with '(' or the opening
 *   parenthesis is never closed.
 */
function createFirstValidParenthesizedExpression(token) {
  if (token[0] !== '(') {
    return '';
  }
  let depth = 0;

  for (let pos = 0; pos < token.length; pos += 1) {
    const current = token[pos];

    if (current === '(') {
      depth += 1;
    } else if (current === ')') {
      depth -= 1;
    }
    // Back at depth 0: the parenthesis opened at position 0 is closed here.
    if (depth === 0) {
      return token.slice(0, pos + 1);
    }
  }
  // Ran out of characters while still nested.
  return '';
}
/**
 * Extracts the balanced parenthesized expression a token ends with,
 * found by counting parentheses from the token's right side.
 *
 * @param {string} token - Text expected to end with ')'.
 * @returns {string} The trailing balanced expression (outer parentheses
 *   included), or '' when the token does not end with ')' or the closing
 *   parenthesis has no matching opening one.
 */
function createFirstValidParenthesizedExpressionFromRight(token) {
  if (token.slice(-1) !== ')') {
    return '';
  }
  let depth = 0;

  for (let pos = token.length - 1; pos >= 0; pos -= 1) {
    const current = token[pos];

    if (current === ')') {
      depth += 1;
    } else if (current === '(') {
      depth -= 1;
    }
    // Back at depth 0: the final ')' has found its matching '('.
    if (depth === 0) {
      return token.slice(pos);
    }
  }
  // Reached the start of the token while still nested.
  return '';
}

/**
 * Escapes every character of an expression that has a special meaning in a
 * regular expression, so the result can be embedded in a `RegExp` pattern
 * and match the expression literally.
 *
 * @param {string} expression - Raw (math) expression text.
 * @returns {string} The expression with each regex metacharacter prefixed
 *   by a backslash.
 */
function escapeExpressionChars(expression) {
  // `$&` in the replacement string stands for the matched character itself,
  // so each match is rewritten as a backslash followed by that character.
  return expression.replace(/[-+*()/.?^${}|[\]\\]/g, '\\$&');
}

/**
 * Reducer for the parts of an expression that was split at '√': merges the
 * text left and right of one root sign into a single string in which the
 * root is written as `nthroot(<right operand>,<left operand>)`.
 *
 * @param {string} leftHandToken - Text to the left of the '√' (for later
 *   reduce steps, the already converted text accumulated so far).
 * @param {string} rightHandToken - Text to the right of the '√'.
 * @returns {string} The trimmed left remainder, the `nthroot(...)` call and
 *   the trimmed right remainder, concatenated.
 */
function createNthRootExpression(leftHandToken, rightHandToken) {
  const leftToken = leftHandToken.trim();
  const rightToken = rightHandToken.trim();

  // patterns that match partial 'nthroot' expressions
  // which are free of parentheses.
  const regXSimpleLeftHandExpression = /[\d*/]+$/;
  const regXSimpleRightHandExpression = /^[\d*/]+|^\([^+-]*\)/;

  // operand taken from the end of the token to the left of '√';
  // falls back to counting parentheses when the simple pattern fails.
  const leftHandExpression =
    leftToken.match(regXSimpleLeftHandExpression)?.[0] ||
    createFirstValidParenthesizedExpressionFromRight(leftToken);

  // operand taken from the start of the token to the right of '√';
  // falls back to counting parentheses when the simple pattern fails.
  const rightHandExpression =
    rightToken.match(regXSimpleRightHandExpression)?.[0] ||
    createFirstValidParenthesizedExpression(rightToken);

  // The left operand is always a suffix of its token and the right operand
  // always a prefix, so both are removed by length. This is equivalent to
  // replacing an escaped, anchored pattern, without building a RegExp.
  const leftRemainder =
    leftToken.slice(0, leftToken.length - leftHandExpression.length);
  const rightRemainder = rightToken.slice(rightHandExpression.length);

  return [

    leftRemainder,
    `nthroot(${ rightHandExpression },${ leftHandExpression })`,
    rightRemainder,

  ].join('');
}

// The expression from the question.
const sampleExpressionOriginal =
  '(10+10)*2*((1+1)*1)√(16)+(12*12)+2';
// A variant with two roots, one of them without parentheses on its right.
const sampleExpressionEdgeCase =
  '(10+10)*2*((1+1)*1)√16+(12*12)+2√(4*(1+2))+3';

// Logs a headline, the raw expression, its parts after splitting at '√',
// and the result of reducing those parts with `createNthRootExpression`.
const logNthRootDemo = (headline, expression) => {
  console.log(headline);
  console.log('original value ...\n', expression);
  console.log('original value, after split ...', expression.split('√'));
  console.log(
    'value, after "nthroot" creation ...\n',
    expression.split('√').reduce(createNthRootExpression)
  );
};

logNthRootDemo(
  "+++ processing the OP's expression +++",
  sampleExpressionOriginal
);
console.log('\n');

logNthRootDemo(
  "+++ processing a more edge case like expression +++",
  sampleExpressionEdgeCase
);

回复收藏 0 原文

~没有更多了~