sigh...
An outline to count chars / words:
This is the basis for a TinyMCE (TMCE) implementation which will be included in the next major upgrade of my VizBook (a guest-book style program in Perl that has been around for years) and in the next major upgrade of my ListMerge Perl newsletter mailer. It works well and quickly, even when tested with very large input.
// Global settings and shared state for the TinyMCE character / word counter.
// A few of my own values are included to give an idea of the approach; it can
// easily be extended to several textareas on the same page.
var element_id_Of_This_TextArea = 'msg'; // passed to TinyMCE as the "elements" option
var myCharCountDiv = 'charcnt'; // presumably the id of the character-count display element
var myWordCountDiv = 'wordcnt'; // presumably the id of the word-count display element
var myMaxLengthIs = 2000;
var useMyLength = true;
var myMaxWordsIs = 500;
var useMyWords = true; // ** using 'useMyWords' overrides the character limit option
var stoppit = false, stoppit2 = false; // flags not referenced in this excerpt
var editorID; // assigned from ed.id in the TinyMCE setup callback
var rootdom = 'http://dom.dom/'; // used as TinyMCE's document_base_url
tinyMCE.init({
// General options //"textareas",
mode : "exact",
elements : element_id_Of_This_TextArea, //"msg",
theme : "advanced",
skin : "vbook",
plugins : "emotions,inlinepopups,save",
theme_advanced_buttons1 : "bold,italic,underline,strikethrough,|,justifyleft,justifycenter,justifyright,bullist,numlist,outdent,indent,hr,|,forecolor,backcolor,emotions,|,placemailpic,|,undo,redo,removeformat,cleanup",
theme_advanced_buttons2 : "",
theme_advanced_buttons3 : "",
theme_advanced_buttons4 : "",
theme_advanced_toolbar_location : "top",
theme_advanced_toolbar_align : "left",
theme_advanced_statusbar_location : "none",
theme_advanced_resizing : false,
convert_urls : true,
relative_urls : false,
remove_script_host : false,
document_base_url : rootdom,
// Example content CSS (should be your site CSS)
content_css : "tmce/css/vbook.css?" + new Date().getTime(),
force_p_newlines : true,
force_br_newlines : false,
cleanup_on_startup : true,
cleanup: true,
entity_encoding : "numeric", //"numeric" "named" "raw"
object_resizing : false,
force_hex_style_colors : true,
paste_remove_spans : true,
paste_strip_class_attributes : "all",
paste_remove_styles : true,
paste_auto_cleanup_on_paste : true,
paste_block_drop : true,
paste_text_use_dialog : false,
convert_fonts_to_spans : true,
dialog_type : "modal",
cleanup_callback: 'myCustomCleanup',
remove_linebreaks : true,
valid_elements : blah blah,
//###############
setup : function(ed) {
ed.onPaste.add(function(ed, e) {
//alert('onPaste '+ed+' '+e);
//var copyContent = myCustomSetContent();
//tinyMCE.execCommand('mceSetContent',true,copyContent);
myCustomOnChangeHandler();
});
ed.onInit.add(function(ed) {
var copyContent = myCustomSetFixedContent();
tinyMCE.execCommand('mceSetContent',true,copyContent);
myCustomOnChangeHandler();
showCounters();
});
ed.onKeyDown.add(
function(ed, e) {
}//end function(ed,e)
);//end ed.onKeyDown.add
ed.onChange.add(
function(ed, e) {
myCustomOnChangeHandler();
}//end function(ed,e)
);//end ed.onChange.add
ed.onClick.add(
function(ed, e) {
}//end function(ed,e)
);//end ed.onClick.add
ed.onReset.add(
function(ed, e) {
myCustomOnChangeHandler();
}//end function(ed,e)
);//end ed.onReset.add
ed.onUndo.add(
function(ed, e) {
myCustomOnChangeHandler();
}//end function(ed,e)
);//end ed.onUndo.add
ed.onRedo.add(
function(ed, e) {
myCustomOnChangeHandler();
}//end function(ed,e)
);//end ed.onRedo.add
ed.onKeyUp.add(
function(ed, e) {
myCustomOnChangeHandler();
}//end function(ed,e)
);//end ed.onKeyUp.add
editorID = ed.id;
},//end setup : function(ed)
//##################################
spellchecker_languages : "+English=en",
spellchecker_rpc_url : "/tmce/spellchecker/rpc.php",
init_instance_callback : showstuff()
});
Note the "entity_encoding..." above. That makes it easy to detect entities and convert to single chars.
The counting html parser "function myCustomOnChangeHandler()" begins with...
// Read the editor body once; everything afterwards parses this in-memory string.
// Runs of non-breaking spaces (named entity, numeric entity or the raw U+00A0
// character) and runs of plain spaces are each collapsed to a single space.
var TextAreaLength = tinyMCE.activeEditor.getBody().innerHTML.replace(/(&nbsp;|&#160;|\u00a0)+/ig, ' ').replace(/ +/g, ' ');
... calling active editor only once with the rest parsing an in-memory variable (TextAreaLength).
That is followed by extensive stripping of various HTML tags (links, embeds, framesets, iframes, scripts, meta, etc.) with multiple regular expressions, finishing with...
// Collapse EVERY run of whitespace (spaces, tabs, newlines) to one space.
// The "g" flag matters: without it only the first run would be replaced.
TextAreaLength = TextAreaLength.replace(/\s+/g, ' ');
// Trim leading and trailing whitespace.
TextAreaLength = TextAreaLength.replace(/(^\s+|\s+$)/g, '');
So the in memory var ends up with only single characters (some '+' characters and spaces).
Because the entities are forced to "numeric" it is easy to simply...
// Count each character entity as ONE character by replacing it with '+'.
// Besides decimal entities (&#160;) this also covers hex (&#xA0;) and named
// (&amp; &lt; &gt; ...) forms, which innerHTML can emit as well.
TextAreaLength = TextAreaLength.replace(/&(#\d+|#x[0-9a-f]+|[a-z][a-z0-9]*);/gi, '+');
...converting them to single characters for counting as single chars too.
Words can be easily counted by...
// match() returns null when there is no space at all (empty or one-word input);
// fall back to an empty array so wcount.length is always safe to read.
wcount = TextAreaLength.match(/ /g) || [];
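...(per wcount.length) after all superfluous parts have been converted to spaces and multiple spaces collapsed to singles. Note that N words are separated by N - 1 spaces, so for non-empty text the word count is wcount.length + 1.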
The entire regular-expression block (mine is many lines) can easily be converted to your preferred server-side language, so that the server-side length checking matches that of the JavaScript function. (Server-side checking is FAR more important than client-side checking, but it's a nice touch for your visitors if you use both.)
Finally, in the "myCustomOnChangeHandler()" function the count / warning results are written to the innerHTML of a couple of in-your-face, largish DIVs that change colour, PLUS form submission is blocked when limits are reached (and vice versa).
Forget trying to stop further input after max reached as that will cause havoc with the cursor position and drive users nuts; very bad for successful conversions whether part of a transaction form or just info entry form as p****d off users historically VERY often give up and leave.
My implementation also includes a warning when the entire un-parsed input length, tags and all, exceeds the allowed char max X 2 (or 3.5; you do your own tests to decide that). This entails the appearance of another div in RED warning "input is too complex, please simplify" (occasionally some wallies / morons will paste only html tags just for fun??).
That is about it in a nut shell. My modules include quite a bit more bits and pieces but individuals will want to do their own thing. Just don't try and be too pedantic and work with what TMCE offers; warnings are better than buggy input blocks. Every browser's editor methods create different html code. So that's why TMCE (and other editors) attempt to (forcefully) bring them in to some sort of common output so work with it not against it.
There are MANY different reasons we may want to apply some sort of char / word counting restrictions / notifications and NO ONE has the right to arrogantly put you down or argue your needs if that's what you decide is what you need to do.
As my new sites are still some time off (and therefore so too the inclusion of TMCE text editing methods in a couple of upgraded programs) I hope that helps some put their own counting parser processors together. Try it this way it works well.
Regards.
============================================
PS To observe your parser during development add a plain textarea to your form...
<textarea name="fred" id="fred" rows="10" cols="60"></textarea>
...and write to it at bottom of myCustomOnChangeHandler() function...
// Development aid: mirror the parsed string into the plain "fred" textarea.
var observeobj = idIs( 'fred' );
// A textarea's displayed text is its .value; assigning innerHTML would
// re-parse the string as markup. Skip quietly if the textarea is absent.
if (observeobj) { observeobj.value = TextAreaLength; }
...helped by this object finder function in the page head...
/**
 * Look up a page element by its id, falling back to legacy browser
 * collections when document.getElementById is unavailable.
 *
 * @param {string} id - id of the element to find.
 * @returns {*} whatever the first available lookup yields, or null when
 *     none of the three lookup mechanisms exists.
 */
function idIs(id) {
  if (document.getElementById) {
    return document.getElementById(id);
  }
  if (document.all) {
    return document.all[id];
  }
  if (document.layers) {
    return document.layers[id];
  }
  return null;
}
Also consider copying html code from random real web pages and paste them into your TMCE form; it really helps as you will otherwise invariably enter code as you'd expect it to be.
Last edited by Wombat_AU (2011-01-27 20:21:37)