This is who I am?
created Sep 26th, 16:08 by Anshul Kumar
# Typing Practice ML Model for 10fastfingers
# ------------------------------------------
# This script learns which words you are most likely to mistype
# and recommends practice words for you.
#
# How to use:
# 1. Run it once (it uses simulated logs).
# 2. Later replace the synthetic logs with your real typing logs.
# 3. The model will adapt and generate new practice words.
import csv
import random
from collections import Counter
try:
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.linear_model import LogisticRegression
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False
# --------------------------
# 1. Simulate Typing Logs
# --------------------------
letters = list("abcdefghijklmnopqrstuvwxyz")
hard_letters = {'t', 'r', 'y', 'u', 'i', 'o'}  # pretend these are harder
hard_bigrams = {'th', 'tr', 'ri', 'yu', 'oi', 'ht'}
def simulate_log_entry(word):
    entries = []
    elapsed = 50
    for i, ch in enumerate(word):
        prev = word[i-1] if i > 0 else '<s>'
        p = 0.01
        if ch in hard_letters:
            p += 0.12
        if (prev + ch) in hard_bigrams:
            p += 0.10
        if i == 0 or i == len(word) - 1:
            p -= 0.005
        is_upper = ch.isupper()
        if is_upper:
            p += 0.05
        error = 1 if random.random() < p else 0
        entries.append({
            'prev_char': prev.lower(),
            'char': ch.lower(),
            'position': i / max(1, len(word) - 1),
            'is_upper': int(is_upper),
            'elapsed_ms': elapsed,
            'error': error
        })
        elapsed += 30
    return entries
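# Example of the schema (illustrative values; 'error' is sampled at random,
# so it varies run to run):
#   simulate_log_entry("the")[0]
#   -> {'prev_char': '<s>', 'char': 't', 'position': 0.0,
#       'is_upper': 0, 'elapsed_ms': 50, 'error': 0}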
sample_words = [
    "the", "and", "you", "that", "practice", "keyboard", "accuracy", "mistake",
    "beautiful", "synchronize", "probability", "algorithm", "statistics",
    "performance", "improve", "predict", "tough", "rhythm"
]
logs = []
for _ in range(1000):
    w = random.choice(sample_words)
    logs.extend(simulate_log_entry(w))
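# Sketch for step 2 of "How to use" (an assumption, not part of the original
# run): swap the simulated loop above for a loader over your real logs. The
# file name and column layout below are hypothetical; the CSV header is
# expected to match the log-entry keys used in this script.
def load_real_logs(path):
    real_logs = []
    with open(path, newline="") as f:
        for row in csv.DictReader(f):
            real_logs.append({
                'prev_char': row['prev_char'],
                'char': row['char'],
                'position': float(row['position']),
                'is_upper': int(row['is_upper']),
                'elapsed_ms': int(row['elapsed_ms']),
                'error': int(row['error']),
            })
    return real_logs
# e.g. logs = load_real_logs("my_typing_log.csv")  # hypothetical file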
# --------------------------
# 2. Feature Extraction
# --------------------------
def extract_features(entry):
    return {
        'char=' + entry['char']: 1,
        'prev_char=' + entry['prev_char']: 1,
        'bigram=' + entry['prev_char'] + entry['char']: 1,
        'pos_bucket=' + str(int(entry['position'] * 4)): 1,
    }
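# For example, the second character of "the" ('h' preceded by 't', at
# relative position 0.5) maps to:
#   {'char=h': 1, 'prev_char=t': 1, 'bigram=th': 1, 'pos_bucket=2': 1}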
X_dicts = [extract_features(e) for e in logs]
y = [e['error'] for e in logs]
# --------------------------
# 3. Train Model
# --------------------------
if SKLEARN_AVAILABLE:
    vec = DictVectorizer(sparse=False)
    X = vec.fit_transform(X_dicts)
    clf = LogisticRegression(max_iter=1000, class_weight='balanced', solver='liblinear')
    clf.fit(X, y)

    def predict_error_prob(entry):
        return float(clf.predict_proba(vec.transform([extract_features(entry)]))[0, 1])

    print("Model: LogisticRegression")
else:
    counts, errors = Counter(), Counter()
    for d, label in zip(X_dicts, y):
        for k in d:
            counts[k] += 1
            if label:
                errors[k] += 1

    def predict_error_prob(entry):
        feats = extract_features(entry)
        rates = []
        for k in feats:
            rate = (errors[k] + 1) / (counts[k] + 2) if counts[k] > 0 else 0.02
            rates.append(rate)
        return sum(rates) / len(rates)

    print("Model: Frequency baseline")
# --------------------------
# 4. Score Words
# --------------------------
def word_score(word):
    probs = []
    for i, ch in enumerate(word):
        prev = word[i-1] if i > 0 else '<s>'
        entry = {
            'prev_char': prev.lower(),
            'char': ch.lower(),
            'position': i / max(1, len(word) - 1),
            'is_upper': int(ch.isupper()),
            'elapsed_ms': 50
        }
        probs.append(predict_error_prob(entry))
    return sum(probs)
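# Design note: word_score sums per-character risk, so longer words rank
# higher at equal per-key difficulty. A length-normalized variant (an
# alternative, not what the original ranking uses) would be:
def word_score_avg(word):
    return word_score(word) / max(1, len(word))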
scored = [(w, word_score(w)) for w in sample_words]
scored.sort(key=lambda x: -x[1])
# --------------------------
# 5. Show Results
# --------------------------
print("\nTop Practice Words:\n")
for w, s in scored:
    print(f"{w:12s} — {s:.3f}")
# Save to file
with open("practice_words.txt","w") as f:
for w,s in scored:
f.write(f"{w}\t{s:.6f}\n")
print("\nSaved to practice_words.txt")
